]> git.pld-linux.org Git - packages/kernel.git/blob - linux-2.6-vs2.1.patch
- added Linux-ABI support. Bcond, default disabled.
[packages/kernel.git] / linux-2.6-vs2.1.patch
1 diff -NurpP --minimal linux-2.6.16.20/Documentation/vserver/debug.txt linux-2.6.16.20-vs2.1.1-rc22/Documentation/vserver/debug.txt
2 --- linux-2.6.16.20/Documentation/vserver/debug.txt     1970-01-01 01:00:00 +0100
3 +++ linux-2.6.16.20-vs2.1.1-rc22/Documentation/vserver/debug.txt        2006-04-26 19:06:59 +0200
4 @@ -0,0 +1,108 @@
5 +
6 +debug_cvirt:
7 +
8 + 2   4 "vx_map_tgid: %p/%llx: %d -> %d"
9 +       "vx_rmap_tgid: %p/%llx: %d -> %d"
10 +
11 +debug_dlim:
12 +
13 + 0   1 "ALLOC (%p,#%d)%c inode (%d)"
14 +       "FREE  (%p,#%d)%c inode"
15 + 1   2 "ALLOC (%p,#%d)%c %lld bytes (%d)"
16 +       "FREE  (%p,#%d)%c %lld bytes"
17 + 2   4 "ADJUST: %lld,%lld on %d,%d [mult=%d]"
18 + 3   8 "ext3_has_free_blocks(%p): %u<%u+1, %c, %u!=%u r=%d"
19 +       "ext3_has_free_blocks(%p): free=%u, root=%u"
20 +       "rcu_free_dl_info(%p)"
21 + 4  10 "alloc_dl_info(%p,%d) = %p"
22 +       "dealloc_dl_info(%p)"
23 +       "get_dl_info(%p[#%d.%d])"
24 +       "put_dl_info(%p[#%d.%d])"
25 + 5  20 "alloc_dl_info(%p,%d)*"
26 + 6  40 "__hash_dl_info: %p[#%d]"
27 +       "__unhash_dl_info: %p[#%d]"
28 + 7  80 "locate_dl_info(%p,#%d) = %p"
29 +
30 +debug_net:
31 +
32 + 2   4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d"
33 + 3   8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d"
34 +       "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d"
35 + 4  10 "ip_route_connect(%p) %p,%p;%lx"
36 + 5  20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx"
37 + 6  40 "sk,egf: %p [#%d] (from %d)"
38 +       "sk,egn: %p [#%d] (from %d)"
39 +       "sk,req: %p [#%d] (from %d)"
40 +       "sk: %p [#%d] (from %d)"
41 +       "tw: %p [#%d] (from %d)"
42 + 7  80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d"
43 +       "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d"
44 +
45 +debug_nid:
46 +
47 + 0   1 "__lookup_nx_info(#%u): %p[#%u]"
48 +       "alloc_nx_info(%d) = %p"
49 +       "create_nx_info(%d) (dynamic rejected)"
50 +       "create_nx_info(%d) = %p (already there)"
51 +       "create_nx_info(%d) = %p (new)"
52 +       "dealloc_nx_info(%p)"
53 + 1   2 "alloc_nx_info(%d)*"
54 +       "create_nx_info(%d)*"
55 + 2   4 "get_nx_info(%p[#%d.%d])"
56 +       "put_nx_info(%p[#%d.%d])"
57 + 3   8 "claim_nx_info(%p[#%d.%d.%d]) %p"
58 +       "clr_nx_info(%p[#%d.%d])"
59 +       "init_nx_info(%p[#%d.%d])"
60 +       "release_nx_info(%p[#%d.%d.%d]) %p"
61 +       "set_nx_info(%p[#%d.%d])"
62 + 4  10 "__hash_nx_info: %p[#%d]"
63 +       "__nx_dynamic_id: [#%d]"
64 +       "__unhash_nx_info: %p[#%d]"
65 + 5  20 "moved task %p into nxi:%p[#%d]"
66 +       "nx_migrate_task(%p,%p[#%d.%d.%d])"
67 +       "task_get_nx_info(%p)"
68 +
69 +debug_switch:
70 +
71 + 0   1 "vc: VCMD_%02d_%d[%d], %d,%p,%d"
72 + 1   2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld)"
73 + 4  10 "%s: (%s %s) returned %s with %d"
74 +
75 +debug_xid:
76 +
77 + 0   1 "__lookup_vx_info(#%u): %p[#%u]"
78 +       "alloc_vx_info(%d) = %p"
79 +       "alloc_vx_info(%d)*"
80 +       "create_vx_info(%d) (dynamic rejected)"
81 +       "create_vx_info(%d) = %p (already there)"
82 +       "create_vx_info(%d) = %p (new)"
83 +       "dealloc_vx_info(%p)"
84 + 1   2 "create_vx_info(%d)*"
85 + 2   4 "get_vx_info(%p[#%d.%d])"
86 +       "put_vx_info(%p[#%d.%d])"
87 + 3   8 "claim_vx_info(%p[#%d.%d.%d]) %p"
88 +       "clr_vx_info(%p[#%d.%d])"
89 +       "init_vx_info(%p[#%d.%d])"
90 +       "release_vx_info(%p[#%d.%d.%d]) %p"
91 +       "set_vx_info(%p[#%d.%d])"
92 + 4  10 "__hash_vx_info: %p[#%d]"
93 +       "__unhash_vx_info: %p[#%d]"
94 +       "__vx_dynamic_id: [#%d]"
95 + 5  20 "moved task %p into vxi:%p[#%d]"
96 +       "task_get_vx_info(%p)"
97 +       "vx_migrate_task(%p,%p[#%d.%d])"
98 + 6  40 "vx_set_init(%p[#%d],%p[#%d,%d,%d])"
99 +       "vx_exit_init(%p[#%d],%p[#%d,%d,%d])"
100 +       "vx_set_reaper(%p[#%d],%p[#%d,%d])"
101 + 7  80 "vx_parse_xid(»%s«): %d:#%d"
102 +       "vx_propagate_xid(%p[#%lu.%d]): %d,%d"
103 +
104 +
105 +debug_limit:
106 +
107 + n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s"
108 +       "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
109 +
110 + m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s"
111 +       "vx_acc_pages[%5d,%s,%2d]: %5d += %5d"
112 +       "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
113 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/Kconfig
114 --- linux-2.6.16.20/arch/alpha/Kconfig  2006-02-18 14:39:40 +0100
115 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/Kconfig     2006-04-26 19:06:59 +0200
116 @@ -619,6 +619,8 @@ source "arch/alpha/oprofile/Kconfig"
117  
118  source "arch/alpha/Kconfig.debug"
119  
120 +source "kernel/vserver/Kconfig"
121 +
122  source "security/Kconfig"
123  
124  source "crypto/Kconfig"
125 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/kernel/asm-offsets.c linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/asm-offsets.c
126 --- linux-2.6.16.20/arch/alpha/kernel/asm-offsets.c     2006-02-15 13:54:10 +0100
127 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/asm-offsets.c        2006-04-26 19:06:59 +0200
128 @@ -36,6 +36,7 @@ void foo(void)
129         DEFINE(PT_PTRACED, PT_PTRACED);
130         DEFINE(CLONE_VM, CLONE_VM);
131         DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
132 +       DEFINE(CLONE_KTHREAD, CLONE_KTHREAD);
133         DEFINE(SIGCHLD, SIGCHLD);
134         BLANK();
135  
136 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/kernel/entry.S linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/entry.S
137 --- linux-2.6.16.20/arch/alpha/kernel/entry.S   2006-04-09 13:49:39 +0200
138 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/entry.S      2006-04-26 19:06:59 +0200
139 @@ -645,7 +645,7 @@ kernel_thread:
140         stq     $2, 152($sp)            /* HAE */
141  
142         /* Shuffle FLAGS to the front; add CLONE_VM.  */
143 -       ldi     $1, CLONE_VM|CLONE_UNTRACED
144 +       ldi     $1, CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD;
145         or      $18, $1, $16
146         bsr     $26, sys_clone
147  
148 @@ -874,24 +874,15 @@ sys_getxgid:
149         .globl  sys_getxpid
150         .ent    sys_getxpid
151  sys_getxpid:
152 +       lda     $sp, -16($sp)
153 +       stq     $26, 0($sp)
154         .prologue 0
155 -       ldq     $2, TI_TASK($8)
156  
157 -       /* See linux/kernel/timer.c sys_getppid for discussion
158 -          about this loop.  */
159 -       ldq     $3, TASK_GROUP_LEADER($2)
160 -       ldq     $4, TASK_REAL_PARENT($3)
161 -       ldl     $0, TASK_TGID($2)
162 -1:     ldl     $1, TASK_TGID($4)
163 -#ifdef CONFIG_SMP
164 -       mov     $4, $5
165 -       mb
166 -       ldq     $3, TASK_GROUP_LEADER($2)
167 -       ldq     $4, TASK_REAL_PARENT($3)
168 -       cmpeq   $4, $5, $5
169 -       beq     $5, 1b
170 -#endif
171 -       stq     $1, 80($sp)
172 +       lda     $16, 96($sp)
173 +       jsr     $26, do_getxpid
174 +       ldq     $26, 0($sp)
175 +
176 +       lda     $sp, 16($sp)
177         ret
178  .end sys_getxpid
179  
180 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/kernel/osf_sys.c linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/osf_sys.c
181 --- linux-2.6.16.20/arch/alpha/kernel/osf_sys.c 2006-02-15 13:54:10 +0100
182 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/osf_sys.c    2006-05-29 16:49:23 +0200
183 @@ -38,6 +38,7 @@
184  #include <linux/uio.h>
185  #include <linux/vfs.h>
186  #include <linux/rcupdate.h>
187 +#include <linux/vs_cvirt.h>
188  
189  #include <asm/fpu.h>
190  #include <asm/io.h>
191 @@ -399,18 +400,20 @@ asmlinkage int
192  osf_utsname(char __user *name)
193  {
194         int error;
195 +       struct new_utsname *ptr;
196  
197         down_read(&uts_sem);
198 +       ptr = vx_new_utsname();
199         error = -EFAULT;
200 -       if (copy_to_user(name + 0, system_utsname.sysname, 32))
201 +       if (copy_to_user(name + 0, ptr->sysname, 32))
202                 goto out;
203 -       if (copy_to_user(name + 32, system_utsname.nodename, 32))
204 +       if (copy_to_user(name + 32, ptr->nodename, 32))
205                 goto out;
206 -       if (copy_to_user(name + 64, system_utsname.release, 32))
207 +       if (copy_to_user(name + 64, ptr->release, 32))
208                 goto out;
209 -       if (copy_to_user(name + 96, system_utsname.version, 32))
210 +       if (copy_to_user(name + 96, ptr->version, 32))
211                 goto out;
212 -       if (copy_to_user(name + 128, system_utsname.machine, 32))
213 +       if (copy_to_user(name + 128, ptr->machine, 32))
214                 goto out;
215  
216         error = 0;
217 @@ -439,6 +442,7 @@ osf_getdomainname(char __user *name, int
218  {
219         unsigned len;
220         int i;
221 +       char *domainname;
222  
223         if (!access_ok(VERIFY_WRITE, name, namelen))
224                 return -EFAULT;
225 @@ -448,9 +452,10 @@ osf_getdomainname(char __user *name, int
226                 len = 32;
227  
228         down_read(&uts_sem);
229 +       domainname = vx_new_uts(domainname);
230         for (i = 0; i < len; ++i) {
231 -               __put_user(system_utsname.domainname[i], name + i);
232 -               if (system_utsname.domainname[i] == '\0')
233 +               __put_user(domainname[i], name + i);
234 +               if (domainname[i] == '\0')
235                         break;
236         }
237         up_read(&uts_sem);
238 @@ -607,30 +612,30 @@ osf_sigstack(struct sigstack __user *uss
239  asmlinkage long
240  osf_sysinfo(int command, char __user *buf, long count)
241  {
242 -       static char * sysinfo_table[] = {
243 -               system_utsname.sysname,
244 -               system_utsname.nodename,
245 -               system_utsname.release,
246 -               system_utsname.version,
247 -               system_utsname.machine,
248 -               "alpha",        /* instruction set architecture */
249 -               "dummy",        /* hardware serial number */
250 -               "dummy",        /* hardware manufacturer */
251 -               "dummy",        /* secure RPC domain */
252 -       };
253         unsigned long offset;
254         char *res;
255         long len, err = -EINVAL;
256  
257         offset = command-1;
258 -       if (offset >= sizeof(sysinfo_table)/sizeof(char *)) {
259 +       if (offset >= 9) {
260                 /* Digital UNIX has a few unpublished interfaces here */
261                 printk("sysinfo(%d)", command);
262                 goto out;
263         }
264         
265         down_read(&uts_sem);
266 -       res = sysinfo_table[offset];
267 +       switch (offset)
268 +       {
269 +       case 0: res = vx_new_uts(sysname);  break;
270 +       case 1: res = vx_new_uts(nodename); break;
271 +       case 2: res = vx_new_uts(release);  break;
272 +       case 3: res = vx_new_uts(version);  break;
273 +       case 4: res = vx_new_uts(machine);  break;
274 +       case 5: res = "alpha";              break;
275 +       default:
276 +               res = "dummy";
277 +               break;
278 +       }
279         len = strlen(res)+1;
280         if (len > count)
281                 len = count;
282 @@ -882,7 +887,7 @@ osf_gettimeofday(struct timeval32 __user
283  {
284         if (tv) {
285                 struct timeval ktv;
286 -               do_gettimeofday(&ktv);
287 +               vx_gettimeofday(&ktv);
288                 if (put_tv32(tv, &ktv))
289                         return -EFAULT;
290         }
291 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/ptrace.c
292 --- linux-2.6.16.20/arch/alpha/kernel/ptrace.c  2006-04-09 13:49:39 +0200
293 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/ptrace.c     2006-04-28 05:07:10 +0200
294 @@ -283,6 +283,11 @@ do_sys_ptrace(long request, long pid, lo
295                 goto out_notsk;
296         }
297  
298 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) {
299 +               ret = -EPERM;
300 +               goto out;
301 +       }
302 +
303         if (request == PTRACE_ATTACH) {
304                 ret = ptrace_attach(child);
305                 goto out;
306 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/kernel/systbls.S linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/systbls.S
307 --- linux-2.6.16.20/arch/alpha/kernel/systbls.S 2005-08-29 22:24:49 +0200
308 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/systbls.S    2006-04-26 19:06:59 +0200
309 @@ -447,7 +447,7 @@ sys_call_table:
310         .quad sys_stat64                        /* 425 */
311         .quad sys_lstat64
312         .quad sys_fstat64
313 -       .quad sys_ni_syscall                    /* sys_vserver */
314 +       .quad sys_vserver                       /* sys_vserver */
315         .quad sys_ni_syscall                    /* sys_mbind */
316         .quad sys_ni_syscall                    /* sys_get_mempolicy */
317         .quad sys_ni_syscall                    /* sys_set_mempolicy */
318 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/traps.c
319 --- linux-2.6.16.20/arch/alpha/kernel/traps.c   2005-10-28 20:49:08 +0200
320 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/kernel/traps.c      2006-04-26 19:06:59 +0200
321 @@ -183,7 +183,8 @@ die_if_kernel(char * str, struct pt_regs
322  #ifdef CONFIG_SMP
323         printk("CPU %d ", hard_smp_processor_id());
324  #endif
325 -       printk("%s(%d): %s %ld\n", current->comm, current->pid, str, err);
326 +       printk("%s(%d[#%u]): %s %ld\n", current->comm,
327 +               current->pid, current->xid, str, err);
328         dik_show_regs(regs, r9_15);
329         dik_show_trace((unsigned long *)(regs+1));
330         dik_show_code((unsigned int *)regs->pc);
331 diff -NurpP --minimal linux-2.6.16.20/arch/alpha/mm/init.c linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/mm/init.c
332 --- linux-2.6.16.20/arch/alpha/mm/init.c        2006-02-18 14:39:40 +0100
333 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/alpha/mm/init.c   2006-04-26 19:06:59 +0200
334 @@ -21,6 +21,7 @@
335  #include <linux/init.h>
336  #include <linux/bootmem.h> /* max_low_pfn */
337  #include <linux/vmalloc.h>
338 +#include <linux/pagemap.h>
339  
340  #include <asm/system.h>
341  #include <asm/uaccess.h>
342 diff -NurpP --minimal linux-2.6.16.20/arch/arm/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/arm/Kconfig
343 --- linux-2.6.16.20/arch/arm/Kconfig    2006-04-09 13:49:39 +0200
344 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm/Kconfig       2006-04-26 19:06:59 +0200
345 @@ -827,6 +827,8 @@ source "arch/arm/oprofile/Kconfig"
346  
347  source "arch/arm/Kconfig.debug"
348  
349 +source "kernel/vserver/Kconfig"
350 +
351  source "security/Kconfig"
352  
353  source "crypto/Kconfig"
354 diff -NurpP --minimal linux-2.6.16.20/arch/arm/kernel/calls.S linux-2.6.16.20-vs2.1.1-rc22/arch/arm/kernel/calls.S
355 --- linux-2.6.16.20/arch/arm/kernel/calls.S     2006-02-18 14:39:40 +0100
356 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm/kernel/calls.S        2006-04-26 19:06:59 +0200
357 @@ -322,7 +322,7 @@
358  /* 310 */      CALL(sys_request_key)
359                 CALL(sys_keyctl)
360                 CALL(ABI(sys_semtimedop, sys_oabi_semtimedop))
361 -/* vserver */  CALL(sys_ni_syscall)
362 +               CALL(sys_vserver)
363                 CALL(sys_ioprio_set)
364  /* 315 */      CALL(sys_ioprio_get)
365                 CALL(sys_inotify_init)
366 diff -NurpP --minimal linux-2.6.16.20/arch/arm/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/arm/kernel/process.c
367 --- linux-2.6.16.20/arch/arm/kernel/process.c   2006-04-09 13:49:40 +0200
368 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm/kernel/process.c      2006-04-26 19:06:59 +0200
369 @@ -227,7 +227,8 @@ void __show_regs(struct pt_regs *regs)
370  void show_regs(struct pt_regs * regs)
371  {
372         printk("\n");
373 -       printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
374 +       printk("Pid: %d[#%u], comm: %20s\n",
375 +               current->pid, current->xid, current->comm);
376         __show_regs(regs);
377         __backtrace();
378  }
379 @@ -448,7 +449,8 @@ pid_t kernel_thread(int (*fn)(void *), v
380         regs.ARM_pc = (unsigned long)kernel_thread_helper;
381         regs.ARM_cpsr = SVC_MODE;
382  
383 -       return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
384 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
385 +               0, &regs, 0, NULL, NULL);
386  }
387  EXPORT_SYMBOL(kernel_thread);
388  
389 diff -NurpP --minimal linux-2.6.16.20/arch/arm26/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/Kconfig
390 --- linux-2.6.16.20/arch/arm26/Kconfig  2006-02-18 14:39:41 +0100
391 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/Kconfig     2006-04-26 19:06:59 +0200
392 @@ -230,6 +230,8 @@ source "drivers/usb/Kconfig"
393  
394  source "arch/arm26/Kconfig.debug"
395  
396 +source "kernel/vserver/Kconfig"
397 +
398  source "security/Kconfig"
399  
400  source "crypto/Kconfig"
401 diff -NurpP --minimal linux-2.6.16.20/arch/arm26/kernel/calls.S linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/kernel/calls.S
402 --- linux-2.6.16.20/arch/arm26/kernel/calls.S   2005-03-02 12:38:19 +0100
403 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/kernel/calls.S      2006-04-26 19:06:59 +0200
404 @@ -257,6 +257,11 @@ __syscall_start:
405                 .long   sys_lremovexattr
406                 .long   sys_fremovexattr
407                 .long   sys_tkill
408 +
409 +               .rept   313 - (. - __syscall_start) / 4
410 +                       .long   sys_ni_syscall
411 +               .endr
412 +               .long   sys_vserver     /* 313 */
413  __syscall_end:
414  
415                 .rept   NR_syscalls - (__syscall_end - __syscall_start) / 4
416 diff -NurpP --minimal linux-2.6.16.20/arch/arm26/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/kernel/process.c
417 --- linux-2.6.16.20/arch/arm26/kernel/process.c 2006-01-18 06:07:51 +0100
418 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/kernel/process.c    2006-04-26 19:06:59 +0200
419 @@ -366,7 +366,8 @@ pid_t kernel_thread(int (*fn)(void *), v
420          regs.ARM_r3 = (unsigned long)do_exit;
421          regs.ARM_pc = (unsigned long)kernel_thread_helper | MODE_SVC26;
422  
423 -        return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
424 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
425 +               0, &regs, 0, NULL, NULL);
426  }
427  EXPORT_SYMBOL(kernel_thread);
428  
429 diff -NurpP --minimal linux-2.6.16.20/arch/arm26/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/kernel/traps.c
430 --- linux-2.6.16.20/arch/arm26/kernel/traps.c   2006-01-18 06:07:51 +0100
431 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/arm26/kernel/traps.c      2006-04-26 19:06:59 +0200
432 @@ -186,8 +186,9 @@ NORET_TYPE void die(const char *str, str
433         printk("Internal error: %s: %x\n", str, err);
434         printk("CPU: %d\n", smp_processor_id());
435         show_regs(regs);
436 -       printk("Process %s (pid: %d, stack limit = 0x%p)\n",
437 -               current->comm, current->pid, end_of_stack(tsk));
438 +       printk("Process %s (pid: %d[#%u], stack limit = 0x%p)\n",
439 +               current->comm, current->pid,
440 +               current->xid, end_of_stack(tsk));
441  
442         if (!user_mode(regs) || in_interrupt()) {
443                 __dump_stack(tsk, (unsigned long)(regs + 1));
444 diff -NurpP --minimal linux-2.6.16.20/arch/cris/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/cris/Kconfig
445 --- linux-2.6.16.20/arch/cris/Kconfig   2006-02-18 14:39:42 +0100
446 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/cris/Kconfig      2006-04-26 19:06:59 +0200
447 @@ -173,6 +173,8 @@ source "drivers/usb/Kconfig"
448  
449  source "arch/cris/Kconfig.debug"
450  
451 +source "kernel/vserver/Kconfig"
452 +
453  source "security/Kconfig"
454  
455  source "crypto/Kconfig"
456 diff -NurpP --minimal linux-2.6.16.20/arch/cris/arch-v10/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/cris/arch-v10/kernel/process.c
457 --- linux-2.6.16.20/arch/cris/arch-v10/kernel/process.c 2006-01-18 06:07:51 +0100
458 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/cris/arch-v10/kernel/process.c    2006-04-26 19:06:59 +0200
459 @@ -104,7 +104,8 @@ int kernel_thread(int (*fn)(void *), voi
460         regs.dccr = 1 << I_DCCR_BITNR;
461  
462         /* Ok, create the new process.. */
463 -        return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
464 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
465 +               0, &regs, 0, NULL, NULL);
466  }
467  
468  /* setup the child's kernel stack with a pt_regs and switch_stack on it.
469 diff -NurpP --minimal linux-2.6.16.20/arch/cris/arch-v32/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/cris/arch-v32/kernel/process.c
470 --- linux-2.6.16.20/arch/cris/arch-v32/kernel/process.c 2006-01-18 06:07:51 +0100
471 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/cris/arch-v32/kernel/process.c    2006-04-26 19:06:59 +0200
472 @@ -121,7 +121,8 @@ kernel_thread(int (*fn)(void *), void * 
473         regs.ccs = 1 << (I_CCS_BITNR + CCS_SHIFT);
474  
475         /* Create the new process. */
476 -        return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
477 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
478 +               0, &regs, 0, NULL, NULL);
479  }
480  
481  /*
482 diff -NurpP --minimal linux-2.6.16.20/arch/frv/kernel/kernel_thread.S linux-2.6.16.20-vs2.1.1-rc22/arch/frv/kernel/kernel_thread.S
483 --- linux-2.6.16.20/arch/frv/kernel/kernel_thread.S     2005-03-02 12:38:20 +0100
484 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/frv/kernel/kernel_thread.S        2006-04-26 19:06:59 +0200
485 @@ -13,6 +13,8 @@
486  #include <asm/unistd.h>
487  
488  #define CLONE_VM       0x00000100      /* set if VM shared between processes */
489 +#define CLONE_KTHREAD  0x10000000      /* kernel thread */
490 +#define CLONE_KT       (CLONE_VM | CLONE_KTHREAD)      /* kernel thread flags */
491  #define        KERN_ERR        "<3>"
492  
493         .section .rodata
494 @@ -37,7 +39,7 @@ kernel_thread:
495  
496         # start by forking the current process, but with shared VM
497         setlos.p        #__NR_clone,gr7         ; syscall number
498 -       ori             gr10,#CLONE_VM,gr8      ; first syscall arg     [clone_flags]
499 +       ori             gr10,#CLONE_KT,gr8      ; first syscall arg     [clone_flags]
500         sethi.p         #0xe4e4,gr9             ; second syscall arg    [newsp]
501         setlo           #0xe4e4,gr9
502         setlos.p        #0,gr10                 ; third syscall arg     [parent_tidptr]
503 diff -NurpP --minimal linux-2.6.16.20/arch/frv/mm/mmu-context.c linux-2.6.16.20-vs2.1.1-rc22/arch/frv/mm/mmu-context.c
504 --- linux-2.6.16.20/arch/frv/mm/mmu-context.c   2005-03-02 12:38:20 +0100
505 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/frv/mm/mmu-context.c      2006-04-26 19:06:59 +0200
506 @@ -11,6 +11,7 @@
507  
508  #include <linux/sched.h>
509  #include <linux/mm.h>
510 +#include <linux/vs_pid.h>
511  #include <asm/tlbflush.h>
512  
513  #define NR_CXN 4096
514 diff -NurpP --minimal linux-2.6.16.20/arch/h8300/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/h8300/Kconfig
515 --- linux-2.6.16.20/arch/h8300/Kconfig  2006-02-18 14:39:42 +0100
516 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/h8300/Kconfig     2006-04-26 19:06:59 +0200
517 @@ -191,6 +191,8 @@ source "fs/Kconfig"
518  
519  source "arch/h8300/Kconfig.debug"
520  
521 +source "kernel/vserver/Kconfig"
522 +
523  source "security/Kconfig"
524  
525  source "crypto/Kconfig"
526 diff -NurpP --minimal linux-2.6.16.20/arch/h8300/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/h8300/kernel/process.c
527 --- linux-2.6.16.20/arch/h8300/kernel/process.c 2006-04-09 13:49:41 +0200
528 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/h8300/kernel/process.c    2006-04-26 19:06:59 +0200
529 @@ -135,7 +135,7 @@ int kernel_thread(int (*fn)(void *), voi
530  
531         fs = get_fs();
532         set_fs (KERNEL_DS);
533 -       clone_arg = flags | CLONE_VM;
534 +       clone_arg = flags | CLONE_VM | CLONE_KTHREAD;
535         __asm__("mov.l sp,er3\n\t"
536                 "sub.l er2,er2\n\t"
537                 "mov.l %2,er1\n\t"
538 diff -NurpP --minimal linux-2.6.16.20/arch/i386/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/i386/Kconfig
539 --- linux-2.6.16.20/arch/i386/Kconfig   2006-04-09 13:49:41 +0200
540 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/Kconfig      2006-04-26 19:06:59 +0200
541 @@ -466,23 +466,43 @@ choice
542           will also likely make your kernel incompatible with binary-only
543           kernel modules.
544  
545 -         If you are not absolutely sure what you are doing, leave this
546 -         option alone!
547 -
548         config VMSPLIT_3G
549 -               bool "3G/1G user/kernel split"
550 -       config VMSPLIT_3G_OPT
551 -               bool "3G/1G user/kernel split (for full 1G low memory)"
552 +               bool "3G/1G user/kernel split (Default)"
553 +               help
554 +                 This is the default split of 3GB userspace to 1GB kernel
555 +                 space, which will result in about 860MB of lowmem.
556 +
557 +       config VMSPLIT_25G
558 +               bool "2.5G/1.5G user/kernel split"
559 +               help
560 +                 This split provides 2.5GB userspace and 1.5GB kernel
561 +                 space, which will result in about 1370MB of lowmem.
562 +
563         config VMSPLIT_2G
564                 bool "2G/2G user/kernel split"
565 +               help
566 +                 This split provides 2GB userspace and 2GB kernel
567 +                 space, which will result in about 1880MB of lowmem.
568 +
569 +       config VMSPLIT_15G
570 +               bool "1.5G/2.5G user/kernel split"
571 +               help
572 +                 This split provides 1.5GB userspace and 2.5GB kernel
573 +                 space, which will result in about 2390MB of lowmem.
574 +
575         config VMSPLIT_1G
576                 bool "1G/3G user/kernel split"
577 +               help
578 +                 This split provides 1GB userspace and 3GB kernel
579 +                 space, which will result in about 2900MB of lowmem.
580 +
581  endchoice
582  
583  config PAGE_OFFSET
584         hex
585 -       default 0xB0000000 if VMSPLIT_3G_OPT
586 -       default 0x78000000 if VMSPLIT_2G
587 +       default 0xA0000000 if VMSPLIT_25G
588 +       default 0x80000000 if VMSPLIT_2G
589 +       default 0x60000000 if VMSPLIT_15G
590         default 0x40000000 if VMSPLIT_1G
591         default 0xC0000000
592  
593 @@ -1071,6 +1091,8 @@ endmenu
594  
595  source "arch/i386/Kconfig.debug"
596  
597 +source "kernel/vserver/Kconfig"
598 +
599  source "security/Kconfig"
600  
601  source "crypto/Kconfig"
602 diff -NurpP --minimal linux-2.6.16.20/arch/i386/boot/compressed/misc.c linux-2.6.16.20-vs2.1.1-rc22/arch/i386/boot/compressed/misc.c
603 --- linux-2.6.16.20/arch/i386/boot/compressed/misc.c    2006-04-09 13:49:42 +0200
604 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/boot/compressed/misc.c       2006-04-26 19:06:59 +0200
605 @@ -309,7 +309,7 @@ static void setup_normal_output_buffer(v
606  #else
607         if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
608  #endif
609 -       output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */
610 +       output_data = (char *)PHYSICAL_START; /* Normally Points to 1M */
611         free_mem_end_ptr = (long)real_mode;
612  }
613  
614 @@ -334,8 +334,8 @@ static void setup_output_buffer_if_we_ru
615         low_buffer_size = low_buffer_end - LOW_BUFFER_START;
616         high_loaded = 1;
617         free_mem_end_ptr = (long)high_buffer_start;
618 -       if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
619 -               high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size);
620 +       if ((PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
621 +               high_buffer_start = (uch *)(PHYSICAL_START + low_buffer_size);
622                 mv->hcount = 0; /* say: we need not to move high_buffer */
623         }
624         else mv->hcount = -1;
625 diff -NurpP --minimal linux-2.6.16.20/arch/i386/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/process.c
626 --- linux-2.6.16.20/arch/i386/kernel/process.c  2006-02-15 13:54:10 +0100
627 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/process.c     2006-04-26 19:06:59 +0200
628 @@ -291,8 +291,10 @@ void show_regs(struct pt_regs * regs)
629         unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
630  
631         printk("\n");
632 -       printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
633 -       printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
634 +       printk("Pid: %d[#%u], comm: %20s\n",
635 +               current->pid, current->xid, current->comm);
636 +       printk("EIP: %04x:[<%08lx>] CPU: %d\n",
637 +               0xffff & regs->xcs,regs->eip, smp_processor_id());
638         print_symbol("EIP is at %s\n", regs->eip);
639  
640         if (user_mode(regs))
641 @@ -352,7 +354,8 @@ int kernel_thread(int (*fn)(void *), voi
642         regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
643  
644         /* Ok, create the new process.. */
645 -       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
646 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
647 +               0, &regs, 0, NULL, NULL);
648  }
649  EXPORT_SYMBOL(kernel_thread);
650  
651 diff -NurpP --minimal linux-2.6.16.20/arch/i386/kernel/setup.c linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/setup.c
652 --- linux-2.6.16.20/arch/i386/kernel/setup.c    2006-04-09 13:49:42 +0200
653 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/setup.c       2006-04-26 19:06:59 +0200
654 @@ -1192,8 +1192,8 @@ void __init setup_bootmem_allocator(void
655          * the (very unlikely) case of us accidentally initializing the
656          * bootmem allocator with an invalid RAM area.
657          */
658 -       reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
659 -                        bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
660 +       reserve_bootmem(PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
661 +                        bootmap_size + PAGE_SIZE-1) - (PHYSICAL_START));
662  
663         /*
664          * reserve physical page 0 - it's a special BIOS page on many boxes,
665 diff -NurpP --minimal linux-2.6.16.20/arch/i386/kernel/sys_i386.c linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/sys_i386.c
666 --- linux-2.6.16.20/arch/i386/kernel/sys_i386.c 2004-08-14 12:56:23 +0200
667 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/sys_i386.c    2006-04-26 19:06:59 +0200
668 @@ -19,6 +19,7 @@
669  #include <linux/mman.h>
670  #include <linux/file.h>
671  #include <linux/utsname.h>
672 +#include <linux/vs_cvirt.h>
673  
674  #include <asm/uaccess.h>
675  #include <asm/ipc.h>
676 @@ -217,7 +218,7 @@ asmlinkage int sys_uname(struct old_utsn
677         if (!name)
678                 return -EFAULT;
679         down_read(&uts_sem);
680 -       err=copy_to_user(name, &system_utsname, sizeof (*name));
681 +       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
682         up_read(&uts_sem);
683         return err?-EFAULT:0;
684  }
685 @@ -225,6 +226,7 @@ asmlinkage int sys_uname(struct old_utsn
686  asmlinkage int sys_olduname(struct oldold_utsname __user * name)
687  {
688         int error;
689 +       struct new_utsname *ptr;
690  
691         if (!name)
692                 return -EFAULT;
693 @@ -233,15 +235,16 @@ asmlinkage int sys_olduname(struct oldol
694    
695         down_read(&uts_sem);
696         
697 -       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
698 +       ptr = vx_new_utsname();
699 +       error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
700         error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
701 -       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
702 +       error |= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
703         error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
704 -       error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
705 +       error |= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
706         error |= __put_user(0,name->release+__OLD_UTS_LEN);
707 -       error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
708 +       error |= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
709         error |= __put_user(0,name->version+__OLD_UTS_LEN);
710 -       error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
711 +       error |= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN);
712         error |= __put_user(0,name->machine+__OLD_UTS_LEN);
713         
714         up_read(&uts_sem);
715 diff -NurpP --minimal linux-2.6.16.20/arch/i386/kernel/syscall_table.S linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/syscall_table.S
716 --- linux-2.6.16.20/arch/i386/kernel/syscall_table.S    2006-02-18 14:39:43 +0100
717 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/syscall_table.S       2006-04-26 19:06:59 +0200
718 @@ -272,7 +272,7 @@ ENTRY(sys_call_table)
719         .long sys_tgkill        /* 270 */
720         .long sys_utimes
721         .long sys_fadvise64_64
722 -       .long sys_ni_syscall    /* sys_vserver */
723 +       .long sys_vserver
724         .long sys_mbind
725         .long sys_get_mempolicy
726         .long sys_set_mempolicy
727 diff -NurpP --minimal linux-2.6.16.20/arch/i386/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/traps.c
728 --- linux-2.6.16.20/arch/i386/kernel/traps.c    2006-02-18 14:39:43 +0100
729 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/i386/kernel/traps.c       2006-04-26 19:06:59 +0200
730 @@ -53,6 +53,8 @@
731  #include <asm/kdebug.h>
732  
733  #include <linux/module.h>
734 +#include <linux/vserver/debug.h>
735 +#include <linux/vserver/history.h>
736  
737  #include "mach_traps.h"
738  
739 @@ -252,8 +254,9 @@ void show_registers(struct pt_regs *regs
740                 regs->esi, regs->edi, regs->ebp, esp);
741         printk(KERN_EMERG "ds: %04x   es: %04x   ss: %04x\n",
742                 regs->xds & 0xffff, regs->xes & 0xffff, ss);
743 -       printk(KERN_EMERG "Process %s (pid: %d, threadinfo=%p task=%p)",
744 -               current->comm, current->pid, current_thread_info(), current);
745 +       printk(KERN_EMERG "Process %s (pid: %d[#%u], threadinfo=%p task=%p)",
746 +               current->comm, current->pid, current->xid,
747 +               current_thread_info(), current);
748         /*
749          * When in-kernel, we also print out the stack and code at the
750          * time of the fault..
751 @@ -333,6 +336,8 @@ void die(const char * str, struct pt_reg
752         static int die_counter;
753         unsigned long flags;
754  
755 +       vxh_throw_oops();
756 +
757         if (die.lock_owner != raw_smp_processor_id()) {
758                 console_verbose();
759                 spin_lock_irqsave(&die.lock, flags);
760 @@ -365,8 +370,9 @@ void die(const char * str, struct pt_reg
761  #endif
762                 if (nl)
763                         printk("\n");
764 -       notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
765 +               notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
766                 show_registers(regs);
767 +               vxh_dump_history();
768         } else
769                 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
770  
771 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/Kconfig
772 --- linux-2.6.16.20/arch/ia64/Kconfig   2006-04-09 13:49:42 +0200
773 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/Kconfig      2006-04-26 19:06:59 +0200
774 @@ -464,6 +464,8 @@ endmenu
775  
776  source "arch/ia64/Kconfig.debug"
777  
778 +source "kernel/vserver/Kconfig"
779 +
780  source "security/Kconfig"
781  
782  source "crypto/Kconfig"
783 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/ia32/binfmt_elf32.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/ia32/binfmt_elf32.c
784 --- linux-2.6.16.20/arch/ia64/ia32/binfmt_elf32.c       2006-01-03 17:29:09 +0100
785 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/ia32/binfmt_elf32.c  2006-04-26 19:06:59 +0200
786 @@ -236,7 +236,8 @@ ia32_setup_arg_pages (struct linux_binpr
787                         kmem_cache_free(vm_area_cachep, mpnt);
788                         return ret;
789                 }
790 -               current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt);
791 +               vx_vmpages_sub(current->mm, current->mm->total_vm - vma_pages(mpnt));
792 +               current->mm->stack_vm = current->mm->total_vm;
793         }
794  
795         for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
796 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/ia32/ia32_entry.S linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/ia32/ia32_entry.S
797 --- linux-2.6.16.20/arch/ia64/ia32/ia32_entry.S 2006-04-09 13:49:42 +0200
798 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/ia32/ia32_entry.S    2006-04-26 19:06:59 +0200
799 @@ -483,7 +483,7 @@ ia32_syscall_table:
800         data8 sys_tgkill        /* 270 */
801         data8 compat_sys_utimes
802         data8 sys32_fadvise64_64
803 -       data8 sys_ni_syscall
804 +       data8 sys32_vserver
805         data8 sys_ni_syscall
806         data8 sys_ni_syscall    /* 275 */
807         data8 sys_ni_syscall
808 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/ia32/sys_ia32.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/ia32/sys_ia32.c
809 --- linux-2.6.16.20/arch/ia64/ia32/sys_ia32.c   2006-02-18 14:39:43 +0100
810 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/ia32/sys_ia32.c      2006-05-29 16:49:39 +0200
811 @@ -1191,7 +1191,7 @@ sys32_gettimeofday (struct compat_timeva
812  {
813         if (tv) {
814                 struct timeval ktv;
815 -               do_gettimeofday(&ktv);
816 +               vx_gettimeofday(&ktv);
817                 if (put_tv32(tv, &ktv))
818                         return -EFAULT;
819         }
820 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/asm-offsets.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/asm-offsets.c
821 --- linux-2.6.16.20/arch/ia64/kernel/asm-offsets.c      2005-10-28 20:49:10 +0200
822 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/asm-offsets.c 2006-04-26 19:06:59 +0200
823 @@ -192,6 +192,7 @@ void foo(void)
824      /* for assembly files which can't include sched.h: */
825         DEFINE(IA64_CLONE_VFORK, CLONE_VFORK);
826         DEFINE(IA64_CLONE_VM, CLONE_VM);
827 +       DEFINE(IA64_CLONE_KTHREAD, CLONE_KTHREAD);
828  
829         BLANK();
830         DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET,
831 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/entry.S linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/entry.S
832 --- linux-2.6.16.20/arch/ia64/kernel/entry.S    2006-02-18 14:39:43 +0100
833 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/entry.S       2006-04-26 19:06:59 +0200
834 @@ -1591,7 +1591,7 @@ sys_call_table:
835         data8 sys_mq_notify
836         data8 sys_mq_getsetattr
837         data8 sys_ni_syscall                    // reserved for kexec_load
838 -       data8 sys_ni_syscall                    // reserved for vserver
839 +       data8 sys_vserver
840         data8 sys_waitid                        // 1270
841         data8 sys_add_key
842         data8 sys_request_key
843 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/perfmon.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/perfmon.c
844 --- linux-2.6.16.20/arch/ia64/kernel/perfmon.c  2006-02-18 14:39:43 +0100
845 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/perfmon.c     2006-04-26 19:06:59 +0200
846 @@ -41,6 +41,8 @@
847  #include <linux/capability.h>
848  #include <linux/rcupdate.h>
849  #include <linux/completion.h>
850 +#include <linux/vs_memory.h>
851 +#include <linux/vs_pid.h>
852  
853  #include <asm/errno.h>
854  #include <asm/intrinsics.h>
855 @@ -2355,7 +2357,7 @@ pfm_smpl_buffer_alloc(struct task_struct
856          */
857         insert_vm_struct(mm, vma);
858  
859 -       mm->total_vm  += size >> PAGE_SHIFT;
860 +       vx_vmpages_add(mm, size >> PAGE_SHIFT);
861         vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
862                                                         vma_pages(vma));
863         up_write(&task->mm->mmap_sem);
864 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/process.c
865 --- linux-2.6.16.20/arch/ia64/kernel/process.c  2006-01-18 06:07:53 +0100
866 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/process.c     2006-04-26 19:06:59 +0200
867 @@ -109,7 +109,8 @@ show_regs (struct pt_regs *regs)
868         unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
869  
870         print_modules();
871 -       printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
872 +       printk("\nPid: %d[#%u], CPU %d, comm: %20s\n",
873 +               current->pid, current->xid, smp_processor_id(), current->comm);
874         printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s\n",
875                regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
876         print_symbol("ip is at %s\n", ip);
877 @@ -692,7 +693,8 @@ kernel_thread (int (*fn)(void *), void *
878         regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
879         regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
880         regs.sw.pr = (1 << PRED_KERNEL_STACK);
881 -       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
882 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
883 +               0, &regs.pt, 0, NULL, NULL);
884  }
885  EXPORT_SYMBOL(kernel_thread);
886  
887 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/ptrace.c
888 --- linux-2.6.16.20/arch/ia64/kernel/ptrace.c   2006-02-18 14:39:43 +0100
889 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/ptrace.c      2006-04-26 19:06:59 +0200
890 @@ -18,6 +18,7 @@
891  #include <linux/security.h>
892  #include <linux/audit.h>
893  #include <linux/signal.h>
894 +#include <linux/vs_pid.h>
895  
896  #include <asm/pgtable.h>
897  #include <asm/processor.h>
898 @@ -1443,6 +1444,9 @@ sys_ptrace (long request, pid_t pid, uns
899         read_unlock(&tasklist_lock);
900         if (!child)
901                 goto out;
902 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
903 +               goto out_tsk;
904 +
905         ret = -EPERM;
906         if (pid == 1)           /* no messing around with init! */
907                 goto out_tsk;
908 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/signal.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/signal.c
909 --- linux-2.6.16.20/arch/ia64/kernel/signal.c   2006-01-18 06:07:53 +0100
910 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/signal.c      2006-04-26 19:06:59 +0200
911 @@ -21,6 +21,7 @@
912  #include <linux/binfmts.h>
913  #include <linux/unistd.h>
914  #include <linux/wait.h>
915 +#include <linux/vs_pid.h>
916  
917  #include <asm/ia32.h>
918  #include <asm/intrinsics.h>
919 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/traps.c
920 --- linux-2.6.16.20/arch/ia64/kernel/traps.c    2006-02-18 14:39:43 +0100
921 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/kernel/traps.c       2006-04-26 19:06:59 +0200
922 @@ -107,8 +107,9 @@ die (const char *str, struct pt_regs *re
923         put_cpu();
924  
925         if (++die.lock_owner_depth < 3) {
926 -               printk("%s[%d]: %s %ld [%d]\n",
927 -                       current->comm, current->pid, str, err, ++die_counter);
928 +               printk("%s[%d[#%u]]: %s %ld [%d]\n",
929 +                       current->comm, current->pid, current->xid,
930 +                       str, err, ++die_counter);
931                 (void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
932                 show_regs(regs);
933         } else
934 @@ -335,8 +336,9 @@ handle_fpu_swa (int fp_fault, struct pt_
935                 last_time = jiffies;
936                 ++fpu_swa_count;
937                 printk(KERN_WARNING
938 -                      "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
939 -                      current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr);
940 +                      "%s(%d[#%u]): floating-point assist fault at ip %016lx, isr %016lx\n",
941 +                      current->comm, current->pid, current->xid,
942 +                      regs->cr_iip + ia64_psr(regs)->ri, isr);
943         }
944  
945         exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr,
946 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/mm/fault.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/mm/fault.c
947 --- linux-2.6.16.20/arch/ia64/mm/fault.c        2006-01-03 17:29:09 +0100
948 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/mm/fault.c   2006-04-26 19:06:59 +0200
949 @@ -10,6 +10,7 @@
950  #include <linux/smp_lock.h>
951  #include <linux/interrupt.h>
952  #include <linux/kprobes.h>
953 +#include <linux/vs_memory.h>
954  
955  #include <asm/pgtable.h>
956  #include <asm/processor.h>
957 diff -NurpP --minimal linux-2.6.16.20/arch/ia64/sn/kernel/xpc_main.c linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/sn/kernel/xpc_main.c
958 --- linux-2.6.16.20/arch/ia64/sn/kernel/xpc_main.c      2006-02-18 14:39:44 +0100
959 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ia64/sn/kernel/xpc_main.c 2006-04-26 19:06:59 +0200
960 @@ -109,6 +109,7 @@ static ctl_table xpc_sys_xpc_hb_dir[] = 
961                 0644,
962                 NULL,
963                 &proc_dointvec_minmax,
964 +               NULL,
965                 &sysctl_intvec,
966                 NULL,
967                 &xpc_hb_min_interval,
968 @@ -122,6 +123,7 @@ static ctl_table xpc_sys_xpc_hb_dir[] = 
969                 0644,
970                 NULL,
971                 &proc_dointvec_minmax,
972 +               NULL,
973                 &sysctl_intvec,
974                 NULL,
975                 &xpc_hb_check_min_interval,
976 @@ -146,6 +148,7 @@ static ctl_table xpc_sys_xpc_dir[] = {
977                 0644,
978                 NULL,
979                 &proc_dointvec_minmax,
980 +               NULL,
981                 &sysctl_intvec,
982                 NULL,
983                 &xpc_disengage_request_min_timelimit,
984 diff -NurpP --minimal linux-2.6.16.20/arch/m32r/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/m32r/kernel/process.c
985 --- linux-2.6.16.20/arch/m32r/kernel/process.c  2006-01-18 06:07:53 +0100
986 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m32r/kernel/process.c     2006-04-26 19:06:59 +0200
987 @@ -208,8 +208,8 @@ int kernel_thread(int (*fn)(void *), voi
988         regs.psw = M32R_PSW_BIE;
989  
990         /* Ok, create the new process. */
991 -       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL,
992 -               NULL);
993 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
994 +               0, &regs, 0, NULL, NULL);
995  }
996  
997  /*
998 diff -NurpP --minimal linux-2.6.16.20/arch/m32r/kernel/sys_m32r.c linux-2.6.16.20-vs2.1.1-rc22/arch/m32r/kernel/sys_m32r.c
999 --- linux-2.6.16.20/arch/m32r/kernel/sys_m32r.c 2006-04-09 13:49:43 +0200
1000 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m32r/kernel/sys_m32r.c    2006-05-11 16:06:22 +0200
1001 @@ -21,6 +21,7 @@
1002  #include <linux/mman.h>
1003  #include <linux/file.h>
1004  #include <linux/utsname.h>
1005 +#include <linux/vs_cvirt.h>
1006  
1007  #include <asm/uaccess.h>
1008  #include <asm/cachectl.h>
1009 @@ -206,7 +207,7 @@ asmlinkage int sys_uname(struct old_utsn
1010         if (!name)
1011                 return -EFAULT;
1012         down_read(&uts_sem);
1013 -       err=copy_to_user(name, &system_utsname, sizeof (*name));
1014 +       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
1015         up_read(&uts_sem);
1016         return err?-EFAULT:0;
1017  }
1018 diff -NurpP --minimal linux-2.6.16.20/arch/m32r/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/m32r/kernel/traps.c
1019 --- linux-2.6.16.20/arch/m32r/kernel/traps.c    2005-10-28 20:49:11 +0200
1020 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m32r/kernel/traps.c       2006-04-26 19:06:59 +0200
1021 @@ -196,8 +196,9 @@ static void show_registers(struct pt_reg
1022         } else {
1023                 printk("SPI: %08lx\n", sp);
1024         }
1025 -       printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
1026 -               current->comm, current->pid, 0xffff & i, 4096+(unsigned long)current);
1027 +       printk("Process %s (pid: %d[#%u], process nr: %d, stackpage=%08lx)",
1028 +               current->comm, current->pid, current->xid,
1029 +               0xffff & i, 4096+(unsigned long)current);
1030  
1031         /*
1032          * When in-kernel, we also print out the stack and code at the
1033 diff -NurpP --minimal linux-2.6.16.20/arch/m68k/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/Kconfig
1034 --- linux-2.6.16.20/arch/m68k/Kconfig   2006-02-18 14:39:44 +0100
1035 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/Kconfig      2006-04-26 19:06:59 +0200
1036 @@ -650,6 +650,8 @@ source "fs/Kconfig"
1037  
1038  source "arch/m68k/Kconfig.debug"
1039  
1040 +source "kernel/vserver/Kconfig"
1041 +
1042  source "security/Kconfig"
1043  
1044  source "crypto/Kconfig"
1045 diff -NurpP --minimal linux-2.6.16.20/arch/m68k/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/kernel/process.c
1046 --- linux-2.6.16.20/arch/m68k/kernel/process.c  2006-04-09 13:49:43 +0200
1047 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/kernel/process.c     2006-04-26 19:06:59 +0200
1048 @@ -160,7 +160,8 @@ int kernel_thread(int (*fn)(void *), voi
1049  
1050         {
1051         register long retval __asm__ ("d0");
1052 -       register long clone_arg __asm__ ("d1") = flags | CLONE_VM | CLONE_UNTRACED;
1053 +       register long clone_arg __asm__ ("d1") =
1054 +               flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD;
1055  
1056         retval = __NR_clone;
1057         __asm__ __volatile__
1058 diff -NurpP --minimal linux-2.6.16.20/arch/m68k/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/kernel/ptrace.c
1059 --- linux-2.6.16.20/arch/m68k/kernel/ptrace.c   2006-01-03 17:29:10 +0100
1060 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/kernel/ptrace.c      2006-04-26 19:06:59 +0200
1061 @@ -280,6 +280,8 @@ long arch_ptrace(struct task_struct *chi
1062                 ret = ptrace_request(child, request, addr, data);
1063                 break;
1064         }
1065 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
1066 +               goto out_tsk;
1067  
1068         return ret;
1069  out_eio:
1070 diff -NurpP --minimal linux-2.6.16.20/arch/m68k/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/kernel/traps.c
1071 --- linux-2.6.16.20/arch/m68k/kernel/traps.c    2006-01-18 06:07:53 +0100
1072 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68k/kernel/traps.c       2006-04-26 19:06:59 +0200
1073 @@ -1198,8 +1198,9 @@ void die_if_kernel (char *str, struct pt
1074         printk("d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
1075                fp->d4, fp->d5, fp->a0, fp->a1);
1076  
1077 -       printk("Process %s (pid: %d, stackpage=%08lx)\n",
1078 -               current->comm, current->pid, PAGE_SIZE+(unsigned long)current);
1079 +       printk("Process %s (pid: %d[#%u], stackpage=%08lx)\n",
1080 +               current->comm, current->pid, current->xid,
1081 +               PAGE_SIZE+(unsigned long)current);
1082         show_stack(NULL, (unsigned long *)fp);
1083         do_exit(SIGSEGV);
1084  }
1085 diff -NurpP --minimal linux-2.6.16.20/arch/m68knommu/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/m68knommu/Kconfig
1086 --- linux-2.6.16.20/arch/m68knommu/Kconfig      2006-02-18 14:39:44 +0100
1087 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68knommu/Kconfig 2006-04-26 19:06:59 +0200
1088 @@ -646,6 +646,8 @@ source "fs/Kconfig"
1089  
1090  source "arch/m68knommu/Kconfig.debug"
1091  
1092 +source "kernel/vserver/Kconfig"
1093 +
1094  source "security/Kconfig"
1095  
1096  source "crypto/Kconfig"
1097 diff -NurpP --minimal linux-2.6.16.20/arch/m68knommu/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/m68knommu/kernel/process.c
1098 --- linux-2.6.16.20/arch/m68knommu/kernel/process.c     2006-02-15 13:54:11 +0100
1099 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68knommu/kernel/process.c        2006-04-26 19:06:59 +0200
1100 @@ -123,7 +123,7 @@ void show_regs(struct pt_regs * regs)
1101  int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1102  {
1103         int retval;
1104 -       long clone_arg = flags | CLONE_VM;
1105 +       long clone_arg = flags | CLONE_VM | CLONE_KTHREAD;
1106         mm_segment_t fs;
1107  
1108         fs = get_fs();
1109 diff -NurpP --minimal linux-2.6.16.20/arch/m68knommu/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/m68knommu/kernel/traps.c
1110 --- linux-2.6.16.20/arch/m68knommu/kernel/traps.c       2005-10-28 20:49:11 +0200
1111 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/m68knommu/kernel/traps.c  2006-04-26 19:06:59 +0200
1112 @@ -81,8 +81,9 @@ void die_if_kernel(char *str, struct pt_
1113         printk(KERN_EMERG "d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
1114                fp->d4, fp->d5, fp->a0, fp->a1);
1115  
1116 -       printk(KERN_EMERG "Process %s (pid: %d, stackpage=%08lx)\n",
1117 -               current->comm, current->pid, PAGE_SIZE+(unsigned long)current);
1118 +       printk(KERN_EMERG "Process %s (pid: %d[#%u], stackpage=%08lx)\n",
1119 +               current->comm, current->pid, current->xid,
1120 +               PAGE_SIZE+(unsigned long)current);
1121         show_stack(NULL, (unsigned long *)fp);
1122         do_exit(SIGSEGV);
1123  }
1124 diff -NurpP --minimal linux-2.6.16.20/arch/mips/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/mips/Kconfig
1125 --- linux-2.6.16.20/arch/mips/Kconfig   2006-04-09 13:49:43 +0200
1126 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/Kconfig      2006-04-26 19:06:59 +0200
1127 @@ -1814,6 +1814,8 @@ source "arch/mips/oprofile/Kconfig"
1128  
1129  source "arch/mips/Kconfig.debug"
1130  
1131 +source "kernel/vserver/Kconfig"
1132 +
1133  source "security/Kconfig"
1134  
1135  source "crypto/Kconfig"
1136 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/linux32.c linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/linux32.c
1137 --- linux-2.6.16.20/arch/mips/kernel/linux32.c  2006-04-09 13:49:43 +0200
1138 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/linux32.c     2006-05-29 16:49:44 +0200
1139 @@ -37,6 +37,7 @@
1140  #include <linux/security.h>
1141  #include <linux/compat.h>
1142  #include <linux/vfs.h>
1143 +#include <linux/vs_cvirt.h>
1144  
1145  #include <net/sock.h>
1146  #include <net/scm.h>
1147 @@ -299,7 +300,7 @@ sys32_gettimeofday(struct compat_timeval
1148  {
1149         if (tv) {
1150                 struct timeval ktv;
1151 -               do_gettimeofday(&ktv);
1152 +               vx_gettimeofday(&ktv);
1153                 if (put_tv32(tv, &ktv))
1154                         return -EFAULT;
1155         }
1156 @@ -1099,7 +1100,7 @@ asmlinkage long sys32_newuname(struct ne
1157         int ret = 0;
1158  
1159         down_read(&uts_sem);
1160 -       if (copy_to_user(name,&system_utsname,sizeof *name))
1161 +       if (copy_to_user(name, vx_new_utsname(), sizeof *name))
1162                 ret = -EFAULT;
1163         up_read(&uts_sem);
1164  
1165 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/process.c
1166 --- linux-2.6.16.20/arch/mips/kernel/process.c  2006-02-18 14:39:45 +0100
1167 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/process.c     2006-04-26 19:06:59 +0200
1168 @@ -268,7 +268,8 @@ long kernel_thread(int (*fn)(void *), vo
1169  #endif
1170  
1171         /* Ok, create the new process.. */
1172 -       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
1173 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
1174 +               0, &regs, 0, NULL, NULL);
1175  }
1176  
1177  static struct mips_frame_info {
1178 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/ptrace.c
1179 --- linux-2.6.16.20/arch/mips/kernel/ptrace.c   2006-04-09 13:49:43 +0200
1180 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/ptrace.c      2006-04-26 19:06:59 +0200
1181 @@ -476,6 +476,8 @@ asmlinkage void do_syscall_trace(struct 
1182                 goto out;
1183         if (!test_thread_flag(TIF_SYSCALL_TRACE))
1184                 goto out;
1185 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
1186 +               goto out_tsk;
1187  
1188         /* The 0x80 provides a way for the tracing parent to distinguish
1189            between a syscall stop and SIGTRAP delivery */
1190 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/scall32-o32.S linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall32-o32.S
1191 --- linux-2.6.16.20/arch/mips/kernel/scall32-o32.S      2006-04-09 13:49:43 +0200
1192 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall32-o32.S 2006-04-26 19:06:59 +0200
1193 @@ -607,7 +607,7 @@ einval:     li      v0, -EINVAL
1194         sys     sys_mq_timedreceive     5
1195         sys     sys_mq_notify           2       /* 4275 */
1196         sys     sys_mq_getsetattr       3
1197 -       sys     sys_ni_syscall          0       /* sys_vserver */
1198 +       sys     sys_vserver             3
1199         sys     sys_waitid              5
1200         sys     sys_ni_syscall          0       /* available, was setaltroot */
1201         sys     sys_add_key             5       /* 4280 */
1202 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/scall64-64.S linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall64-64.S
1203 --- linux-2.6.16.20/arch/mips/kernel/scall64-64.S       2006-02-18 14:39:45 +0100
1204 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall64-64.S  2006-04-26 19:06:59 +0200
1205 @@ -433,7 +433,7 @@ sys_call_table:
1206         PTR     sys_mq_timedreceive
1207         PTR     sys_mq_notify
1208         PTR     sys_mq_getsetattr               /* 5235 */
1209 -       PTR     sys_ni_syscall                  /* sys_vserver */
1210 +       PTR     sys_vserver
1211         PTR     sys_waitid
1212         PTR     sys_ni_syscall                  /* available, was setaltroot */
1213         PTR     sys_add_key
1214 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/scall64-n32.S linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall64-n32.S
1215 --- linux-2.6.16.20/arch/mips/kernel/scall64-n32.S      2006-04-09 13:49:43 +0200
1216 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall64-n32.S 2006-04-26 19:06:59 +0200
1217 @@ -359,7 +359,7 @@ EXPORT(sysn32_call_table)
1218         PTR     compat_sys_mq_timedreceive
1219         PTR     compat_sys_mq_notify
1220         PTR     compat_sys_mq_getsetattr
1221 -       PTR     sys_ni_syscall                  /* 6240, sys_vserver */
1222 +       PTR     sys32_vserver                   /* 6240 */
1223         PTR     sysn32_waitid
1224         PTR     sys_ni_syscall                  /* available, was setaltroot */
1225         PTR     sys_add_key
1226 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/scall64-o32.S linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall64-o32.S
1227 --- linux-2.6.16.20/arch/mips/kernel/scall64-o32.S      2006-04-09 13:49:43 +0200
1228 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/scall64-o32.S 2006-04-26 19:06:59 +0200
1229 @@ -481,7 +481,7 @@ sys_call_table:
1230         PTR     compat_sys_mq_timedreceive
1231         PTR     compat_sys_mq_notify            /* 4275 */
1232         PTR     compat_sys_mq_getsetattr
1233 -       PTR     sys_ni_syscall                  /* sys_vserver */
1234 +       PTR     sys32_vserver
1235         PTR     sys32_waitid
1236         PTR     sys_ni_syscall                  /* available, was setaltroot */
1237         PTR     sys_add_key                     /* 4280 */
1238 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/syscall.c linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/syscall.c
1239 --- linux-2.6.16.20/arch/mips/kernel/syscall.c  2006-02-18 14:39:45 +0100
1240 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/syscall.c     2006-04-26 19:06:59 +0200
1241 @@ -29,6 +29,7 @@
1242  #include <linux/shm.h>
1243  #include <linux/compiler.h>
1244  #include <linux/module.h>
1245 +#include <linux/vs_cvirt.h>
1246  
1247  #include <asm/branch.h>
1248  #include <asm/cachectl.h>
1249 @@ -229,7 +230,7 @@ out:
1250   */
1251  asmlinkage int sys_uname(struct old_utsname __user * name)
1252  {
1253 -       if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
1254 +       if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name)))
1255                 return 0;
1256         return -EFAULT;
1257  }
1258 @@ -240,21 +241,23 @@ asmlinkage int sys_uname(struct old_utsn
1259  asmlinkage int sys_olduname(struct oldold_utsname __user * name)
1260  {
1261         int error;
1262 +       struct new_utsname *ptr;
1263  
1264         if (!name)
1265                 return -EFAULT;
1266         if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
1267                 return -EFAULT;
1268  
1269 -       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
1270 +       ptr = vx_new_utsname();
1271 +       error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
1272         error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
1273 -       error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
1274 +       error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
1275         error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
1276 -       error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
1277 +       error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
1278         error -= __put_user(0,name->release+__OLD_UTS_LEN);
1279 -       error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
1280 +       error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
1281         error -= __put_user(0,name->version+__OLD_UTS_LEN);
1282 -       error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
1283 +       error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN);
1284         error = __put_user(0,name->machine+__OLD_UTS_LEN);
1285         error = error ? -EFAULT : 0;
1286  
1287 @@ -290,10 +293,10 @@ asmlinkage int _sys_sysmips(int cmd, lon
1288                         return -EFAULT;
1289  
1290                 down_write(&uts_sem);
1291 -               strncpy(system_utsname.nodename, nodename, len);
1292 +               strncpy(vx_new_uts(nodename), nodename, len);
1293                 nodename[__NEW_UTS_LEN] = '\0';
1294 -               strlcpy(system_utsname.nodename, nodename,
1295 -                       sizeof(system_utsname.nodename));
1296 +               strlcpy(vx_new_uts(nodename), nodename,
1297 +                       sizeof(vx_new_uts(nodename)));
1298                 up_write(&uts_sem);
1299                 return 0;
1300         }
1301 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/sysirix.c linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/sysirix.c
1302 --- linux-2.6.16.20/arch/mips/kernel/sysirix.c  2006-02-18 14:39:45 +0100
1303 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/sysirix.c     2006-05-11 16:06:22 +0200
1304 @@ -31,6 +31,8 @@
1305  #include <linux/socket.h>
1306  #include <linux/security.h>
1307  #include <linux/syscalls.h>
1308 +#include <linux/vs_pid.h>
1309 +#include <linux/vs_cvirt.h>
1310  
1311  #include <asm/ptrace.h>
1312  #include <asm/page.h>
1313 @@ -904,7 +906,7 @@ asmlinkage int irix_getdomainname(char _
1314         down_read(&uts_sem);
1315         if (len > __NEW_UTS_LEN)
1316                 len = __NEW_UTS_LEN;
1317 -       err = copy_to_user(name, system_utsname.domainname, len) ? -EFAULT : 0;
1318 +       err = copy_to_user(name, vx_new_uts(domainname), len) ? -EFAULT : 0;
1319         up_read(&uts_sem);
1320  
1321         return err;
1322 @@ -1147,11 +1149,11 @@ struct iuname {
1323  asmlinkage int irix_uname(struct iuname __user *buf)
1324  {
1325         down_read(&uts_sem);
1326 -       if (copy_from_user(system_utsname.sysname, buf->sysname, 65)
1327 -           || copy_from_user(system_utsname.nodename, buf->nodename, 65)
1328 -           || copy_from_user(system_utsname.release, buf->release, 65)
1329 -           || copy_from_user(system_utsname.version, buf->version, 65)
1330 -           || copy_from_user(system_utsname.machine, buf->machine, 65)) {
1331 +       if (copy_from_user(vx_new_uts(sysname), buf->sysname, 65)
1332 +           || copy_from_user(vx_new_uts(nodename), buf->nodename, 65)
1333 +           || copy_from_user(vx_new_uts(release), buf->release, 65)
1334 +           || copy_from_user(vx_new_uts(version), buf->version, 65)
1335 +           || copy_from_user(vx_new_uts(machine), buf->machine, 65)) {
1336                 return -EFAULT;
1337         }
1338         up_read(&uts_sem);
1339 diff -NurpP --minimal linux-2.6.16.20/arch/mips/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/traps.c
1340 --- linux-2.6.16.20/arch/mips/kernel/traps.c    2006-04-09 13:49:43 +0200
1341 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/mips/kernel/traps.c       2006-04-26 19:06:59 +0200
1342 @@ -266,8 +266,9 @@ void show_registers(struct pt_regs *regs
1343  {
1344         show_regs(regs);
1345         print_modules();
1346 -       printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n",
1347 -               current->comm, current->pid, current_thread_info(), current);
1348 +       printk("Process %s (pid: %d[#%u], threadinfo=%p, task=%p)\n",
1349 +               current->comm, current->pid, current->xid,
1350 +               current_thread_info(), current);
1351         show_stack(current, (long *) regs->regs[29]);
1352         show_trace(current, (long *) regs->regs[29]);
1353         show_code((unsigned int *) regs->cp0_epc);
1354 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/Kconfig
1355 --- linux-2.6.16.20/arch/parisc/Kconfig 2006-02-18 14:39:45 +0100
1356 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/Kconfig    2006-04-26 19:06:59 +0200
1357 @@ -213,6 +213,8 @@ source "arch/parisc/oprofile/Kconfig"
1358  
1359  source "arch/parisc/Kconfig.debug"
1360  
1361 +source "kernel/vserver/Kconfig"
1362 +
1363  source "security/Kconfig"
1364  
1365  source "crypto/Kconfig"
1366 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/hpux/sys_hpux.c linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/hpux/sys_hpux.c
1367 --- linux-2.6.16.20/arch/parisc/hpux/sys_hpux.c 2006-02-15 13:54:11 +0100
1368 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/hpux/sys_hpux.c    2006-05-11 16:06:22 +0200
1369 @@ -33,6 +33,7 @@
1370  #include <linux/utsname.h>
1371  #include <linux/vfs.h>
1372  #include <linux/vmalloc.h>
1373 +#include <linux/vs_cvirt.h>
1374  
1375  #include <asm/errno.h>
1376  #include <asm/pgalloc.h>
1377 @@ -266,15 +267,15 @@ static int hpux_uname(struct hpux_utsnam
1378  
1379         down_read(&uts_sem);
1380  
1381 -       error = __copy_to_user(&name->sysname,&system_utsname.sysname,HPUX_UTSLEN-1);
1382 +       error = __copy_to_user(&name->sysname,vx_new_uts(sysname),HPUX_UTSLEN-1);
1383         error |= __put_user(0,name->sysname+HPUX_UTSLEN-1);
1384 -       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,HPUX_UTSLEN-1);
1385 +       error |= __copy_to_user(&name->nodename,vx_new_uts(nodename),HPUX_UTSLEN-1);
1386         error |= __put_user(0,name->nodename+HPUX_UTSLEN-1);
1387 -       error |= __copy_to_user(&name->release,&system_utsname.release,HPUX_UTSLEN-1);
1388 +       error |= __copy_to_user(&name->release,vx_new_uts(release),HPUX_UTSLEN-1);
1389         error |= __put_user(0,name->release+HPUX_UTSLEN-1);
1390 -       error |= __copy_to_user(&name->version,&system_utsname.version,HPUX_UTSLEN-1);
1391 +       error |= __copy_to_user(&name->version,vx_new_uts(version),HPUX_UTSLEN-1);
1392         error |= __put_user(0,name->version+HPUX_UTSLEN-1);
1393 -       error |= __copy_to_user(&name->machine,&system_utsname.machine,HPUX_UTSLEN-1);
1394 +       error |= __copy_to_user(&name->machine,vx_new_uts(machine),HPUX_UTSLEN-1);
1395         error |= __put_user(0,name->machine+HPUX_UTSLEN-1);
1396  
1397         up_read(&uts_sem);
1398 @@ -373,8 +374,8 @@ int hpux_utssys(char *ubuf, int n, int t
1399                 /*  TODO:  print a warning about using this?  */
1400                 down_write(&uts_sem);
1401                 error = -EFAULT;
1402 -               if (!copy_from_user(system_utsname.sysname, ubuf, len)) {
1403 -                       system_utsname.sysname[len] = 0;
1404 +               if (!copy_from_user(vx_new_uts(sysname), ubuf, len)) {
1405 +                       vx_new_uts(sysname)[len] = 0;
1406                         error = 0;
1407                 }
1408                 up_write(&uts_sem);
1409 @@ -400,8 +401,8 @@ int hpux_utssys(char *ubuf, int n, int t
1410                 /*  TODO:  print a warning about this?  */
1411                 down_write(&uts_sem);
1412                 error = -EFAULT;
1413 -               if (!copy_from_user(system_utsname.release, ubuf, len)) {
1414 -                       system_utsname.release[len] = 0;
1415 +               if (!copy_from_user(vx_new_uts(release), ubuf, len)) {
1416 +                       vx_new_uts(release)[len] = 0;
1417                         error = 0;
1418                 }
1419                 up_write(&uts_sem);
1420 @@ -422,13 +423,13 @@ int hpux_getdomainname(char *name, int l
1421         
1422         down_read(&uts_sem);
1423         
1424 -       nlen = strlen(system_utsname.domainname) + 1;
1425 +       nlen = strlen(vx_new_uts(domainname)) + 1;
1426  
1427         if (nlen < len)
1428                 len = nlen;
1429         if(len > __NEW_UTS_LEN)
1430                 goto done;
1431 -       if(copy_to_user(name, system_utsname.domainname, len))
1432 +       if(copy_to_user(name, vx_new_uts(domainname), len))
1433                 goto done;
1434         err = 0;
1435  done:
1436 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/kernel/entry.S linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/entry.S
1437 --- linux-2.6.16.20/arch/parisc/kernel/entry.S  2006-01-03 17:29:13 +0100
1438 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/entry.S     2006-04-26 19:06:59 +0200
1439 @@ -756,6 +756,7 @@ fault_vector_11:
1440  
1441  #define CLONE_VM 0x100 /* Must agree with <linux/sched.h> */
1442  #define CLONE_UNTRACED 0x00800000
1443 +#define CLONE_KTHREAD 0x10000000
1444  
1445         .export __kernel_thread, code
1446         .import do_fork
1447 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/process.c
1448 --- linux-2.6.16.20/arch/parisc/kernel/process.c        2006-02-15 13:54:11 +0100
1449 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/process.c   2006-04-26 19:06:59 +0200
1450 @@ -178,7 +178,7 @@ pid_t kernel_thread(int (*fn)(void *), v
1451          *        kernel_thread can become a #define.
1452          */
1453  
1454 -       return __kernel_thread(fn, arg, flags);
1455 +       return __kernel_thread(fn, arg, flags | CLONE_KTHREAD);
1456  }
1457  EXPORT_SYMBOL(kernel_thread);
1458  
1459 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/kernel/sys_parisc32.c linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/sys_parisc32.c
1460 --- linux-2.6.16.20/arch/parisc/kernel/sys_parisc32.c   2005-06-22 02:37:56 +0200
1461 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/sys_parisc32.c      2006-05-29 16:50:03 +0200
1462 @@ -203,11 +203,11 @@ static inline long get_ts32(struct times
1463  asmlinkage int
1464  sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
1465  {
1466 -    extern void do_gettimeofday(struct timeval *tv);
1467 +    extern void vx_gettimeofday(struct timeval *tv);
1468  
1469      if (tv) {
1470             struct timeval ktv;
1471 -           do_gettimeofday(&ktv);
1472 +           vx_gettimeofday(&ktv);
1473             if (put_compat_timeval(tv, &ktv))
1474                     return -EFAULT;
1475      }
1476 @@ -657,6 +657,7 @@ asmlinkage int sys32_sysinfo(struct sysi
1477  
1478         do {
1479                 seq = read_seqbegin(&xtime_lock);
1480 +               /* FIXME: requires vx virtualization */
1481                 val.uptime = jiffies / HZ;
1482  
1483                 val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
1484 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/kernel/syscall_table.S linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/syscall_table.S
1485 --- linux-2.6.16.20/arch/parisc/kernel/syscall_table.S  2006-02-18 14:39:46 +0100
1486 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/syscall_table.S     2006-04-26 19:06:59 +0200
1487 @@ -368,7 +368,7 @@
1488         ENTRY_COMP(mbind)               /* 260 */
1489         ENTRY_COMP(get_mempolicy)
1490         ENTRY_COMP(set_mempolicy)
1491 -       ENTRY_SAME(ni_syscall)  /* 263: reserved for vserver */
1492 +       ENTRY_DIFF(vserver)
1493         ENTRY_SAME(add_key)
1494         ENTRY_SAME(request_key)         /* 265 */
1495         ENTRY_SAME(keyctl)
1496 diff -NurpP --minimal linux-2.6.16.20/arch/parisc/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/traps.c
1497 --- linux-2.6.16.20/arch/parisc/kernel/traps.c  2006-02-15 13:54:11 +0100
1498 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/parisc/kernel/traps.c     2006-04-26 19:06:59 +0200
1499 @@ -214,8 +214,9 @@ void die_if_kernel(char *str, struct pt_
1500                 if (err == 0)
1501                         return; /* STFU */
1502  
1503 -               printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
1504 -                       current->comm, current->pid, str, err, regs->iaoq[0]);
1505 +               printk(KERN_CRIT "%s (pid %d[#%u]): %s (code %ld) at " RFMT "\n",
1506 +                       current->comm, current->pid, current->xid,
1507 +                       str, err, regs->iaoq[0]);
1508  #ifdef PRINT_USER_FAULTS
1509                 /* XXX for debugging only */
1510                 show_regs(regs);
1511 @@ -246,8 +247,8 @@ void die_if_kernel(char *str, struct pt_
1512         if (!console_drivers)
1513                 pdc_console_restart();
1514         
1515 -       printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
1516 -               current->comm, current->pid, str, err);
1517 +       printk(KERN_CRIT "%s (pid %d[#%u]): %s (code %ld)\n",
1518 +               current->comm, current->pid, current->xid, str, err);
1519         show_regs(regs);
1520  
1521         /* Wot's wrong wif bein' racy? */
1522 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/Kconfig
1523 --- linux-2.6.16.20/arch/powerpc/Kconfig        2006-04-09 13:49:43 +0200
1524 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/Kconfig   2006-04-26 19:06:59 +0200
1525 @@ -974,6 +974,8 @@ endmenu
1526  
1527  source "arch/powerpc/Kconfig.debug"
1528  
1529 +source "kernel/vserver/Kconfig"
1530 +
1531  source "security/Kconfig"
1532  
1533  config KEYS_COMPAT
1534 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/asm-offsets.c linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/asm-offsets.c
1535 --- linux-2.6.16.20/arch/powerpc/kernel/asm-offsets.c   2006-04-09 13:49:43 +0200
1536 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/asm-offsets.c      2006-04-26 19:06:59 +0200
1537 @@ -229,6 +229,7 @@ int main(void)
1538  
1539         DEFINE(CLONE_VM, CLONE_VM);
1540         DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
1541 +       DEFINE(CLONE_KTHREAD, CLONE_KTHREAD);
1542  
1543  #ifndef CONFIG_PPC64
1544         DEFINE(MM_PGD, offsetof(struct mm_struct, pgd));
1545 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/misc_32.S linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/misc_32.S
1546 --- linux-2.6.16.20/arch/powerpc/kernel/misc_32.S       2006-01-18 06:07:55 +0100
1547 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/misc_32.S  2006-04-26 19:06:59 +0200
1548 @@ -980,7 +980,7 @@ _GLOBAL(kernel_thread)
1549         mr      r30,r3          /* function */
1550         mr      r31,r4          /* argument */
1551         ori     r3,r5,CLONE_VM  /* flags */
1552 -       oris    r3,r3,CLONE_UNTRACED>>16
1553 +       oris    r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16
1554         li      r4,0            /* new sp (unused) */
1555         li      r0,__NR_clone
1556         sc
1557 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/misc_64.S linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/misc_64.S
1558 --- linux-2.6.16.20/arch/powerpc/kernel/misc_64.S       2006-01-18 06:07:55 +0100
1559 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/misc_64.S  2006-04-26 19:06:59 +0200
1560 @@ -684,7 +684,7 @@ _GLOBAL(kernel_thread)
1561         mr      r29,r3
1562         mr      r30,r4
1563         ori     r3,r5,CLONE_VM  /* flags */
1564 -       oris    r3,r3,(CLONE_UNTRACED>>16)
1565 +       oris    r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16
1566         li      r4,0            /* new sp (unused) */
1567         li      r0,__NR_clone
1568         sc
1569 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/process.c
1570 --- linux-2.6.16.20/arch/powerpc/kernel/process.c       2006-04-09 13:49:43 +0200
1571 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/process.c  2006-04-26 19:06:59 +0200
1572 @@ -425,8 +425,9 @@ void show_regs(struct pt_regs * regs)
1573         trap = TRAP(regs);
1574         if (trap == 0x300 || trap == 0x600)
1575                 printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr);
1576 -       printk("TASK = %p[%d] '%s' THREAD: %p",
1577 -              current, current->pid, current->comm, task_thread_info(current));
1578 +       printk("TASK = %p[%d,#%u] '%s' THREAD: %p",
1579 +              current, current->pid, current->xid,
1580 +              current->comm, task_thread_info(current));
1581  
1582  #ifdef CONFIG_SMP
1583         printk(" CPU: %d", smp_processor_id());
1584 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/sys_ppc32.c linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/sys_ppc32.c
1585 --- linux-2.6.16.20/arch/powerpc/kernel/sys_ppc32.c     2006-04-09 13:49:43 +0200
1586 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/sys_ppc32.c        2006-05-29 16:50:23 +0200
1587 @@ -340,7 +340,7 @@ asmlinkage long compat_sys_gettimeofday(
1588  {
1589         if (tv) {
1590                 struct timeval ktv;
1591 -               do_gettimeofday(&ktv);
1592 +               vx_gettimeofday(&ktv);
1593                 if (put_tv32(tv, &ktv))
1594                         return -EFAULT;
1595         }
1596 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/syscalls.c linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/syscalls.c
1597 --- linux-2.6.16.20/arch/powerpc/kernel/syscalls.c      2006-02-18 14:39:46 +0100
1598 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/syscalls.c 2006-04-26 19:06:59 +0200
1599 @@ -36,6 +36,7 @@
1600  #include <linux/file.h>
1601  #include <linux/init.h>
1602  #include <linux/personality.h>
1603 +#include <linux/vs_cvirt.h>
1604  
1605  #include <asm/uaccess.h>
1606  #include <asm/ipc.h>
1607 @@ -259,7 +260,7 @@ long ppc_newuname(struct new_utsname __u
1608         int err = 0;
1609  
1610         down_read(&uts_sem);
1611 -       if (copy_to_user(name, &system_utsname, sizeof(*name)))
1612 +       if (copy_to_user(name, vx_new_utsname(), sizeof(*name)))
1613                 err = -EFAULT;
1614         up_read(&uts_sem);
1615         if (!err)
1616 @@ -272,7 +273,7 @@ int sys_uname(struct old_utsname __user 
1617         int err = 0;
1618         
1619         down_read(&uts_sem);
1620 -       if (copy_to_user(name, &system_utsname, sizeof(*name)))
1621 +       if (copy_to_user(name, vx_new_utsname(), sizeof(*name)))
1622                 err = -EFAULT;
1623         up_read(&uts_sem);
1624         if (!err)
1625 @@ -283,25 +284,22 @@ int sys_uname(struct old_utsname __user 
1626  int sys_olduname(struct oldold_utsname __user *name)
1627  {
1628         int error;
1629 +       struct new_utsname *ptr;
1630  
1631         if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
1632                 return -EFAULT;
1633    
1634         down_read(&uts_sem);
1635 -       error = __copy_to_user(&name->sysname, &system_utsname.sysname,
1636 -                              __OLD_UTS_LEN);
1637 +       ptr = vx_new_utsname();
1638 +       error = __copy_to_user(&name->sysname, ptr->sysname, __OLD_UTS_LEN);
1639         error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
1640 -       error |= __copy_to_user(&name->nodename, &system_utsname.nodename,
1641 -                               __OLD_UTS_LEN);
1642 +       error |= __copy_to_user(&name->nodename, ptr->nodename, __OLD_UTS_LEN);
1643         error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
1644 -       error |= __copy_to_user(&name->release, &system_utsname.release,
1645 -                               __OLD_UTS_LEN);
1646 +       error |= __copy_to_user(&name->release, ptr->release, __OLD_UTS_LEN);
1647         error |= __put_user(0, name->release + __OLD_UTS_LEN);
1648 -       error |= __copy_to_user(&name->version, &system_utsname.version,
1649 -                               __OLD_UTS_LEN);
1650 +       error |= __copy_to_user(&name->version, ptr->version, __OLD_UTS_LEN);
1651         error |= __put_user(0, name->version + __OLD_UTS_LEN);
1652 -       error |= __copy_to_user(&name->machine, &system_utsname.machine,
1653 -                               __OLD_UTS_LEN);
1654 +       error |= __copy_to_user(&name->machine, ptr->machine, __OLD_UTS_LEN);
1655         error |= override_machine(name->machine);
1656         up_read(&uts_sem);
1657  
1658 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/systbl.S linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/systbl.S
1659 --- linux-2.6.16.20/arch/powerpc/kernel/systbl.S        2006-04-09 13:49:43 +0200
1660 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/systbl.S   2006-04-26 19:06:59 +0200
1661 @@ -296,7 +296,7 @@ COMPAT_SYS(fstatfs64)
1662  SYSX(sys_ni_syscall, ppc_fadvise64_64, ppc_fadvise64_64)
1663  PPC_SYS(rtas)
1664  OLDSYS(debug_setcontext)
1665 -SYSCALL(ni_syscall)
1666 +SYSX(sys_vserver, sys32_vserver, sys_vserver)
1667  SYSCALL(ni_syscall)
1668  COMPAT_SYS(mbind)
1669  COMPAT_SYS(get_mempolicy)
1670 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/traps.c
1671 --- linux-2.6.16.20/arch/powerpc/kernel/traps.c 2006-04-09 13:49:43 +0200
1672 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/traps.c    2006-04-26 19:06:59 +0200
1673 @@ -878,8 +878,9 @@ void nonrecoverable_exception(struct pt_
1674  
1675  void trace_syscall(struct pt_regs *regs)
1676  {
1677 -       printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
1678 -              current, current->pid, regs->nip, regs->link, regs->gpr[0],
1679 +       printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
1680 +              current, current->pid, current->xid,
1681 +              regs->nip, regs->link, regs->gpr[0],
1682                regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
1683  }
1684  
1685 diff -NurpP --minimal linux-2.6.16.20/arch/powerpc/kernel/vdso.c linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/vdso.c
1686 --- linux-2.6.16.20/arch/powerpc/kernel/vdso.c  2006-04-09 13:49:43 +0200
1687 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/powerpc/kernel/vdso.c     2006-04-26 19:06:59 +0200
1688 @@ -25,6 +25,7 @@
1689  #include <linux/elf.h>
1690  #include <linux/security.h>
1691  #include <linux/bootmem.h>
1692 +#include <linux/vs_memory.h>
1693  
1694  #include <asm/pgtable.h>
1695  #include <asm/system.h>
1696 @@ -294,7 +295,7 @@ int arch_setup_additional_pages(struct l
1697                 kmem_cache_free(vm_area_cachep, vma);
1698                 return -ENOMEM;
1699         }
1700 -       mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
1701 +       vx_vmpages_add(mm, (vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
1702         up_write(&mm->mmap_sem);
1703  
1704         return 0;
1705 diff -NurpP --minimal linux-2.6.16.20/arch/ppc/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/Kconfig
1706 --- linux-2.6.16.20/arch/ppc/Kconfig    2006-02-18 14:39:47 +0100
1707 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/Kconfig       2006-04-26 19:06:59 +0200
1708 @@ -1394,6 +1394,8 @@ source "arch/powerpc/oprofile/Kconfig"
1709  
1710  source "arch/ppc/Kconfig.debug"
1711  
1712 +source "kernel/vserver/Kconfig"
1713 +
1714  source "security/Kconfig"
1715  
1716  source "crypto/Kconfig"
1717 diff -NurpP --minimal linux-2.6.16.20/arch/ppc/kernel/asm-offsets.c linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/kernel/asm-offsets.c
1718 --- linux-2.6.16.20/arch/ppc/kernel/asm-offsets.c       2006-04-09 13:49:44 +0200
1719 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/kernel/asm-offsets.c  2006-04-26 19:06:59 +0200
1720 @@ -122,6 +122,7 @@ main(void)
1721         DEFINE(TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap));
1722         DEFINE(CLONE_VM, CLONE_VM);
1723         DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
1724 +       DEFINE(CLONE_KTHREAD, CLONE_KTHREAD);
1725         DEFINE(MM_PGD, offsetof(struct mm_struct, pgd));
1726  
1727         /* About the CPU features table */
1728 diff -NurpP --minimal linux-2.6.16.20/arch/ppc/kernel/misc.S linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/kernel/misc.S
1729 --- linux-2.6.16.20/arch/ppc/kernel/misc.S      2006-02-17 22:18:50 +0100
1730 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/kernel/misc.S 2006-04-26 19:06:59 +0200
1731 @@ -1011,7 +1011,7 @@ _GLOBAL(kernel_thread)
1732         mr      r30,r3          /* function */
1733         mr      r31,r4          /* argument */
1734         ori     r3,r5,CLONE_VM  /* flags */
1735 -       oris    r3,r3,CLONE_UNTRACED>>16
1736 +       oris    r3,r3,(CLONE_UNTRACED|CLONE_KTHREAD)>>16
1737         li      r4,0            /* new sp (unused) */
1738         li      r0,__NR_clone
1739         sc
1740 diff -NurpP --minimal linux-2.6.16.20/arch/ppc/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/kernel/traps.c
1741 --- linux-2.6.16.20/arch/ppc/kernel/traps.c     2006-01-18 06:07:56 +0100
1742 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/ppc/kernel/traps.c        2006-04-26 19:06:59 +0200
1743 @@ -749,8 +749,9 @@ void nonrecoverable_exception(struct pt_
1744  
1745  void trace_syscall(struct pt_regs *regs)
1746  {
1747 -       printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
1748 -              current, current->pid, regs->nip, regs->link, regs->gpr[0],
1749 +       printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
1750 +              current, current->pid, current->xid,
1751 +              regs->nip, regs->link, regs->gpr[0],
1752                regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
1753  }
1754  
1755 diff -NurpP --minimal linux-2.6.16.20/arch/s390/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/s390/Kconfig
1756 --- linux-2.6.16.20/arch/s390/Kconfig   2006-02-18 14:39:47 +0100
1757 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/s390/Kconfig      2006-04-26 19:06:59 +0200
1758 @@ -472,6 +472,8 @@ source "arch/s390/oprofile/Kconfig"
1759  
1760  source "arch/s390/Kconfig.debug"
1761  
1762 +source "kernel/vserver/Kconfig"
1763 +
1764  source "security/Kconfig"
1765  
1766  source "crypto/Kconfig"
1767 diff -NurpP --minimal linux-2.6.16.20/arch/s390/kernel/compat_linux.c linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/compat_linux.c
1768 --- linux-2.6.16.20/arch/s390/kernel/compat_linux.c     2006-02-18 14:39:48 +0100
1769 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/compat_linux.c        2006-05-29 16:53:03 +0200
1770 @@ -595,7 +595,7 @@ asmlinkage long sys32_gettimeofday(struc
1771  {
1772         if (tv) {
1773                 struct timeval ktv;
1774 -               do_gettimeofday(&ktv);
1775 +               vx_gettimeofday(&ktv);
1776                 if (put_tv32(tv, &ktv))
1777                         return -EFAULT;
1778         }
1779 diff -NurpP --minimal linux-2.6.16.20/arch/s390/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/process.c
1780 --- linux-2.6.16.20/arch/s390/kernel/process.c  2006-02-18 14:39:48 +0100
1781 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/process.c     2006-04-26 19:06:59 +0200
1782 @@ -164,9 +164,9 @@ void show_regs(struct pt_regs *regs)
1783         struct task_struct *tsk = current;
1784  
1785          printk("CPU:    %d    %s\n", task_thread_info(tsk)->cpu, print_tainted());
1786 -        printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
1787 -              current->comm, current->pid, (void *) tsk,
1788 -              (void *) tsk->thread.ksp);
1789 +       printk("Process %s (pid: %d[#%u], task: %p, ksp: %p)\n",
1790 +              current->comm, current->pid, current->xid,
1791 +              (void *) tsk, (void *) tsk->thread.ksp);
1792  
1793         show_registers(regs);
1794         /* Show stack backtrace if pt_regs is from kernel mode */
1795 @@ -196,7 +196,7 @@ int kernel_thread(int (*fn)(void *), voi
1796         regs.orig_gpr2 = -1;
1797  
1798         /* Ok, create the new process.. */
1799 -       return do_fork(flags | CLONE_VM | CLONE_UNTRACED,
1800 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
1801                        0, &regs, 0, NULL, NULL);
1802  }
1803  
1804 diff -NurpP --minimal linux-2.6.16.20/arch/s390/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/ptrace.c
1805 --- linux-2.6.16.20/arch/s390/kernel/ptrace.c   2006-04-09 13:49:44 +0200
1806 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/ptrace.c      2006-04-26 19:06:59 +0200
1807 @@ -723,7 +723,13 @@ sys_ptrace(long request, long pid, long 
1808                 goto out;
1809         }
1810  
1811 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) {
1812 +               ret = -EPERM;
1813 +               goto out_tsk;
1814 +       }
1815 +
1816         ret = do_ptrace(child, request, addr, data);
1817 +out_tsk:
1818         put_task_struct(child);
1819  out:
1820         unlock_kernel();
1821 diff -NurpP --minimal linux-2.6.16.20/arch/s390/kernel/syscalls.S linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/syscalls.S
1822 --- linux-2.6.16.20/arch/s390/kernel/syscalls.S 2006-02-18 14:39:48 +0100
1823 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/s390/kernel/syscalls.S    2006-04-26 19:06:59 +0200
1824 @@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_sett
1825  SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper)       /* 260 */
1826  SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper)
1827  SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper)
1828 -NI_SYSCALL                                                     /* reserved for vserver */
1829 +SYSCALL(sys_vserver,sys_vserver,sys32_vserver)
1830  SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper)
1831  SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper)
1832  SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper)
1833 diff -NurpP --minimal linux-2.6.16.20/arch/sh/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/sh/Kconfig
1834 --- linux-2.6.16.20/arch/sh/Kconfig     2006-04-09 13:49:44 +0200
1835 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh/Kconfig        2006-04-26 19:06:59 +0200
1836 @@ -633,6 +633,8 @@ source "arch/sh/oprofile/Kconfig"
1837  
1838  source "arch/sh/Kconfig.debug"
1839  
1840 +source "kernel/vserver/Kconfig"
1841 +
1842  source "security/Kconfig"
1843  
1844  source "crypto/Kconfig"
1845 diff -NurpP --minimal linux-2.6.16.20/arch/sh/kernel/kgdb_stub.c linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/kgdb_stub.c
1846 --- linux-2.6.16.20/arch/sh/kernel/kgdb_stub.c  2004-08-14 12:54:51 +0200
1847 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/kgdb_stub.c     2006-04-26 19:06:59 +0200
1848 @@ -412,7 +412,7 @@ static struct task_struct *get_thread(in
1849         if (pid == PID_MAX) pid = 0;
1850  
1851         /* First check via PID */
1852 -       thread = find_task_by_pid(pid);
1853 +       thread = find_task_by_real_pid(pid);
1854  
1855         if (thread)
1856                 return thread;
1857 diff -NurpP --minimal linux-2.6.16.20/arch/sh/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/process.c
1858 --- linux-2.6.16.20/arch/sh/kernel/process.c    2006-02-15 13:54:12 +0100
1859 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/process.c       2006-04-26 19:06:59 +0200
1860 @@ -113,7 +113,8 @@ void machine_power_off(void)
1861  void show_regs(struct pt_regs * regs)
1862  {
1863         printk("\n");
1864 -       printk("Pid : %d, Comm: %20s\n", current->pid, current->comm);
1865 +       printk("Pid : %d[#%u], Comm: %20s\n",
1866 +               current->pid, current->xid, current->comm);
1867         print_symbol("PC is at %s\n", regs->pc);
1868         printk("PC  : %08lx SP  : %08lx SR  : %08lx ",
1869                regs->pc, regs->regs[15], regs->sr);
1870 @@ -181,7 +182,8 @@ int kernel_thread(int (*fn)(void *), voi
1871         regs.sr = (1 << 30);
1872  
1873         /* Ok, create the new process.. */
1874 -       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
1875 +       return do_fork(flags | CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD,
1876 +               0, &regs, 0, NULL, NULL);
1877  }
1878  
1879  /*
1880 diff -NurpP --minimal linux-2.6.16.20/arch/sh/kernel/setup.c linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/setup.c
1881 --- linux-2.6.16.20/arch/sh/kernel/setup.c      2006-02-15 13:54:12 +0100
1882 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/setup.c 2006-05-11 16:06:22 +0200
1883 @@ -20,6 +20,7 @@
1884  #include <linux/root_dev.h>
1885  #include <linux/utsname.h>
1886  #include <linux/cpu.h>
1887 +#include <linux/vs_cvirt.h>
1888  #include <asm/uaccess.h>
1889  #include <asm/io.h>
1890  #include <asm/sections.h>
1891 @@ -485,7 +486,7 @@ static int show_cpuinfo(struct seq_file 
1892                 seq_printf(m, "machine\t\t: %s\n", get_system_type());
1893  
1894         seq_printf(m, "processor\t: %d\n", cpu);
1895 -       seq_printf(m, "cpu family\t: %s\n", system_utsname.machine);
1896 +       seq_printf(m, "cpu family\t: %s\n", vx_new_uts(machine));
1897         seq_printf(m, "cpu type\t: %s\n", get_cpu_subtype());
1898  
1899         show_cpuflags(m);
1900 diff -NurpP --minimal linux-2.6.16.20/arch/sh/kernel/sys_sh.c linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/sys_sh.c
1901 --- linux-2.6.16.20/arch/sh/kernel/sys_sh.c     2005-08-29 22:24:55 +0200
1902 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh/kernel/sys_sh.c        2006-05-11 16:06:22 +0200
1903 @@ -21,6 +21,7 @@
1904  #include <linux/mman.h>
1905  #include <linux/file.h>
1906  #include <linux/utsname.h>
1907 +#include <linux/vs_cvirt.h>
1908  
1909  #include <asm/uaccess.h>
1910  #include <asm/ipc.h>
1911 @@ -267,7 +268,7 @@ asmlinkage int sys_uname(struct old_utsn
1912         if (!name)
1913                 return -EFAULT;
1914         down_read(&uts_sem);
1915 -       err=copy_to_user(name, &system_utsname, sizeof (*name));
1916 +       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
1917         up_read(&uts_sem);
1918         return err?-EFAULT:0;
1919  }
1920 diff -NurpP --minimal linux-2.6.16.20/arch/sh64/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/sh64/kernel/process.c
1921 --- linux-2.6.16.20/arch/sh64/kernel/process.c  2006-01-18 06:07:57 +0100
1922 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh64/kernel/process.c     2006-04-26 19:06:59 +0200
1923 @@ -637,7 +637,7 @@ int kernel_thread(int (*fn)(void *), voi
1924  static __inline__ _syscall2(int,clone,unsigned long,flags,unsigned long,newsp)
1925  static __inline__ _syscall1(int,exit,int,ret)
1926  
1927 -       reply = clone(flags | CLONE_VM, 0);
1928 +       reply = clone(flags | CLONE_VM | CLONE_KTHREAD, 0);
1929         if (!reply) {
1930                 /* Child */
1931                 reply = exit(fn(arg));
1932 diff -NurpP --minimal linux-2.6.16.20/arch/sh64/kernel/sys_sh64.c linux-2.6.16.20-vs2.1.1-rc22/arch/sh64/kernel/sys_sh64.c
1933 --- linux-2.6.16.20/arch/sh64/kernel/sys_sh64.c 2005-06-22 02:37:59 +0200
1934 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sh64/kernel/sys_sh64.c    2006-05-11 16:06:22 +0200
1935 @@ -29,6 +29,7 @@
1936  #include <linux/file.h>
1937  #include <linux/utsname.h>
1938  #include <linux/syscalls.h>
1939 +#include <linux/vs_cvirt.h>
1940  #include <asm/uaccess.h>
1941  #include <asm/ipc.h>
1942  #include <asm/ptrace.h>
1943 @@ -279,7 +280,7 @@ asmlinkage int sys_uname(struct old_utsn
1944         if (!name)
1945                 return -EFAULT;
1946         down_read(&uts_sem);
1947 -       err=copy_to_user(name, &system_utsname, sizeof (*name));
1948 +       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
1949         up_read(&uts_sem);
1950         return err?-EFAULT:0;
1951  }
1952 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/Kconfig
1953 --- linux-2.6.16.20/arch/sparc/Kconfig  2006-02-18 14:39:49 +0100
1954 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/Kconfig     2006-04-26 19:06:59 +0200
1955 @@ -284,6 +284,8 @@ source "fs/Kconfig"
1956  
1957  source "arch/sparc/Kconfig.debug"
1958  
1959 +source "kernel/vserver/Kconfig"
1960 +
1961  source "security/Kconfig"
1962  
1963  source "crypto/Kconfig"
1964 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/process.c
1965 --- linux-2.6.16.20/arch/sparc/kernel/process.c 2006-02-15 13:54:13 +0100
1966 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/process.c    2006-04-26 19:07:00 +0200
1967 @@ -706,7 +706,8 @@ pid_t kernel_thread(int (*fn)(void *), v
1968                              /* Notreached by child. */
1969                              "1: mov %%o0, %0\n\t" :
1970                              "=r" (retval) :
1971 -                            "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED),
1972 +                            "i" (__NR_clone), "r" (flags |
1973 +                                       CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD),
1974                              "i" (__NR_exit),  "r" (fn), "r" (arg) :
1975                              "g1", "g2", "g3", "o0", "o1", "memory", "cc");
1976         return retval;
1977 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/ptrace.c
1978 --- linux-2.6.16.20/arch/sparc/kernel/ptrace.c  2006-04-09 13:49:44 +0200
1979 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/ptrace.c     2006-04-28 05:07:10 +0200
1980 @@ -299,6 +299,10 @@ asmlinkage void do_ptrace(struct pt_regs
1981                 pt_error_return(regs, -ret);
1982                 goto out;
1983         }
1984 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) {
1985 +               pt_error_return(regs, ESRCH);
1986 +               goto out_tsk;
1987 +       }
1988  
1989         if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH)
1990             || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) {
1991 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/kernel/sys_sparc.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/sys_sparc.c
1992 --- linux-2.6.16.20/arch/sparc/kernel/sys_sparc.c       2005-06-22 02:37:59 +0200
1993 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/sys_sparc.c  2006-04-26 19:07:00 +0200
1994 @@ -21,6 +21,7 @@
1995  #include <linux/utsname.h>
1996  #include <linux/smp.h>
1997  #include <linux/smp_lock.h>
1998 +#include <linux/vs_cvirt.h>
1999  
2000  #include <asm/uaccess.h>
2001  #include <asm/ipc.h>
2002 @@ -470,13 +471,13 @@ asmlinkage int sys_getdomainname(char __
2003         
2004         down_read(&uts_sem);
2005         
2006 -       nlen = strlen(system_utsname.domainname) + 1;
2007 +       nlen = strlen(vx_new_uts(domainname)) + 1;
2008  
2009         if (nlen < len)
2010                 len = nlen;
2011         if (len > __NEW_UTS_LEN)
2012                 goto done;
2013 -       if (copy_to_user(name, system_utsname.domainname, len))
2014 +       if (copy_to_user(name, vx_new_uts(domainname), len))
2015                 goto done;
2016         err = 0;
2017  done:
2018 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/kernel/sys_sunos.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/sys_sunos.c
2019 --- linux-2.6.16.20/arch/sparc/kernel/sys_sunos.c       2006-02-15 13:54:13 +0100
2020 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/sys_sunos.c  2006-05-11 16:06:22 +0200
2021 @@ -35,6 +35,7 @@
2022  #include <linux/smp.h>
2023  #include <linux/smp_lock.h>
2024  #include <linux/syscalls.h>
2025 +#include <linux/vs_cvirt.h>
2026  
2027  #include <net/sock.h>
2028  
2029 @@ -482,14 +483,16 @@ struct sunos_utsname {
2030  asmlinkage int sunos_uname(struct sunos_utsname __user *name)
2031  {
2032         int ret;
2033 +       struct new_utsname *ptr;
2034         down_read(&uts_sem);
2035 -       ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0], sizeof(name->sname) - 1);
2036 +       ptr = vx_new_utsname();
2037 +       ret = copy_to_user(&name->sname[0], ptr->sysname, sizeof(name->sname) - 1);
2038         if (!ret) {
2039 -               ret |= __copy_to_user(&name->nname[0], &system_utsname.nodename[0], sizeof(name->nname) - 1);
2040 +               ret |= __copy_to_user(&name->nname[0], ptr->nodename, sizeof(name->nname) - 1);
2041                 ret |= __put_user('\0', &name->nname[8]);
2042 -               ret |= __copy_to_user(&name->rel[0], &system_utsname.release[0], sizeof(name->rel) - 1);
2043 -               ret |= __copy_to_user(&name->ver[0], &system_utsname.version[0], sizeof(name->ver) - 1);
2044 -               ret |= __copy_to_user(&name->mach[0], &system_utsname.machine[0], sizeof(name->mach) - 1);
2045 +               ret |= __copy_to_user(&name->rel[0], ptr->release, sizeof(name->rel) - 1);
2046 +               ret |= __copy_to_user(&name->ver[0], ptr->version, sizeof(name->ver) - 1);
2047 +               ret |= __copy_to_user(&name->mach[0], ptr->machine, sizeof(name->mach) - 1);
2048         }
2049         up_read(&uts_sem);
2050         return ret ? -EFAULT : 0;
2051 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/kernel/systbls.S linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/systbls.S
2052 --- linux-2.6.16.20/arch/sparc/kernel/systbls.S 2006-02-18 14:39:49 +0100
2053 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/systbls.S    2006-04-26 19:07:00 +0200
2054 @@ -72,7 +72,7 @@ sys_call_table:
2055  /*250*/        .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
2056  /*255*/        .long sys_nis_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
2057  /*260*/        .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
2058 -/*265*/        .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
2059 +/*265*/        .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
2060  /*270*/        .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
2061  /*275*/        .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
2062  /*280*/        .long sys_ni_syscall, sys_add_key, sys_request_key, sys_keyctl, sys_openat
2063 diff -NurpP --minimal linux-2.6.16.20/arch/sparc/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/traps.c
2064 --- linux-2.6.16.20/arch/sparc/kernel/traps.c   2006-01-18 06:07:57 +0100
2065 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc/kernel/traps.c      2006-04-26 19:07:00 +0200
2066 @@ -100,7 +100,8 @@ void die_if_kernel(char *str, struct pt_
2067  "              /_| \\__/ |_\\\n"
2068  "                 \\__U_/\n");
2069  
2070 -       printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter);
2071 +       printk("%s(%d[#%u]): %s [#%d]\n", current->comm,
2072 +               current->pid, current->xid, str, ++die_counter);
2073         show_regs(regs);
2074  
2075         __SAVE; __SAVE; __SAVE; __SAVE;
2076 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/Kconfig
2077 --- linux-2.6.16.20/arch/sparc64/Kconfig        2006-04-09 13:49:44 +0200
2078 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/Kconfig   2006-04-26 19:07:00 +0200
2079 @@ -394,6 +394,8 @@ endmenu
2080  
2081  source "arch/sparc64/Kconfig.debug"
2082  
2083 +source "kernel/vserver/Kconfig"
2084 +
2085  source "security/Kconfig"
2086  
2087  source "crypto/Kconfig"
2088 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/binfmt_aout32.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/binfmt_aout32.c
2089 --- linux-2.6.16.20/arch/sparc64/kernel/binfmt_aout32.c 2006-02-18 14:39:49 +0100
2090 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/binfmt_aout32.c    2006-04-26 19:07:00 +0200
2091 @@ -27,6 +27,7 @@
2092  #include <linux/binfmts.h>
2093  #include <linux/personality.h>
2094  #include <linux/init.h>
2095 +#include <linux/vs_memory.h>
2096  
2097  #include <asm/system.h>
2098  #include <asm/uaccess.h>
2099 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/process.c
2100 --- linux-2.6.16.20/arch/sparc64/kernel/process.c       2006-02-15 13:54:13 +0100
2101 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/process.c  2006-04-26 19:07:00 +0200
2102 @@ -717,7 +717,8 @@ pid_t kernel_thread(int (*fn)(void *), v
2103                              /* Notreached by child. */
2104                              "1:" :
2105                              "=r" (retval) :
2106 -                            "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED),
2107 +                            "i" (__NR_clone), "r" (flags |
2108 +                               CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD),
2109                              "i" (__NR_exit),  "r" (fn), "r" (arg) :
2110                              "g1", "g2", "g3", "o0", "o1", "memory", "cc");
2111         return retval;
2112 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/ptrace.c
2113 --- linux-2.6.16.20/arch/sparc64/kernel/ptrace.c        2006-02-18 14:39:49 +0100
2114 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/ptrace.c   2006-04-28 05:07:10 +0200
2115 @@ -209,6 +209,10 @@ asmlinkage void do_ptrace(struct pt_regs
2116                 pt_error_return(regs, -ret);
2117                 goto out;
2118         }
2119 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) {
2120 +               pt_error_return(regs, ESRCH);
2121 +               goto out_tsk;
2122 +       }
2123  
2124         if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH)
2125             || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) {
2126 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/sys_sparc.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/sys_sparc.c
2127 --- linux-2.6.16.20/arch/sparc64/kernel/sys_sparc.c     2005-08-29 22:24:56 +0200
2128 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/sys_sparc.c        2006-04-26 19:07:00 +0200
2129 @@ -25,6 +25,7 @@
2130  #include <linux/syscalls.h>
2131  #include <linux/ipc.h>
2132  #include <linux/personality.h>
2133 +#include <linux/vs_cvirt.h>
2134  
2135  #include <asm/uaccess.h>
2136  #include <asm/ipc.h>
2137 @@ -476,13 +477,13 @@ asmlinkage long sys_getdomainname(char _
2138  
2139         down_read(&uts_sem);
2140         
2141 -       nlen = strlen(system_utsname.domainname) + 1;
2142 +       nlen = strlen(vx_new_uts(domainname)) + 1;
2143  
2144          if (nlen < len)
2145                  len = nlen;
2146         if (len > __NEW_UTS_LEN)
2147                 goto done;
2148 -       if (copy_to_user(name, system_utsname.domainname, len))
2149 +       if (copy_to_user(name, vx_new_uts(domainname), len))
2150                 goto done;
2151         err = 0;
2152  done:
2153 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/sys_sparc32.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/sys_sparc32.c
2154 --- linux-2.6.16.20/arch/sparc64/kernel/sys_sparc32.c   2006-02-18 14:39:49 +0100
2155 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/sys_sparc32.c      2006-05-29 16:53:09 +0200
2156 @@ -788,7 +788,7 @@ asmlinkage long sys32_gettimeofday(struc
2157  {
2158         if (tv) {
2159                 struct timeval ktv;
2160 -               do_gettimeofday(&ktv);
2161 +               vx_gettimeofday(&ktv);
2162                 if (put_tv32(tv, &ktv))
2163                         return -EFAULT;
2164         }
2165 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/sys_sunos32.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/sys_sunos32.c
2166 --- linux-2.6.16.20/arch/sparc64/kernel/sys_sunos32.c   2006-02-15 13:54:13 +0100
2167 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/sys_sunos32.c      2006-05-11 16:06:22 +0200
2168 @@ -35,6 +35,7 @@
2169  #include <linux/smp.h>
2170  #include <linux/smp_lock.h>
2171  #include <linux/syscalls.h>
2172 +#include <linux/vs_cvirt.h>
2173  
2174  #include <asm/uaccess.h>
2175  #include <asm/page.h>
2176 @@ -437,18 +438,20 @@ struct sunos_utsname {
2177  asmlinkage int sunos_uname(struct sunos_utsname __user *name)
2178  {
2179         int ret;
2180 +       struct new_utsname *ptr;
2181  
2182         down_read(&uts_sem);
2183 -       ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0],
2184 +       ptr = vx_new_utsname();
2185 +       ret = copy_to_user(&name->sname[0], ptr->sysname,
2186                            sizeof(name->sname) - 1);
2187 -       ret |= copy_to_user(&name->nname[0], &system_utsname.nodename[0],
2188 +       ret |= copy_to_user(&name->nname[0], ptr->nodename,
2189                             sizeof(name->nname) - 1);
2190         ret |= put_user('\0', &name->nname[8]);
2191 -       ret |= copy_to_user(&name->rel[0], &system_utsname.release[0],
2192 +       ret |= copy_to_user(&name->rel[0], ptr->release,
2193                             sizeof(name->rel) - 1);
2194 -       ret |= copy_to_user(&name->ver[0], &system_utsname.version[0],
2195 +       ret |= copy_to_user(&name->ver[0], ptr->version,
2196                             sizeof(name->ver) - 1);
2197 -       ret |= copy_to_user(&name->mach[0], &system_utsname.machine[0],
2198 +       ret |= copy_to_user(&name->mach[0], ptr->machine,
2199                             sizeof(name->mach) - 1);
2200         up_read(&uts_sem);
2201         return (ret ? -EFAULT : 0);
2202 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/systbls.S linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/systbls.S
2203 --- linux-2.6.16.20/arch/sparc64/kernel/systbls.S       2006-02-18 14:39:49 +0100
2204 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/systbls.S  2006-04-26 19:07:00 +0200
2205 @@ -73,7 +73,7 @@ sys_call_table32:
2206  /*250*/        .word sys32_mremap, sys32_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl
2207         .word sys_ni_syscall, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
2208  /*260*/        .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
2209 -       .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
2210 +       .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy
2211  /*270*/        .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
2212         .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
2213  /*280*/        .word sys_ni_syscall, sys_add_key, sys_request_key, sys_keyctl, compat_sys_openat
2214 @@ -142,7 +142,7 @@ sys_call_table:
2215  /*250*/        .word sys64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
2216         .word sys_ni_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
2217  /*260*/        .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
2218 -       .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
2219 +       .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
2220  /*270*/        .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
2221         .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
2222  /*280*/        .word sys_nis_syscall, sys_add_key, sys_request_key, sys_keyctl, sys_openat
2223 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/traps.c
2224 --- linux-2.6.16.20/arch/sparc64/kernel/traps.c 2006-01-18 06:07:57 +0100
2225 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/kernel/traps.c    2006-04-26 19:07:00 +0200
2226 @@ -1896,7 +1896,8 @@ void die_if_kernel(char *str, struct pt_
2227  "              /_| \\__/ |_\\\n"
2228  "                 \\__U_/\n");
2229  
2230 -       printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter);
2231 +       printk("%s(%d[#%u]): %s [#%d]\n", current->comm,
2232 +               current->pid, current->xid, str, ++die_counter);
2233         notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV);
2234         __asm__ __volatile__("flushw");
2235         __show_regs(regs);
2236 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/solaris/fs.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/solaris/fs.c
2237 --- linux-2.6.16.20/arch/sparc64/solaris/fs.c   2006-04-09 13:49:44 +0200
2238 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/solaris/fs.c      2006-04-26 19:07:00 +0200
2239 @@ -363,7 +363,7 @@ static int report_statvfs(struct vfsmoun
2240                 int j = strlen (p);
2241                 
2242                 if (j > 15) j = 15;
2243 -               if (IS_RDONLY(inode)) i = 1;
2244 +               if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) i = 1;
2245                 if (mnt->mnt_flags & MNT_NOSUID) i |= 2;
2246                 if (!sysv_valid_dev(inode->i_sb->s_dev))
2247                         return -EOVERFLOW;
2248 @@ -399,7 +399,7 @@ static int report_statvfs64(struct vfsmo
2249                 int j = strlen (p);
2250                 
2251                 if (j > 15) j = 15;
2252 -               if (IS_RDONLY(inode)) i = 1;
2253 +               if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt)) i = 1;
2254                 if (mnt->mnt_flags & MNT_NOSUID) i |= 2;
2255                 if (!sysv_valid_dev(inode->i_sb->s_dev))
2256                         return -EOVERFLOW;
2257 diff -NurpP --minimal linux-2.6.16.20/arch/sparc64/solaris/misc.c linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/solaris/misc.c
2258 --- linux-2.6.16.20/arch/sparc64/solaris/misc.c 2006-01-03 17:29:19 +0100
2259 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/sparc64/solaris/misc.c    2006-05-11 16:06:22 +0200
2260 @@ -17,6 +17,7 @@
2261  #include <linux/timex.h>
2262  #include <linux/major.h>
2263  #include <linux/compat.h>
2264 +#include <linux/vs_cvirt.h>
2265  
2266  #include <asm/uaccess.h>
2267  #include <asm/string.h>
2268 @@ -239,7 +240,7 @@ asmlinkage int solaris_utssys(u32 buf, u
2269                 /* Let's cheat */
2270                 err  = set_utsfield(v->sysname, "SunOS", 1, 0);
2271                 down_read(&uts_sem);
2272 -               err |= set_utsfield(v->nodename, system_utsname.nodename,
2273 +               err |= set_utsfield(v->nodename, vx_new_uts(nodename),
2274                                     1, 1);
2275                 up_read(&uts_sem);
2276                 err |= set_utsfield(v->release, "2.6", 0, 0);
2277 @@ -263,7 +264,7 @@ asmlinkage int solaris_utsname(u32 buf)
2278         /* Why should we not lie a bit? */
2279         down_read(&uts_sem);
2280         err  = set_utsfield(v->sysname, "SunOS", 0, 0);
2281 -       err |= set_utsfield(v->nodename, system_utsname.nodename, 1, 1);
2282 +       err |= set_utsfield(v->nodename, vx_new_uts(nodename), 1, 1);
2283         err |= set_utsfield(v->release, "5.6", 0, 0);
2284         err |= set_utsfield(v->version, "Generic", 0, 0);
2285         err |= set_utsfield(v->machine, machine(), 0, 0);
2286 @@ -295,7 +296,7 @@ asmlinkage int solaris_sysinfo(int cmd, 
2287         case SI_HOSTNAME:
2288                 r = buffer + 256;
2289                 down_read(&uts_sem);
2290 -               for (p = system_utsname.nodename, q = buffer; 
2291 +               for (p = vx_new_uts(nodename), q = buffer;
2292                      q < r && *p && *p != '.'; *q++ = *p++);
2293                 up_read(&uts_sem);
2294                 *q = 0;
2295 diff -NurpP --minimal linux-2.6.16.20/arch/um/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/um/Kconfig
2296 --- linux-2.6.16.20/arch/um/Kconfig     2006-02-18 14:39:49 +0100
2297 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/um/Kconfig        2006-04-26 19:07:00 +0200
2298 @@ -290,6 +290,8 @@ source "drivers/connector/Kconfig"
2299  
2300  source "fs/Kconfig"
2301  
2302 +source "kernel/vserver/Kconfig"
2303 +
2304  source "security/Kconfig"
2305  
2306  source "crypto/Kconfig"
2307 diff -NurpP --minimal linux-2.6.16.20/arch/um/drivers/mconsole_kern.c linux-2.6.16.20-vs2.1.1-rc22/arch/um/drivers/mconsole_kern.c
2308 --- linux-2.6.16.20/arch/um/drivers/mconsole_kern.c     2006-04-09 13:49:44 +0200
2309 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/um/drivers/mconsole_kern.c        2006-04-26 19:07:00 +0200
2310 @@ -21,6 +21,7 @@
2311  #include "linux/proc_fs.h"
2312  #include "linux/syscalls.h"
2313  #include "linux/console.h"
2314 +#include "linux/vs_pid.h"
2315  #include "asm/irq.h"
2316  #include "asm/uaccess.h"
2317  #include "user_util.h"
2318 diff -NurpP --minimal linux-2.6.16.20/arch/um/kernel/process_kern.c linux-2.6.16.20-vs2.1.1-rc22/arch/um/kernel/process_kern.c
2319 --- linux-2.6.16.20/arch/um/kernel/process_kern.c       2006-02-18 14:39:49 +0100
2320 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/um/kernel/process_kern.c  2006-04-28 05:07:10 +0200
2321 @@ -23,6 +23,7 @@
2322  #include "linux/proc_fs.h"
2323  #include "linux/ptrace.h"
2324  #include "linux/random.h"
2325 +
2326  #include "asm/unistd.h"
2327  #include "asm/mman.h"
2328  #include "asm/segment.h"
2329 @@ -95,7 +96,7 @@ int kernel_thread(int (*fn)(void *), voi
2330  
2331         current->thread.request.u.thread.proc = fn;
2332         current->thread.request.u.thread.arg = arg;
2333 -       pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0,
2334 +       pid = do_fork(CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD | flags, 0,
2335                       &current->thread.regs, 0, NULL, NULL);
2336         if(pid < 0)
2337                 panic("do_fork failed in kernel_thread, errno = %d", pid);
2338 diff -NurpP --minimal linux-2.6.16.20/arch/um/kernel/syscall_kern.c linux-2.6.16.20-vs2.1.1-rc22/arch/um/kernel/syscall_kern.c
2339 --- linux-2.6.16.20/arch/um/kernel/syscall_kern.c       2005-08-29 22:24:56 +0200
2340 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/um/kernel/syscall_kern.c  2006-04-26 19:07:00 +0200
2341 @@ -15,6 +15,8 @@
2342  #include "linux/unistd.h"
2343  #include "linux/slab.h"
2344  #include "linux/utime.h"
2345 +#include <linux/vs_cvirt.h>
2346 +
2347  #include "asm/mman.h"
2348  #include "asm/uaccess.h"
2349  #include "kern_util.h"
2350 @@ -110,7 +112,7 @@ long sys_uname(struct old_utsname * name
2351         if (!name)
2352                 return -EFAULT;
2353         down_read(&uts_sem);
2354 -       err=copy_to_user(name, &system_utsname, sizeof (*name));
2355 +       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
2356         up_read(&uts_sem);
2357         return err?-EFAULT:0;
2358  }
2359 @@ -118,6 +120,7 @@ long sys_uname(struct old_utsname * name
2360  long sys_olduname(struct oldold_utsname * name)
2361  {
2362         long error;
2363 +       struct new_utsname *ptr;
2364  
2365         if (!name)
2366                 return -EFAULT;
2367 @@ -126,19 +129,20 @@ long sys_olduname(struct oldold_utsname 
2368    
2369         down_read(&uts_sem);
2370         
2371 -       error = __copy_to_user(&name->sysname,&system_utsname.sysname,
2372 +       ptr = vx_new_utsname();
2373 +       error = __copy_to_user(&name->sysname,ptr->sysname,
2374                                __OLD_UTS_LEN);
2375         error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
2376 -       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,
2377 +       error |= __copy_to_user(&name->nodename,ptr->nodename,
2378                                 __OLD_UTS_LEN);
2379         error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
2380 -       error |= __copy_to_user(&name->release,&system_utsname.release,
2381 +       error |= __copy_to_user(&name->release,ptr->release,
2382                                 __OLD_UTS_LEN);
2383         error |= __put_user(0,name->release+__OLD_UTS_LEN);
2384 -       error |= __copy_to_user(&name->version,&system_utsname.version,
2385 +       error |= __copy_to_user(&name->version,ptr->version,
2386                                 __OLD_UTS_LEN);
2387         error |= __put_user(0,name->version+__OLD_UTS_LEN);
2388 -       error |= __copy_to_user(&name->machine,&system_utsname.machine,
2389 +       error |= __copy_to_user(&name->machine,ptr->machine,
2390                                 __OLD_UTS_LEN);
2391         error |= __put_user(0,name->machine+__OLD_UTS_LEN);
2392         
2393 diff -NurpP --minimal linux-2.6.16.20/arch/um/sys-x86_64/syscalls.c linux-2.6.16.20-vs2.1.1-rc22/arch/um/sys-x86_64/syscalls.c
2394 --- linux-2.6.16.20/arch/um/sys-x86_64/syscalls.c       2006-01-03 17:29:20 +0100
2395 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/um/sys-x86_64/syscalls.c  2006-05-11 16:06:22 +0200
2396 @@ -9,6 +9,7 @@
2397  #include "linux/shm.h"
2398  #include "linux/utsname.h"
2399  #include "linux/personality.h"
2400 +#include "linux/vs_cvirt.h"
2401  #include "asm/uaccess.h"
2402  #define __FRAME_OFFSETS
2403  #include "asm/ptrace.h"
2404 @@ -21,7 +22,7 @@ asmlinkage long sys_uname64(struct new_u
2405  {
2406         int err;
2407         down_read(&uts_sem);
2408 -       err = copy_to_user(name, &system_utsname, sizeof (*name));
2409 +       err = copy_to_user(name, vx_new_utsname(), sizeof (*name));
2410         up_read(&uts_sem);
2411         if (personality(current->personality) == PER_LINUX32)
2412                 err |= copy_to_user(&name->machine, "i686", 5);
2413 diff -NurpP --minimal linux-2.6.16.20/arch/v850/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/v850/Kconfig
2414 --- linux-2.6.16.20/arch/v850/Kconfig   2006-02-18 14:39:50 +0100
2415 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/v850/Kconfig      2006-04-26 19:07:00 +0200
2416 @@ -320,6 +320,8 @@ source "drivers/usb/Kconfig"
2417  
2418  source "arch/v850/Kconfig.debug"
2419  
2420 +source "kernel/vserver/Kconfig"
2421 +
2422  source "security/Kconfig"
2423  
2424  source "crypto/Kconfig"
2425 diff -NurpP --minimal linux-2.6.16.20/arch/v850/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/v850/kernel/process.c
2426 --- linux-2.6.16.20/arch/v850/kernel/process.c  2006-04-09 13:49:44 +0200
2427 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/v850/kernel/process.c     2006-04-26 19:07:00 +0200
2428 @@ -84,7 +84,7 @@ int kernel_thread (int (*fn)(void *), vo
2429         /* Clone this thread.  Note that we don't pass the clone syscall's
2430            second argument -- it's ignored for calls from kernel mode (the
2431            child's SP is always set to the top of the kernel stack).  */
2432 -       arg0 = flags | CLONE_VM;
2433 +       arg0 = flags | CLONE_VM | CLONE_KTHREAD;
2434         syscall = __NR_clone;
2435         asm volatile ("trap " SYSCALL_SHORT_TRAP
2436                       : "=r" (ret), "=r" (syscall)
2437 diff -NurpP --minimal linux-2.6.16.20/arch/v850/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/arch/v850/kernel/ptrace.c
2438 --- linux-2.6.16.20/arch/v850/kernel/ptrace.c   2006-04-09 13:49:44 +0200
2439 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/v850/kernel/ptrace.c      2006-04-26 19:07:00 +0200
2440 @@ -137,6 +137,8 @@ long arch_ptrace(struct task_struct *chi
2441                         break;
2442                 rval = -EIO;
2443                 goto out;
2444 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
2445 +               goto out_tsk;
2446  
2447         /* Read/write the word at location ADDR in the registers.  */
2448         case PTRACE_PEEKUSR:
2449 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/Kconfig linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/Kconfig
2450 --- linux-2.6.16.20/arch/x86_64/Kconfig 2006-04-09 13:49:44 +0200
2451 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/Kconfig    2006-04-26 19:07:00 +0200
2452 @@ -588,6 +588,8 @@ endmenu
2453  
2454  source "arch/x86_64/Kconfig.debug"
2455  
2456 +source "kernel/vserver/Kconfig"
2457 +
2458  source "security/Kconfig"
2459  
2460  source "crypto/Kconfig"
2461 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/ia32/ia32_aout.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/ia32_aout.c
2462 --- linux-2.6.16.20/arch/x86_64/ia32/ia32_aout.c        2006-01-03 17:29:20 +0100
2463 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/ia32_aout.c   2006-04-26 19:07:00 +0200
2464 @@ -25,6 +25,7 @@
2465  #include <linux/binfmts.h>
2466  #include <linux/personality.h>
2467  #include <linux/init.h>
2468 +#include <linux/vs_memory.h>
2469  
2470  #include <asm/system.h>
2471  #include <asm/uaccess.h>
2472 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/ia32/ia32_binfmt.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/ia32_binfmt.c
2473 --- linux-2.6.16.20/arch/x86_64/ia32/ia32_binfmt.c      2006-02-18 14:39:50 +0100
2474 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/ia32_binfmt.c 2006-04-26 19:07:00 +0200
2475 @@ -371,7 +371,8 @@ int ia32_setup_arg_pages(struct linux_bi
2476                         kmem_cache_free(vm_area_cachep, mpnt);
2477                         return ret;
2478                 }
2479 -               mm->stack_vm = mm->total_vm = vma_pages(mpnt);
2480 +               vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt));
2481 +               mm->stack_vm = mm->total_vm;
2482         } 
2483  
2484         for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
2485 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/ia32/ia32entry.S linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/ia32entry.S
2486 --- linux-2.6.16.20/arch/x86_64/ia32/ia32entry.S        2006-02-18 14:39:50 +0100
2487 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/ia32entry.S   2006-04-26 19:07:00 +0200
2488 @@ -650,7 +650,7 @@ ia32_sys_call_table:
2489         .quad sys_tgkill                /* 270 */
2490         .quad compat_sys_utimes
2491         .quad sys32_fadvise64_64
2492 -       .quad quiet_ni_syscall  /* sys_vserver */
2493 +       .quad sys32_vserver
2494         .quad sys_mbind
2495         .quad compat_sys_get_mempolicy  /* 275 */
2496         .quad sys_set_mempolicy
2497 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/ia32/sys_ia32.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/sys_ia32.c
2498 --- linux-2.6.16.20/arch/x86_64/ia32/sys_ia32.c 2006-02-18 14:39:50 +0100
2499 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/sys_ia32.c    2006-05-29 16:53:21 +0200
2500 @@ -62,6 +62,7 @@
2501  #include <linux/highuid.h>
2502  #include <linux/vmalloc.h>
2503  #include <linux/fsnotify.h>
2504 +#include <linux/vs_cvirt.h>
2505  #include <asm/mman.h>
2506  #include <asm/types.h>
2507  #include <asm/uaccess.h>
2508 @@ -460,7 +461,7 @@ sys32_gettimeofday(struct compat_timeval
2509  {
2510         if (tv) {
2511                 struct timeval ktv;
2512 -               do_gettimeofday(&ktv);
2513 +               vx_gettimeofday(&ktv);
2514                 if (put_tv32(tv, &ktv))
2515                         return -EFAULT;
2516         }
2517 @@ -882,6 +883,7 @@ asmlinkage long sys32_mmap2(unsigned lon
2518  asmlinkage long sys32_olduname(struct oldold_utsname __user * name)
2519  {
2520         int error;
2521 +       struct new_utsname *ptr;
2522  
2523         if (!name)
2524                 return -EFAULT;
2525 @@ -890,13 +892,14 @@ asmlinkage long sys32_olduname(struct ol
2526    
2527         down_read(&uts_sem);
2528         
2529 -       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
2530 +       ptr = vx_new_utsname();
2531 +       error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
2532          __put_user(0,name->sysname+__OLD_UTS_LEN);
2533 -        __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
2534 +        __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
2535          __put_user(0,name->nodename+__OLD_UTS_LEN);
2536 -        __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
2537 +        __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
2538          __put_user(0,name->release+__OLD_UTS_LEN);
2539 -        __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
2540 +        __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
2541          __put_user(0,name->version+__OLD_UTS_LEN);
2542          { 
2543                  char *arch = "x86_64";
2544 @@ -919,7 +922,7 @@ long sys32_uname(struct old_utsname __us
2545         if (!name)
2546                 return -EFAULT;
2547         down_read(&uts_sem);
2548 -       err=copy_to_user(name, &system_utsname, sizeof (*name));
2549 +       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
2550         up_read(&uts_sem);
2551         if (personality(current->personality) == PER_LINUX32) 
2552                 err |= copy_to_user(&name->machine, "i686", 5);
2553 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/ia32/syscall32.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/syscall32.c
2554 --- linux-2.6.16.20/arch/x86_64/ia32/syscall32.c        2005-10-28 20:49:18 +0200
2555 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/ia32/syscall32.c   2006-04-26 19:07:00 +0200
2556 @@ -10,6 +10,7 @@
2557  #include <linux/init.h>
2558  #include <linux/stringify.h>
2559  #include <linux/security.h>
2560 +#include <linux/vs_memory.h>
2561  #include <asm/proto.h>
2562  #include <asm/tlbflush.h>
2563  #include <asm/ia32_unistd.h>
2564 @@ -70,7 +71,7 @@ int syscall32_setup_pages(struct linux_b
2565                 kmem_cache_free(vm_area_cachep, vma);
2566                 return ret;
2567         }
2568 -       mm->total_vm += npages;
2569 +       vx_vmpages_add(mm, npages);
2570         up_write(&mm->mmap_sem);
2571         return 0;
2572  }
2573 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/kernel/process.c
2574 --- linux-2.6.16.20/arch/x86_64/kernel/process.c        2006-05-11 21:25:35 +0200
2575 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/kernel/process.c   2006-04-26 19:07:00 +0200
2576 @@ -55,7 +55,8 @@
2577  
2578  asmlinkage extern void ret_from_fork(void);
2579  
2580 -unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
2581 +unsigned long kernel_thread_flags =
2582 +       CLONE_VM | CLONE_UNTRACED | CLONE_KTHREAD;
2583  
2584  unsigned long boot_option_idle_override = 0;
2585  EXPORT_SYMBOL(boot_option_idle_override);
2586 @@ -297,8 +298,8 @@ void __show_regs(struct pt_regs * regs)
2587  
2588         printk("\n");
2589         print_modules();
2590 -       printk("Pid: %d, comm: %.20s %s %s %.*s\n",
2591 -               current->pid, current->comm, print_tainted(),
2592 +       printk("Pid: %d[#%u], comm: %.20s %s %s %.*s\n",
2593 +               current->pid, current->xid, current->comm, print_tainted(),
2594                 system_utsname.release,
2595                 (int)strcspn(system_utsname.version, " "),
2596                 system_utsname.version);
2597 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/kernel/sys_x86_64.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/kernel/sys_x86_64.c
2598 --- linux-2.6.16.20/arch/x86_64/kernel/sys_x86_64.c     2006-01-03 17:29:20 +0100
2599 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/kernel/sys_x86_64.c        2006-04-26 19:07:00 +0200
2600 @@ -16,6 +16,7 @@
2601  #include <linux/file.h>
2602  #include <linux/utsname.h>
2603  #include <linux/personality.h>
2604 +#include <linux/vs_cvirt.h>
2605  
2606  #include <asm/uaccess.h>
2607  #include <asm/ia32.h>
2608 @@ -148,7 +149,7 @@ asmlinkage long sys_uname(struct new_uts
2609  {
2610         int err;
2611         down_read(&uts_sem);
2612 -       err = copy_to_user(name, &system_utsname, sizeof (*name));
2613 +       err = copy_to_user(name, vx_new_utsname(), sizeof (*name));
2614         up_read(&uts_sem);
2615         if (personality(current->personality) == PER_LINUX32) 
2616                 err |= copy_to_user(&name->machine, "i686", 5);                 
2617 diff -NurpP --minimal linux-2.6.16.20/arch/x86_64/kernel/traps.c linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/kernel/traps.c
2618 --- linux-2.6.16.20/arch/x86_64/kernel/traps.c  2006-06-06 15:37:20 +0200
2619 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/x86_64/kernel/traps.c     2006-06-06 15:31:32 +0200
2620 @@ -322,8 +322,9 @@ void show_registers(struct pt_regs *regs
2621  
2622         printk("CPU %d ", cpu);
2623         __show_regs(regs);
2624 -       printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
2625 -               cur->comm, cur->pid, task_thread_info(cur), cur);
2626 +       printk("Process %s (pid: %d[#%u], threadinfo %p, task %p)\n",
2627 +               cur->comm, cur->pid, cur->xid,
2628 +               task_thread_info(cur), cur);
2629  
2630         /*
2631          * When in-kernel, we also print out the stack and code at the
2632 diff -NurpP --minimal linux-2.6.16.20/arch/xtensa/kernel/process.c linux-2.6.16.20-vs2.1.1-rc22/arch/xtensa/kernel/process.c
2633 --- linux-2.6.16.20/arch/xtensa/kernel/process.c        2006-04-09 13:49:44 +0200
2634 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/xtensa/kernel/process.c   2006-04-26 19:07:00 +0200
2635 @@ -207,7 +207,7 @@ int kernel_thread(int (*fn)(void *), voi
2636                  :"=r" (retval)
2637                  :"i" (__NR_clone), "i" (__NR_exit),
2638                  "r" (arg), "r" (fn),
2639 -                "r" (flags | CLONE_VM)
2640 +                "r" (flags | CLONE_VM | CLONE_KTHREAD)
2641                  : "a2", "a3", "a4", "a5", "a6" );
2642         return retval;
2643  }
2644 diff -NurpP --minimal linux-2.6.16.20/arch/xtensa/kernel/syscalls.c linux-2.6.16.20-vs2.1.1-rc22/arch/xtensa/kernel/syscalls.c
2645 --- linux-2.6.16.20/arch/xtensa/kernel/syscalls.c       2005-08-29 22:24:57 +0200
2646 +++ linux-2.6.16.20-vs2.1.1-rc22/arch/xtensa/kernel/syscalls.c  2006-05-11 16:06:22 +0200
2647 @@ -35,6 +35,7 @@
2648  #include <linux/msg.h>
2649  #include <linux/shm.h>
2650  #include <linux/errno.h>
2651 +#include <linux/vs_cvirt.h>
2652  #include <asm/ptrace.h>
2653  #include <asm/signal.h>
2654  #include <asm/uaccess.h>
2655 @@ -129,7 +130,7 @@ out:
2656  
2657  int sys_uname(struct old_utsname * name)
2658  {
2659 -       if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
2660 +       if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name)))
2661                 return 0;
2662         return -EFAULT;
2663  }
2664 diff -NurpP --minimal linux-2.6.16.20/block/cfq-iosched.c linux-2.6.16.20-vs2.1.1-rc22/block/cfq-iosched.c
2665 --- linux-2.6.16.20/block/cfq-iosched.c 2006-03-20 17:32:30 +0100
2666 +++ linux-2.6.16.20-vs2.1.1-rc22/block/cfq-iosched.c    2006-05-02 03:32:45 +0200
2667 @@ -1756,6 +1756,8 @@ static void cfq_prio_boost(struct cfq_qu
2668  
2669  static inline pid_t cfq_queue_pid(struct task_struct *task, int rw)
2670  {
2671 +       if (task->xid)
2672 +               return task->xid + (1 << 16);
2673         if (rw == READ || process_sync(task))
2674                 return task->pid;
2675  
2676 diff -NurpP --minimal linux-2.6.16.20/drivers/block/Kconfig linux-2.6.16.20-vs2.1.1-rc22/drivers/block/Kconfig
2677 --- linux-2.6.16.20/drivers/block/Kconfig       2006-02-18 14:39:52 +0100
2678 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/block/Kconfig  2006-04-26 19:07:00 +0200
2679 @@ -315,6 +315,13 @@ config BLK_DEV_CRYPTOLOOP
2680           instead, which can be configured to be on-disk compatible with the
2681           cryptoloop device.
2682  
2683 +config BLK_DEV_VROOT
2684 +       tristate "Virtual Root device support"
2685 +       depends on QUOTACTL
2686 +       ---help---
2687 +         Saying Y here will allow you to use quota/fs ioctls on a shared
2688 +         partition within a virtual server without compromising security.
2689 +
2690  config BLK_DEV_NBD
2691         tristate "Network block device support"
2692         depends on NET
2693 diff -NurpP --minimal linux-2.6.16.20/drivers/block/Makefile linux-2.6.16.20-vs2.1.1-rc22/drivers/block/Makefile
2694 --- linux-2.6.16.20/drivers/block/Makefile      2006-01-03 17:29:21 +0100
2695 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/block/Makefile 2006-04-26 19:07:00 +0200
2696 @@ -30,4 +30,5 @@ obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryp
2697  obj-$(CONFIG_VIODASD)          += viodasd.o
2698  obj-$(CONFIG_BLK_DEV_SX8)      += sx8.o
2699  obj-$(CONFIG_BLK_DEV_UB)       += ub.o
2700 +obj-$(CONFIG_BLK_DEV_VROOT)    += vroot.o
2701  
2702 diff -NurpP --minimal linux-2.6.16.20/drivers/block/loop.c linux-2.6.16.20-vs2.1.1-rc22/drivers/block/loop.c
2703 --- linux-2.6.16.20/drivers/block/loop.c        2006-01-18 06:07:59 +0100
2704 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/block/loop.c   2006-04-26 19:07:00 +0200
2705 @@ -74,6 +74,7 @@
2706  #include <linux/completion.h>
2707  #include <linux/highmem.h>
2708  #include <linux/gfp.h>
2709 +#include <linux/vs_context.h>
2710  
2711  #include <asm/uaccess.h>
2712  
2713 @@ -743,10 +744,12 @@ static int loop_set_fd(struct loop_devic
2714         struct file     *file, *f;
2715         struct inode    *inode;
2716         struct address_space *mapping;
2717 +       struct vx_info_save vxis;
2718         unsigned lo_blocksize;
2719         int             lo_flags = 0;
2720         int             error;
2721         loff_t          size;
2722 +       pid_t           pid;
2723  
2724         /* This is safe, since we have a reference from open(). */
2725         __module_get(THIS_MODULE);
2726 @@ -839,10 +842,16 @@ static int loop_set_fd(struct loop_devic
2727  
2728         set_blocksize(bdev, lo_blocksize);
2729  
2730 -       kernel_thread(loop_thread, lo, CLONE_KERNEL);
2731 +       __enter_vx_admin(&vxis);
2732 +       pid = kernel_thread(loop_thread, lo, CLONE_KERNEL);
2733 +       __leave_vx_admin(&vxis);
2734 +       if (pid < 0)
2735 +               goto out_err;
2736         wait_for_completion(&lo->lo_done);
2737         return 0;
2738  
2739 + out_err:
2740 +       error = (int)pid;
2741   out_putf:
2742         fput(file);
2743   out:
2744 diff -NurpP --minimal linux-2.6.16.20/drivers/block/vroot.c linux-2.6.16.20-vs2.1.1-rc22/drivers/block/vroot.c
2745 --- linux-2.6.16.20/drivers/block/vroot.c       1970-01-01 01:00:00 +0100
2746 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/block/vroot.c  2006-04-26 19:07:00 +0200
2747 @@ -0,0 +1,288 @@
2748 +/*
2749 + *  linux/drivers/block/vroot.c
2750 + *
2751 + *  written by Herbert Pötzl, 9/11/2002
2752 + *  ported to 2.6.10 by Herbert Pötzl, 30/12/2004
2753 + *
2754 + *  based on the loop.c code by Theodore Ts'o.
2755 + *
2756 + * Copyright (C) 2002-2005 by Herbert Pötzl.
2757 + * Redistribution of this file is permitted under the
2758 + * GNU General Public License.
2759 + *
2760 + */
2761 +
2762 +#include <linux/module.h>
2763 +#include <linux/moduleparam.h>
2764 +#include <linux/file.h>
2765 +#include <linux/major.h>
2766 +#include <linux/blkdev.h>
2767 +#include <linux/devfs_fs_kernel.h>
2768 +
2769 +#include <linux/vroot.h>
2770 +#include <linux/vserver/debug.h>
2771 +
2772 +
2773 +static int max_vroot = 8;       /* device count; set via module param or max_vroot= */
2774 +
2775 +static struct vroot_device *vroot_dev;  /* max_vroot entries, allocated in vroot_init() */
2776 +static struct gendisk **disks;          /* one gendisk per entry, allocated in vroot_init() */
2777 +
2778 +/* VROOT_SET_DEV handler: bind vr to the block device open on fd arg. */
2779 +static int vroot_set_dev(
2780 +       struct vroot_device *vr,
2781 +       struct file *vr_file,           /* not used in this function */
2782 +       struct block_device *bdev,      /* not used in this function */
2783 +       unsigned int arg)               /* file descriptor, resolved with fget() */
2784 +{
2785 +       struct block_device *real_bdev;
2786 +       struct file *file;
2787 +       struct inode *inode;
2788 +       int error;
2789 +
2790 +       error = -EBUSY;         /* only an unbound device may be bound */
2791 +       if (vr->vr_state != Vr_unbound)
2792 +               goto out;
2793 +
2794 +       error = -EBADF;         /* fget() found no open file for arg */
2795 +       file = fget(arg);
2796 +       if (!file)
2797 +               goto out;
2798 +
2799 +       error = -EINVAL;        /* returned when the file is not a block device */
2800 +       inode = file->f_dentry->d_inode;
2801 +
2802 +       /* __iget() below is presumably undone by bdput() in vroot_clr_dev() - confirm */
2803 +       if (S_ISBLK(inode->i_mode)) {
2804 +               real_bdev = inode->i_bdev;
2805 +               vr->vr_device = real_bdev;
2806 +               __iget(real_bdev->bd_inode);
2807 +       } else
2808 +               goto out_fput;
2809 +
2810 +       vxdprintk(VXD_CBIT(misc, 0),
2811 +               "vroot[%d]_set_dev: dev=" VXF_DEV,
2812 +               vr->vr_number, VXD_DEV(real_bdev));
2813 +
2814 +       vr->vr_state = Vr_bound;
2815 +       error = 0;
2816 +
2817 + out_fput:
2818 +       fput(file);     /* also reached on success: the fget() reference is always dropped */
2819 + out:
2820 +       return error;
2821 +}
2822 +/* VROOT_CLR_DEV handler: unbind vr from the block device it is bound to. */
2823 +static int vroot_clr_dev(
2824 +       struct vroot_device *vr,
2825 +       struct file *vr_file,           /* not used in this function */
2826 +       struct block_device *bdev)      /* not used in this function */
2827 +{
2828 +       struct block_device *real_bdev;
2829 +
2830 +       if (vr->vr_state != Vr_bound)
2831 +               return -ENXIO;  /* nothing is bound */
2832 +       if (vr->vr_refcnt > 1)  /* we needed one fd for the ioctl */
2833 +               return -EBUSY;  /* somebody else still has the device open */
2834 +
2835 +       real_bdev = vr->vr_device;
2836 +
2837 +       vxdprintk(VXD_CBIT(misc, 0),
2838 +               "vroot[%d]_clr_dev: dev=" VXF_DEV,
2839 +               vr->vr_number, VXD_DEV(real_bdev));
2840 +
2841 +       bdput(real_bdev);       /* presumably drops the reference taken in vroot_set_dev() */
2842 +       vr->vr_state = Vr_unbound;
2843 +       vr->vr_device = NULL;
2844 +       return 0;
2845 +}
2846 +
2847 +/* ioctl entry point: run VROOT_SET_DEV / VROOT_CLR_DEV under vr_ctl_mutex. */
2848 +static int vr_ioctl(struct inode * inode, struct file * file,
2849 +       unsigned int cmd, unsigned long arg)
2850 +{
2851 +       struct vroot_device *vr = inode->i_bdev->bd_disk->private_data;
2852 +       int err;
2853 +
2854 +       down(&vr->vr_ctl_mutex);        /* same mutex as vr_open()/vr_release() */
2855 +       switch (cmd) {
2856 +       case VROOT_SET_DEV:
2857 +               err = vroot_set_dev(vr, file, inode->i_bdev, arg);
2858 +               break;
2859 +       case VROOT_CLR_DEV:
2860 +               err = vroot_clr_dev(vr, file, inode->i_bdev);
2861 +               break;
2862 +       default:
2863 +               err = -EINVAL;  /* any other command is rejected */
2864 +               break;
2865 +       }
2866 +       up(&vr->vr_ctl_mutex);
2867 +       return err;
2868 +}
2869 +/* open(): count one more opener of this vroot device; always returns 0. */
2870 +static int vr_open(struct inode *inode, struct file *file)
2871 +{
2872 +       struct vroot_device *vr = inode->i_bdev->bd_disk->private_data;
2873 +
2874 +       down(&vr->vr_ctl_mutex);
2875 +       vr->vr_refcnt++;        /* vroot_clr_dev() refuses to unbind while this is > 1 */
2876 +       up(&vr->vr_ctl_mutex);
2877 +       return 0;
2878 +}
2879 +/* release(): drop the opener count taken in vr_open(); always returns 0. */
2880 +static int vr_release(struct inode *inode, struct file *file)
2881 +{
2882 +       struct vroot_device *vr = inode->i_bdev->bd_disk->private_data;
2883 +
2884 +       down(&vr->vr_ctl_mutex);
2885 +       --vr->vr_refcnt;        /* updated under the same mutex as in vr_open() */
2886 +       up(&vr->vr_ctl_mutex);
2887 +       return 0;
2888 +}
2889 +/* Block device operations; installed as disk->fops for every disk in vroot_init(). */
2890 +static struct block_device_operations vr_fops = {
2891 +       .owner =        THIS_MODULE,
2892 +       .open =         vr_open,
2893 +       .release =      vr_release,
2894 +       .ioctl =        vr_ioctl,
2895 +};
2896 +/* Return the real device bound to vroot device bdev, or ERR_PTR(-ENXIO) if unbound. */
2897 +struct block_device *__vroot_get_real_bdev(struct block_device *bdev)
2898 +{
2899 +       struct inode *inode = bdev->bd_inode;
2900 +       struct vroot_device *vr;
2901 +       struct block_device *real_bdev;
2902 +       int minor = iminor(inode);
2903 +
2904 +       vr = &vroot_dev[minor]; /* NOTE(review): minor is not checked against max_vroot */
2905 +       real_bdev = vr->vr_device;      /* read without taking vr_ctl_mutex */
2906 +
2907 +       vxdprintk(VXD_CBIT(misc, 0),
2908 +               "vroot[%d]_get_real_bdev: dev=" VXF_DEV,
2909 +               vr->vr_number, VXD_DEV(real_bdev));
2910 +
2911 +       if (vr->vr_state != Vr_bound)
2912 +               return ERR_PTR(-ENXIO);
2913 +
2914 +       __iget(real_bdev->bd_inode);    /* presumably released by the caller - confirm */
2915 +       return real_bdev;
2916 +}
2917 +
2918 +/*
2919 + * Module parameter, module metadata and the init/exit kernel interface.
2920 + */
2921 +
2922 +module_param(max_vroot, int, 0);       /* value is range-checked in vroot_init() */
2923 +
2924 +MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)");
2925 +MODULE_LICENSE("GPL");
2926 +MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR);
2927 +
2928 +MODULE_AUTHOR ("Herbert Pötzl");
2929 +MODULE_DESCRIPTION ("Virtual Root Device Mapper");
2930 +
2931 +/* Module init: register the major, allocate device state and disks, add the disks. */
2932 +int __init vroot_init(void)
2933 +{
2934 +       int err, i;
2935 +
2936 +       if (max_vroot < 1 || max_vroot > 256) {
2937 +               max_vroot = MAX_VROOT_DEFAULT;
2938 +               printk(KERN_WARNING "vroot: invalid max_vroot "
2939 +                       "(must be between 1 and 256), "
2940 +                       "using default (%d)\n", max_vroot);
2941 +       }
2942 +
2943 +       if (register_blkdev(VROOT_MAJOR, "vroot"))
2944 +               return -EIO;    /* any registration failure is reported as -EIO */
2945 +
2946 +       err = -ENOMEM;          /* result for all allocation failures below */
2947 +       vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL);
2948 +       if (!vroot_dev)
2949 +               goto out_mem1;
2950 +       memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device));
2951 +
2952 +       disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL);
2953 +       if (!disks)
2954 +               goto out_mem2;
2955 +
2956 +       for (i = 0; i < max_vroot; i++) {
2957 +               disks[i] = alloc_disk(1);
2958 +               if (!disks[i])
2959 +                       goto out_mem3;  /* i == number of disks allocated so far */
2960 +       }
2961 +
2962 +       devfs_mk_dir("vroot");
2963 +
2964 +       for (i = 0; i < max_vroot; i++) {
2965 +               struct vroot_device *vr = &vroot_dev[i];
2966 +               struct gendisk *disk = disks[i];
2967 +
2968 +               memset(vr, 0, sizeof(*vr));     /* the whole array was already zeroed above */
2969 +               init_MUTEX(&vr->vr_ctl_mutex);
2970 +               vr->vr_number = i;
2971 +               disk->major = VROOT_MAJOR;
2972 +               disk->first_minor = i;
2973 +               disk->fops = &vr_fops;
2974 +               sprintf(disk->disk_name, "vroot%d", i);
2975 +               sprintf(disk->devfs_name, "vroot/%d", i);
2976 +               disk->private_data = vr;        /* read back by vr_open/vr_release/vr_ioctl */
2977 +       }
2978 +
2979 +       err = register_vroot_grb(&__vroot_get_real_bdev);
2980 +       if (err)
2981 +               goto out_reg;   /* i == max_vroot here, so out_mem3 puts every disk */
2982 +
2983 +       for (i = 0; i < max_vroot; i++)
2984 +               add_disk(disks[i]);
2985 +       printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot);
2986 +       return 0;
2987 +
2988 +out_reg:
2989 +       devfs_remove("vroot");
2990 +out_mem3:
2991 +       while (i--)             /* release only the disks that were allocated */
2992 +               put_disk(disks[i]);
2993 +       kfree(disks);
2994 +out_mem2:
2995 +       kfree(vroot_dev);
2996 +out_mem1:
2997 +       unregister_blkdev(VROOT_MAJOR, "vroot");
2998 +       printk(KERN_ERR "vroot: ran out of memory\n");  /* NOTE(review): also printed via out_reg */
2999 +       return err;
3000 +}
3001 +/* Module exit: unregister the grb hook, remove all disks and free the device state. */
3002 +void vroot_exit(void)
3003 +{
3004 +       int i;
3005 +
3006 +       if (unregister_vroot_grb(&__vroot_get_real_bdev))
3007 +               printk(KERN_WARNING "vroot: cannot unregister grb\n");  /* warn, then continue */
3008 +
3009 +       for (i = 0; i < max_vroot; i++) {
3010 +               del_gendisk(disks[i]);
3011 +               put_disk(disks[i]);
3012 +       }
3013 +       devfs_remove("vroot");
3014 +       if (unregister_blkdev(VROOT_MAJOR, "vroot"))
3015 +               printk(KERN_WARNING "vroot: cannot unregister blkdev\n");       /* warn only */
3016 +
3017 +       kfree(disks);
3018 +       kfree(vroot_dev);
3019 +}
3020 +
3021 +module_init(vroot_init);
3022 +module_exit(vroot_exit);
3023 +
3024 +#ifndef MODULE
3025 +/* Only when not built as a module: handler for the "max_vroot=" __setup option. */
3026 +static int __init max_vroot_setup(char *str)
3027 +{
3028 +       max_vroot = simple_strtol(str, NULL, 0);        /* range is checked in vroot_init() */
3029 +       return 1;
3030 +}
3031 +
3032 +__setup("max_vroot=", max_vroot_setup);
3033 +
3034 +#endif
3035 +
3036 diff -NurpP --minimal linux-2.6.16.20/drivers/char/random.c linux-2.6.16.20-vs2.1.1-rc22/drivers/char/random.c
3037 --- linux-2.6.16.20/drivers/char/random.c       2006-04-09 13:49:45 +0200
3038 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/char/random.c  2006-04-26 19:07:00 +0200
3039 @@ -1174,7 +1174,7 @@ static char sysctl_bootid[16];
3040  static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
3041                         void __user *buffer, size_t *lenp, loff_t *ppos)
3042  {
3043 -       ctl_table fake_table;
3044 +       ctl_table fake_table = {0};
3045         unsigned char buf[64], tmp_uuid[16], *uuid;
3046  
3047         uuid = table->data;
3048 diff -NurpP --minimal linux-2.6.16.20/drivers/char/sysrq.c linux-2.6.16.20-vs2.1.1-rc22/drivers/char/sysrq.c
3049 --- linux-2.6.16.20/drivers/char/sysrq.c        2006-04-09 13:49:45 +0200
3050 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/char/sysrq.c   2006-04-26 19:07:00 +0200
3051 @@ -36,6 +36,7 @@
3052  #include <linux/vt_kern.h>
3053  #include <linux/workqueue.h>
3054  #include <linux/kexec.h>
3055 +#include <linux/vserver/debug.h>
3056  
3057  #include <asm/ptrace.h>
3058  
3059 @@ -286,6 +287,21 @@ static struct sysrq_key_op sysrq_unrt_op
3060         .enable_mask    = SYSRQ_ENABLE_RTNICE,
3061  };
3062  
3063 +/* SysRq 'x' handler and key op; only built when CONFIG_VSERVER_DEBUG is set. */
3064 +#ifdef CONFIG_VSERVER_DEBUG
3065 +static void sysrq_handle_vxinfo(int key, struct pt_regs *pt_regs,
3066 +                                struct tty_struct *tty)
3067 +{
3068 +       dump_vx_info_inactive((key == 'x')?0:1);        /* 0 for 'x', 1 for any other key, e.g. 'X' */
3069 +}
3070 +static struct sysrq_key_op sysrq_showvxinfo_op = {
3071 +       .handler        = sysrq_handle_vxinfo,
3072 +       .help_msg       = "conteXt",
3073 +       .action_msg     = "Show Context Info",
3074 +       .enable_mask    = SYSRQ_ENABLE_DUMP,
3075 +};
3076 +#endif
3077 +
3078  /* Key Operations table and lock */
3079  static DEFINE_SPINLOCK(sysrq_key_table_lock);
3080  #define SYSRQ_KEY_TABLE_LENGTH 36
3081 @@ -342,7 +358,11 @@ static struct sysrq_key_op *sysrq_key_ta
3082  /* u */        &sysrq_mountro_op,
3083  /* v */        NULL, /* May be assigned at init time by SMP VOYAGER */
3084  /* w */        NULL,
3085 +#ifdef CONFIG_VSERVER_DEBUG
3086 +/* x */        &sysrq_showvxinfo_op,
3087 +#else
3088  /* x */        NULL,
3089 +#endif
3090  /* y */        NULL,
3091  /* z */        NULL
3092  };
3093 @@ -354,6 +374,8 @@ static int sysrq_key_table_key2index(int
3094                 retval = key - '0';
3095         } else if ((key >= 'a') && (key <= 'z')) {
3096                 retval = key + 10 - 'a';
3097 +       } else if ((key >= 'A') && (key <= 'Z')) {
3098 +               retval = key + 10 - 'A';
3099         } else {
3100                 retval = -1;
3101         }
3102 diff -NurpP --minimal linux-2.6.16.20/drivers/char/tty_io.c linux-2.6.16.20-vs2.1.1-rc22/drivers/char/tty_io.c
3103 --- linux-2.6.16.20/drivers/char/tty_io.c       2006-05-11 21:25:35 +0200
3104 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/char/tty_io.c  2006-04-26 19:07:00 +0200
3105 @@ -103,6 +103,7 @@
3106  #include <linux/vt_kern.h>
3107  #include <linux/selection.h>
3108  #include <linux/devfs_fs_kernel.h>
3109 +#include <linux/vs_pid.h>
3110  
3111  #include <linux/kmod.h>
3112  
3113 @@ -2381,13 +2382,16 @@ static int tiocsctty(struct tty_struct *
3114  
3115  static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
3116  {
3117 +       pid_t pgrp;
3118         /*
3119          * (tty == real_tty) is a cheap way of
3120          * testing if the tty is NOT a master pty.
3121          */
3122         if (tty == real_tty && current->signal->tty != real_tty)
3123                 return -ENOTTY;
3124 -       return put_user(real_tty->pgrp, p);
3125 +
3126 +       pgrp = vx_map_pid(real_tty->pgrp);
3127 +       return put_user(pgrp, p);
3128  }
3129  
3130  static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
3131 @@ -2405,6 +2409,8 @@ static int tiocspgrp(struct tty_struct *
3132                 return -ENOTTY;
3133         if (get_user(pgrp, p))
3134                 return -EFAULT;
3135 +
3136 +       pgrp = vx_rmap_pid(pgrp);
3137         if (pgrp < 0)
3138                 return -EINVAL;
3139         if (session_of_pgrp(pgrp) != current->signal->session)
3140 diff -NurpP --minimal linux-2.6.16.20/drivers/infiniband/core/uverbs_mem.c linux-2.6.16.20-vs2.1.1-rc22/drivers/infiniband/core/uverbs_mem.c
3141 --- linux-2.6.16.20/drivers/infiniband/core/uverbs_mem.c        2005-10-28 20:49:23 +0200
3142 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/infiniband/core/uverbs_mem.c   2006-04-26 19:07:00 +0200
3143 @@ -36,6 +36,7 @@
3144  
3145  #include <linux/mm.h>
3146  #include <linux/dma-mapping.h>
3147 +#include <linux/vs_memory.h>
3148  
3149  #include "uverbs.h"
3150  
3151 @@ -161,7 +162,7 @@ out:
3152         if (ret < 0)
3153                 __ib_umem_release(dev, mem, 0);
3154         else
3155 -               current->mm->locked_vm = locked;
3156 +               vx_vmlocked_sub(current->mm, current->mm->locked_vm - locked);
3157  
3158         up_write(&current->mm->mmap_sem);
3159         free_page((unsigned long) page_list);
3160 @@ -174,8 +175,8 @@ void ib_umem_release(struct ib_device *d
3161         __ib_umem_release(dev, umem, 1);
3162  
3163         down_write(&current->mm->mmap_sem);
3164 -       current->mm->locked_vm -=
3165 -               PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
3166 +       vx_vmlocked_sub(current->mm,
3167 +               PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT);
3168         up_write(&current->mm->mmap_sem);
3169  }
3170  
3171 @@ -184,7 +185,7 @@ static void ib_umem_account(void *work_p
3172         struct ib_umem_account_work *work = work_ptr;
3173  
3174         down_write(&work->mm->mmap_sem);
3175 -       work->mm->locked_vm -= work->diff;
3176 +       vx_vmlocked_sub(work->mm, work->diff);
3177         up_write(&work->mm->mmap_sem);
3178         mmput(work->mm);
3179         kfree(work);
3180 diff -NurpP --minimal linux-2.6.16.20/drivers/mtd/devices/blkmtd.c linux-2.6.16.20-vs2.1.1-rc22/drivers/mtd/devices/blkmtd.c
3181 --- linux-2.6.16.20/drivers/mtd/devices/blkmtd.c        2006-01-03 17:29:35 +0100
3182 +++ linux-2.6.16.20-vs2.1.1-rc22/drivers/mtd/devices/blkmtd.c   2006-04-26 19:07:00 +0200
3183 @@ -614,7 +614,7 @@ static struct mtd_erase_region_info *cal
3184  }
3185  
3186  
3187 -extern dev_t __init name_to_dev_t(const char *line);
3188 +extern dev_t __init name_to_dev_t(char *line);
3189  
3190  static struct blkmtd_dev *add_device(char *devname, int readonly, int erase_size)
3191  {
3192 diff -NurpP --minimal linux-2.6.16.20/fs/attr.c linux-2.6.16.20-vs2.1.1-rc22/fs/attr.c
3193 --- linux-2.6.16.20/fs/attr.c   2006-04-09 13:49:53 +0200
3194 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/attr.c      2006-04-26 19:07:00 +0200
3195 @@ -15,6 +15,9 @@
3196  #include <linux/fcntl.h>
3197  #include <linux/quotaops.h>
3198  #include <linux/security.h>
3199 +#include <linux/proc_fs.h>
3200 +#include <linux/devpts_fs.h>
3201 +#include <linux/vserver/debug.h>
3202  
3203  /* Taken over from the old code... */
3204  
3205 @@ -56,6 +59,30 @@ int inode_change_ok(struct inode *inode,
3206                 if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
3207                         goto error;
3208         }
3209 +
3210 +       /* Check for evil vserver activity */
3211 +       if (vx_check(0, VX_ADMIN))
3212 +               goto fine;
3213 +
3214 +       if (IS_BARRIER(inode)) {
3215 +               vxwprintk(1, "xid=%d messing with the barrier.",
3216 +                       vx_current_xid());
3217 +               goto error;
3218 +       }
3219 +       switch (inode->i_sb->s_magic) {
3220 +               case PROC_SUPER_MAGIC:
3221 +                       /* maybe allow that in the future? */
3222 +                       vxwprintk(1, "xid=%d messing with the procfs.",
3223 +                               vx_current_xid());
3224 +                       goto error;
3225 +               case DEVPTS_SUPER_MAGIC:
3226 +                       /* devpts is xid tagged */
3227 +                       if (vx_check((xid_t)inode->i_tag, VX_IDENT))
3228 +                               goto fine;
3229 +                       vxwprintk(1, "xid=%d messing with the devpts.",
3230 +                               vx_current_xid());
3231 +                       goto error;
3232 +       }
3233  fine:
3234         retval = 0;
3235  error:
3236 @@ -79,6 +106,8 @@ int inode_setattr(struct inode * inode, 
3237                 inode->i_uid = attr->ia_uid;
3238         if (ia_valid & ATTR_GID)
3239                 inode->i_gid = attr->ia_gid;
3240 +       if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode))
3241 +               inode->i_tag = attr->ia_tag;
3242         if (ia_valid & ATTR_ATIME)
3243                 inode->i_atime = timespec_trunc(attr->ia_atime,
3244                                                 inode->i_sb->s_time_gran);
3245 @@ -153,7 +182,8 @@ int notify_change(struct dentry * dentry
3246                         error = security_inode_setattr(dentry, attr);
3247                 if (!error) {
3248                         if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3249 -                           (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
3250 +                           (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
3251 +                           (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag))
3252                                 error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
3253                         if (!error)
3254                                 error = inode_setattr(inode, attr);
3255 diff -NurpP --minimal linux-2.6.16.20/fs/binfmt_aout.c linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_aout.c
3256 --- linux-2.6.16.20/fs/binfmt_aout.c    2006-04-09 13:49:53 +0200
3257 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_aout.c       2006-04-26 19:07:00 +0200
3258 @@ -24,6 +24,7 @@
3259  #include <linux/binfmts.h>
3260  #include <linux/personality.h>
3261  #include <linux/init.h>
3262 +#include <linux/vs_memory.h>
3263  
3264  #include <asm/system.h>
3265  #include <asm/uaccess.h>
3266 diff -NurpP --minimal linux-2.6.16.20/fs/binfmt_elf.c linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_elf.c
3267 --- linux-2.6.16.20/fs/binfmt_elf.c     2006-04-09 13:49:53 +0200
3268 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_elf.c        2006-05-11 16:06:22 +0200
3269 @@ -38,6 +38,8 @@
3270  #include <linux/security.h>
3271  #include <linux/syscalls.h>
3272  #include <linux/random.h>
3273 +#include <linux/vs_memory.h>
3274 +#include <linux/vs_cvirt.h>
3275  
3276  #include <asm/uaccess.h>
3277  #include <asm/param.h>
3278 diff -NurpP --minimal linux-2.6.16.20/fs/binfmt_elf_fdpic.c linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_elf_fdpic.c
3279 --- linux-2.6.16.20/fs/binfmt_elf_fdpic.c       2006-01-18 06:08:29 +0100
3280 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_elf_fdpic.c  2006-05-11 16:06:22 +0200
3281 @@ -32,6 +32,7 @@
3282  #include <linux/elf.h>
3283  #include <linux/elf-fdpic.h>
3284  #include <linux/elfcore.h>
3285 +#include <linux/vs_cvirt.h>
3286  
3287  #include <asm/uaccess.h>
3288  #include <asm/param.h>
3289 diff -NurpP --minimal linux-2.6.16.20/fs/binfmt_flat.c linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_flat.c
3290 --- linux-2.6.16.20/fs/binfmt_flat.c    2006-02-18 14:40:21 +0100
3291 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_flat.c       2006-04-26 19:07:00 +0200
3292 @@ -36,6 +36,7 @@
3293  #include <linux/personality.h>
3294  #include <linux/init.h>
3295  #include <linux/flat.h>
3296 +#include <linux/vs_memory.h>
3297  
3298  #include <asm/byteorder.h>
3299  #include <asm/system.h>
3300 diff -NurpP --minimal linux-2.6.16.20/fs/binfmt_som.c linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_som.c
3301 --- linux-2.6.16.20/fs/binfmt_som.c     2006-01-03 17:29:55 +0100
3302 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/binfmt_som.c        2006-04-26 19:07:00 +0200
3303 @@ -28,6 +28,7 @@
3304  #include <linux/shm.h>
3305  #include <linux/personality.h>
3306  #include <linux/init.h>
3307 +#include <linux/vs_memory.h>
3308  
3309  #include <asm/uaccess.h>
3310  #include <asm/pgtable.h>
3311 diff -NurpP --minimal linux-2.6.16.20/fs/buffer.c linux-2.6.16.20-vs2.1.1-rc22/fs/buffer.c
3312 --- linux-2.6.16.20/fs/buffer.c 2006-04-09 13:49:53 +0200
3313 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/buffer.c    2006-04-26 19:07:00 +0200
3314 @@ -168,7 +168,7 @@ EXPORT_SYMBOL(sync_blockdev);
3315  int fsync_super(struct super_block *sb)
3316  {
3317         sync_inodes_sb(sb, 0);
3318 -       DQUOT_SYNC(sb);
3319 +       DQUOT_SYNC(sb->s_dqh);
3320         lock_super(sb);
3321         if (sb->s_dirt && sb->s_op->write_super)
3322                 sb->s_op->write_super(sb);
3323 @@ -217,7 +217,7 @@ struct super_block *freeze_bdev(struct b
3324                 smp_wmb();
3325  
3326                 sync_inodes_sb(sb, 0);
3327 -               DQUOT_SYNC(sb);
3328 +               DQUOT_SYNC(sb->s_dqh);
3329  
3330                 lock_super(sb);
3331                 if (sb->s_dirt && sb->s_op->write_super)
3332 diff -NurpP --minimal linux-2.6.16.20/fs/dcache.c linux-2.6.16.20-vs2.1.1-rc22/fs/dcache.c
3333 --- linux-2.6.16.20/fs/dcache.c 2006-03-20 17:33:10 +0100
3334 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/dcache.c    2006-05-02 04:33:23 +0200
3335 @@ -33,6 +33,7 @@
3336  #include <linux/seqlock.h>
3337  #include <linux/swap.h>
3338  #include <linux/bootmem.h>
3339 +#include <linux/vs_limit.h>
3340  
3341  /* #define DCACHE_DEBUG 1 */
3342  
3343 @@ -148,6 +149,7 @@ void dput(struct dentry *dentry)
3344         if (!dentry)
3345                 return;
3346  
3347 +       vx_dentry_dec(dentry);
3348  repeat:
3349         if (atomic_read(&dentry->d_count) == 1)
3350                 might_sleep();
3351 @@ -161,6 +163,8 @@ repeat:
3352                 return;
3353         }
3354  
3355 +       vx_dentry_dec(dentry);
3356 +
3357         /*
3358          * AV: ->d_delete() is _NOT_ allowed to block now.
3359          */
3360 @@ -271,6 +275,7 @@ static inline struct dentry * __dget_loc
3361         if (!list_empty(&dentry->d_lru)) {
3362                 dentry_stat.nr_unused--;
3363                 list_del_init(&dentry->d_lru);
3364 +               vx_dentry_inc(dentry);
3365         }
3366         return dentry;
3367  }
3368 @@ -714,6 +719,9 @@ struct dentry *d_alloc(struct dentry * p
3369         struct dentry *dentry;
3370         char *dname;
3371  
3372 +       if (!vx_dentry_avail(1))
3373 +               return NULL;
3374 +
3375         dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 
3376         if (!dentry)
3377                 return NULL;
3378 @@ -762,6 +770,7 @@ struct dentry *d_alloc(struct dentry * p
3379         if (parent)
3380                 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
3381         dentry_stat.nr_dentry++;
3382 +       vx_dentry_inc(dentry);
3383         spin_unlock(&dcache_lock);
3384  
3385         return dentry;
3386 @@ -1089,6 +1098,7 @@ struct dentry * __d_lookup(struct dentry
3387  
3388                 if (!d_unhashed(dentry)) {
3389                         atomic_inc(&dentry->d_count);
3390 +                       vx_dentry_inc(dentry);
3391                         found = dentry;
3392                 }
3393                 spin_unlock(&dentry->d_lock);
3394 diff -NurpP --minimal linux-2.6.16.20/fs/devpts/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/devpts/inode.c
3395 --- linux-2.6.16.20/fs/devpts/inode.c   2006-02-18 14:40:21 +0100
3396 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/devpts/inode.c      2006-04-26 19:07:00 +0200
3397 @@ -19,7 +19,20 @@
3398  #include <linux/tty.h>
3399  #include <linux/devpts_fs.h>
3400  
3401 -#define DEVPTS_SUPER_MAGIC 0x1cd1
3402 +
3403 +static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd)
3404 +{
3405 +       int ret = -EACCES;
3406 +
3407 +       /* devpts is xid tagged */
3408 +       if (vx_check((xid_t)inode->i_tag, VX_IDENT))
3409 +               ret = generic_permission(inode, mask, NULL);
3410 +       return ret;
3411 +}
3412 +
3413 +static struct inode_operations devpts_file_inode_operations = {
3414 +       .permission     = devpts_permission,
3415 +};
3416  
3417  static struct vfsmount *devpts_mnt;
3418  static struct dentry *devpts_root;
3419 @@ -69,6 +82,25 @@ static int devpts_remount(struct super_b
3420         return 0;
3421  }
3422  
3423 +static int devpts_filter(struct dentry *de)
3424 +{
3425 +       /* devpts is xid tagged */
3426 +       return vx_check((xid_t)de->d_inode->i_tag, VX_IDENT);
3427 +}
3428 +
3429 +static int devpts_readdir(struct file * filp, void * dirent, filldir_t filldir)
3430 +{
3431 +       return dcache_readdir_filter(filp, dirent, filldir, devpts_filter);
3432 +}
3433 +
3434 +static struct file_operations devpts_dir_operations = {
3435 +       .open           = dcache_dir_open,
3436 +       .release        = dcache_dir_close,
3437 +       .llseek         = dcache_dir_lseek,
3438 +       .read           = generic_read_dir,
3439 +       .readdir        = devpts_readdir,
3440 +};
3441 +
3442  static struct super_operations devpts_sops = {
3443         .statfs         = simple_statfs,
3444         .remount_fs     = devpts_remount,
3445 @@ -95,8 +127,10 @@ devpts_fill_super(struct super_block *s,
3446         inode->i_uid = inode->i_gid = 0;
3447         inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
3448         inode->i_op = &simple_dir_inode_operations;
3449 -       inode->i_fop = &simple_dir_operations;
3450 +       inode->i_fop = &devpts_dir_operations;
3451         inode->i_nlink = 2;
3452 +       /* devpts is xid tagged */
3453 +       inode->i_tag = (tag_t)vx_current_xid();
3454  
3455         devpts_root = s->s_root = d_alloc_root(inode);
3456         if (s->s_root)
3457 @@ -155,6 +189,9 @@ int devpts_pty_new(struct tty_struct *tt
3458         inode->i_gid = config.setgid ? config.gid : current->fsgid;
3459         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
3460         init_special_inode(inode, S_IFCHR|config.mode, device);
3461 +       /* devpts is xid tagged */
3462 +       inode->i_tag = (tag_t)vx_current_xid();
3463 +       inode->i_op = &devpts_file_inode_operations;
3464         inode->u.generic_ip = tty;
3465  
3466         dentry = get_node(number);
3467 diff -NurpP --minimal linux-2.6.16.20/fs/dquot.c linux-2.6.16.20-vs2.1.1-rc22/fs/dquot.c
3468 --- linux-2.6.16.20/fs/dquot.c  2006-04-09 13:49:53 +0200
3469 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/dquot.c     2006-04-26 19:07:00 +0200
3470 @@ -79,6 +79,7 @@
3471  #include <linux/buffer_head.h>
3472  #include <linux/capability.h>
3473  #include <linux/quotaops.h>
3474 +#include <linux/vserver/debug.h>
3475  
3476  #include <asm/uaccess.h>
3477  
3478 @@ -185,7 +186,7 @@ static void put_quota_format(struct quot
3479  /*
3480   * Dquot List Management:
3481   * The quota code uses three lists for dquot management: the inuse_list,
3482 - * free_dquots, and dquot_hash[] array. A single dquot structure may be
3483 + * free_dquots, and hash->dqh_hash[] array. A single dquot structure may be
3484   * on all three lists, depending on its current state.
3485   *
3486   * All dquots are placed to the end of inuse_list when first created, and this
3487 @@ -198,7 +199,7 @@ static void put_quota_format(struct quot
3488   * dquot is invalidated it's completely released from memory.
3489   *
3490   * Dquots with a specific identity (device, type and id) are placed on
3491 - * one of the dquot_hash[] hash chains. The provides an efficient search
3492 + * one of the hash->dqh_hash[] hash chains. This provides an efficient search
3493   * mechanism to locate a specific dquot.
3494   */
3495  
3496 @@ -212,36 +213,44 @@ struct dqstats dqstats;
3497  static void dqput(struct dquot *dquot);
3498  
3499  static inline unsigned int
3500 -hashfn(const struct super_block *sb, unsigned int id, int type)
3501 +hashfn(struct dqhash *hash, unsigned int id, int type)
3502  {
3503         unsigned long tmp;
3504  
3505 -       tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type);
3506 +       tmp = (((unsigned long)hash >> L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type);
3507         return (tmp + (tmp >> dq_hash_bits)) & dq_hash_mask;
3508  }
3509  
3510  /*
3511   * Following list functions expect dq_list_lock to be held
3512   */
3513 -static inline void insert_dquot_hash(struct dquot *dquot)
3514 +static inline void insert_dquot_hash(struct dqhash *hash, struct dquot *dquot)
3515  {
3516 -       struct hlist_head *head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id, dquot->dq_type);
3517 +       struct hlist_head *head = dquot_hash +
3518 +               hashfn(hash, dquot->dq_id, dquot->dq_type);
3519 +       /* struct hlist_head *head = hash->dqh_hash +
3520 +               hashfn(dquot->dq_dqh, dquot->dq_id, dquot->dq_type); */
3521         hlist_add_head(&dquot->dq_hash, head);
3522 +       dquot->dq_dqh = dqhget(hash);
3523  }
3524  
3525  static inline void remove_dquot_hash(struct dquot *dquot)
3526  {
3527         hlist_del_init(&dquot->dq_hash);
3528 +       dqhput(dquot->dq_dqh);
3529 +       dquot->dq_dqh = NULL;
3530  }
3531  
3532 -static inline struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, unsigned int id, int type)
3533 +static inline struct dquot *find_dquot(struct dqhash *hash,
3534 +       unsigned int hashent, unsigned int id, int type)
3535  {
3536         struct hlist_node *node;
3537         struct dquot *dquot;
3538  
3539 -       hlist_for_each (node, dquot_hash+hashent) {
3540 +       /* hlist_for_each (node, hash->dqh_hash + hashent) { */
3541 +       hlist_for_each (node, dquot_hash + hashent) {
3542                 dquot = hlist_entry(node, struct dquot, dq_hash);
3543 -               if (dquot->dq_sb == sb && dquot->dq_id == id && dquot->dq_type == type)
3544 +               if (dquot->dq_dqh == hash && dquot->dq_id == id && dquot->dq_type == type)
3545                         return dquot;
3546         }
3547         return NODQUOT;
3548 @@ -285,13 +294,13 @@ static void wait_on_dquot(struct dquot *
3549         up(&dquot->dq_lock);
3550  }
3551  
3552 -#define mark_dquot_dirty(dquot) ((dquot)->dq_sb->dq_op->mark_dirty(dquot))
3553 +#define mark_dquot_dirty(dquot) ((dquot)->dq_dqh->dqh_qop->mark_dirty(dquot))
3554  
3555  int dquot_mark_dquot_dirty(struct dquot *dquot)
3556  {
3557         spin_lock(&dq_list_lock);
3558         if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags))
3559 -               list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)->
3560 +               list_add(&dquot->dq_dirty, &dqh_dqopt(dquot->dq_dqh)->
3561                                 info[dquot->dq_type].dqi_dirty_list);
3562         spin_unlock(&dq_list_lock);
3563         return 0;
3564 @@ -306,9 +315,9 @@ static inline int clear_dquot_dirty(stru
3565         return 1;
3566  }
3567  
3568 -void mark_info_dirty(struct super_block *sb, int type)
3569 +void mark_info_dirty(struct dqhash *hash, int type)
3570  {
3571 -       set_bit(DQF_INFO_DIRTY_B, &sb_dqopt(sb)->info[type].dqi_flags);
3572 +       set_bit(DQF_INFO_DIRTY_B, &dqh_dqopt(hash)->info[type].dqi_flags);
3573  }
3574  EXPORT_SYMBOL(mark_info_dirty);
3575  
3576 @@ -319,7 +328,7 @@ EXPORT_SYMBOL(mark_info_dirty);
3577  int dquot_acquire(struct dquot *dquot)
3578  {
3579         int ret = 0, ret2 = 0;
3580 -       struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
3581 +       struct quota_info *dqopt = dqh_dqopt(dquot->dq_dqh);
3582  
3583         down(&dquot->dq_lock);
3584         down(&dqopt->dqio_sem);
3585 @@ -333,7 +342,7 @@ int dquot_acquire(struct dquot *dquot)
3586                 ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot);
3587                 /* Write the info if needed */
3588                 if (info_dirty(&dqopt->info[dquot->dq_type]))
3589 -                       ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_sb, dquot->dq_type);
3590 +                       ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_dqh, dquot->dq_type);
3591                 if (ret < 0)
3592                         goto out_iolock;
3593                 if (ret2 < 0) {
3594 @@ -354,7 +363,7 @@ out_iolock:
3595  int dquot_commit(struct dquot *dquot)
3596  {
3597         int ret = 0, ret2 = 0;
3598 -       struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
3599 +       struct quota_info *dqopt = dqh_dqopt(dquot->dq_dqh);
3600  
3601         down(&dqopt->dqio_sem);
3602         spin_lock(&dq_list_lock);
3603 @@ -368,7 +377,7 @@ int dquot_commit(struct dquot *dquot)
3604         if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
3605                 ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot);
3606                 if (info_dirty(&dqopt->info[dquot->dq_type]))
3607 -                       ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_sb, dquot->dq_type);
3608 +                       ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_dqh, dquot->dq_type);
3609                 if (ret >= 0)
3610                         ret = ret2;
3611         }
3612 @@ -383,7 +392,7 @@ out_sem:
3613  int dquot_release(struct dquot *dquot)
3614  {
3615         int ret = 0, ret2 = 0;
3616 -       struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
3617 +       struct quota_info *dqopt = dqh_dqopt(dquot->dq_dqh);
3618  
3619         down(&dquot->dq_lock);
3620         /* Check whether we are not racing with some other dqget() */
3621 @@ -394,7 +403,7 @@ int dquot_release(struct dquot *dquot)
3622                 ret = dqopt->ops[dquot->dq_type]->release_dqblk(dquot);
3623                 /* Write the info */
3624                 if (info_dirty(&dqopt->info[dquot->dq_type]))
3625 -                       ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_sb, dquot->dq_type);
3626 +                       ret2 = dqopt->ops[dquot->dq_type]->write_file_info(dquot->dq_dqh, dquot->dq_type);
3627                 if (ret >= 0)
3628                         ret = ret2;
3629         }
3630 @@ -409,13 +418,13 @@ out_dqlock:
3631   * quota is disabled and pointers from inodes removed so there cannot be new
3632   * quota users. Also because we hold dqonoff_sem there can be no quota users
3633   * for this sb+type at all. */
3634 -static void invalidate_dquots(struct super_block *sb, int type)
3635 +static void invalidate_dquots(struct dqhash *hash, int type)
3636  {
3637         struct dquot *dquot, *tmp;
3638  
3639         spin_lock(&dq_list_lock);
3640         list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
3641 -               if (dquot->dq_sb != sb)
3642 +               if (dquot->dq_dqh != hash)
3643                         continue;
3644                 if (dquot->dq_type != type)
3645                         continue;
3646 @@ -432,18 +441,94 @@ static void invalidate_dquots(struct sup
3647         spin_unlock(&dq_list_lock);
3648  }
3649  
3650 -int vfs_quota_sync(struct super_block *sb, int type)
3651 +
3652 +/* Dquot Hash Management Functions */
3653 +
3654 +static LIST_HEAD(dqhash_list);
3655 +
3656 +struct dqhash *new_dqhash(struct super_block *sb, unsigned int id)
3657 +{
3658 +       struct dqhash *hash;
3659 +       int err;
3660 +
3661 +       err = -ENOMEM;
3662 +       hash = kmalloc(sizeof(struct dqhash),  GFP_USER);
3663 +       if (!hash)
3664 +               goto out;
3665 +
3666 +       memset(hash, 0, sizeof(struct dqhash));
3667 +       hash->dqh_id = id;
3668 +       atomic_set(&hash->dqh_count, 1);
3669 +
3670 +       INIT_LIST_HEAD(&hash->dqh_list);
3671 +
3672 +       sema_init(&hash->dqh_dqopt.dqio_sem, 1);
3673 +       sema_init(&hash->dqh_dqopt.dqonoff_sem, 1);
3674 +       init_rwsem(&hash->dqh_dqopt.dqptr_sem);
3675 +       hash->dqh_qop = sb->s_qop;
3676 +       hash->dqh_qcop = sb->s_qcop;
3677 +       hash->dqh_sb = sb;
3678 +
3679 +       lock_kernel();
3680 +       list_add(&hash->dqh_list, &dqhash_list);
3681 +       unlock_kernel();
3682 +       vxdprintk(VXD_CBIT(misc, 0),
3683 +               "new_dqhash: %p [#0x%08x]", hash, hash->dqh_id);
3684 +       return hash;
3685 +
3686 +       // kfree(hash);
3687 +out:
3688 +       return ERR_PTR(err);
3689 +}
3690 +
3691 +void destroy_dqhash(struct dqhash *hash)
3692 +{
3693 +       int cnt;
3694 +
3695 +       vxdprintk(VXD_CBIT(misc, 0),
3696 +               "destroy_dqhash: %p [#0x%08x] c=%d",
3697 +               hash, hash->dqh_id, atomic_read(&hash->dqh_count));
3698 +       lock_kernel();
3699 +       list_del_init(&hash->dqh_list);
3700 +       unlock_kernel();
3701 +       for (cnt = 0; cnt < MAXQUOTAS; cnt++)   /* should not be required anymore! */
3702 +               invalidate_dquots(hash, cnt);
3703 +       kfree(hash);
3704 +}
3705 +
3706 +
3707 +struct dqhash *find_dqhash(unsigned int id)
3708 +{
3709 +       struct list_head *head;
3710 +       struct dqhash *hash;
3711 +
3712 +       lock_kernel();
3713 +       list_for_each(head, &dqhash_list) {
3714 +               hash = list_entry(head, struct dqhash, dqh_list);
3715 +               if (hash->dqh_id == id)
3716 +                       goto dqh_found;
3717 +       }
3718 +       unlock_kernel();
3719 +       return NULL;
3720 +
3721 +dqh_found:
3722 +       unlock_kernel();
3723 +       return dqhget(hash);
3724 +}
3725 +
3726 +
3727 +int vfs_quota_sync(struct dqhash *hash, int type)
3728  {
3729         struct list_head *dirty;
3730         struct dquot *dquot;
3731 -       struct quota_info *dqopt = sb_dqopt(sb);
3732 +       struct quota_info *dqopt = dqh_dqopt(hash);
3733         int cnt;
3734  
3735         down(&dqopt->dqonoff_sem);
3736         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
3737                 if (type != -1 && cnt != type)
3738                         continue;
3739 -               if (!sb_has_quota_enabled(sb, cnt))
3740 +               if (!dqh_has_quota_enabled(hash, cnt))
3741                         continue;
3742                 spin_lock(&dq_list_lock);
3743                 dirty = &dqopt->info[cnt].dqi_dirty_list;
3744 @@ -460,7 +545,7 @@ int vfs_quota_sync(struct super_block *s
3745                         atomic_inc(&dquot->dq_count);
3746                         dqstats.lookups++;
3747                         spin_unlock(&dq_list_lock);
3748 -                       sb->dq_op->write_dquot(dquot);
3749 +                       hash->dqh_qop->write_dquot(dquot);
3750                         dqput(dquot);
3751                         spin_lock(&dq_list_lock);
3752                 }
3753 @@ -468,9 +553,10 @@ int vfs_quota_sync(struct super_block *s
3754         }
3755  
3756         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
3757 -               if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt)
3758 +               if ((cnt == type || type == -1)
3759 +                       && dqh_has_quota_enabled(hash, cnt)
3760                         && info_dirty(&dqopt->info[cnt]))
3761 -                       sb->dq_op->write_info(sb, cnt);
3762 +                       hash->dqh_qop->write_info(hash, cnt);
3763         spin_lock(&dq_list_lock);
3764         dqstats.syncs++;
3765         spin_unlock(&dq_list_lock);
3766 @@ -525,7 +611,7 @@ static void dqput(struct dquot *dquot)
3767         if (!atomic_read(&dquot->dq_count)) {
3768                 printk("VFS: dqput: trying to free free dquot\n");
3769                 printk("VFS: device %s, dquot of %s %d\n",
3770 -                       dquot->dq_sb->s_id,
3771 +                       dquot->dq_dqh->dqh_sb->s_id,
3772                         quotatypes[dquot->dq_type],
3773                         dquot->dq_id);
3774                 BUG();
3775 @@ -547,14 +633,14 @@ we_slept:
3776         if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
3777                 spin_unlock(&dq_list_lock);
3778                 /* Commit dquot before releasing */
3779 -               dquot->dq_sb->dq_op->write_dquot(dquot);
3780 +               dquot->dq_dqh->dqh_qop->write_dquot(dquot);
3781                 goto we_slept;
3782         }
3783         /* Clear flag in case dquot was inactive (something bad happened) */
3784         clear_dquot_dirty(dquot);
3785         if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
3786                 spin_unlock(&dq_list_lock);
3787 -               dquot->dq_sb->dq_op->release_dquot(dquot);
3788 +               dquot->dq_dqh->dqh_qop->release_dquot(dquot);
3789                 goto we_slept;
3790         }
3791         atomic_dec(&dquot->dq_count);
3792 @@ -567,7 +653,7 @@ we_slept:
3793         spin_unlock(&dq_list_lock);
3794  }
3795  
3796 -static struct dquot *get_empty_dquot(struct super_block *sb, int type)
3797 +static struct dquot *get_empty_dquot(int type)
3798  {
3799         struct dquot *dquot;
3800  
3801 @@ -581,7 +667,7 @@ static struct dquot *get_empty_dquot(str
3802         INIT_LIST_HEAD(&dquot->dq_inuse);
3803         INIT_HLIST_NODE(&dquot->dq_hash);
3804         INIT_LIST_HEAD(&dquot->dq_dirty);
3805 -       dquot->dq_sb = sb;
3806 +       dquot->dq_dqh = NULL;
3807         dquot->dq_type = type;
3808         atomic_set(&dquot->dq_count, 1);
3809  
3810 @@ -592,19 +678,19 @@ static struct dquot *get_empty_dquot(str
3811   * Get reference to dquot
3812   * MUST be called with either dqptr_sem or dqonoff_sem held
3813   */
3814 -static struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
3815 +static struct dquot *dqget(struct dqhash *hash, unsigned int id, int type)
3816  {
3817 -       unsigned int hashent = hashfn(sb, id, type);
3818 +       unsigned int hashent = hashfn(hash, id, type);
3819         struct dquot *dquot, *empty = NODQUOT;
3820  
3821 -        if (!sb_has_quota_enabled(sb, type))
3822 +       if (!dqh_has_quota_enabled(hash, type))
3823                 return NODQUOT;
3824  we_slept:
3825         spin_lock(&dq_list_lock);
3826 -       if ((dquot = find_dquot(hashent, sb, id, type)) == NODQUOT) {
3827 +       if ((dquot = find_dquot(hash, hashent, id, type)) == NODQUOT) {
3828                 if (empty == NODQUOT) {
3829                         spin_unlock(&dq_list_lock);
3830 -                       if ((empty = get_empty_dquot(sb, type)) == NODQUOT)
3831 +                       if ((empty = get_empty_dquot(type)) == NODQUOT)
3832                                 schedule();     /* Try to wait for a moment... */
3833                         goto we_slept;
3834                 }
3835 @@ -613,7 +699,7 @@ we_slept:
3836                 /* all dquots go on the inuse_list */
3837                 put_inuse(dquot);
3838                 /* hash it first so it can be found */
3839 -               insert_dquot_hash(dquot);
3840 +               insert_dquot_hash(hash, dquot);
3841                 dqstats.lookups++;
3842                 spin_unlock(&dq_list_lock);
3843         } else {
3844 @@ -630,12 +716,13 @@ we_slept:
3845          * finished or it will be canceled due to dq_count > 1 test */
3846         wait_on_dquot(dquot);
3847         /* Read the dquot and instantiate it (everything done only if needed) */
3848 -       if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && sb->dq_op->acquire_dquot(dquot) < 0) {
3849 +       if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) &&
3850 +               hash->dqh_qop->acquire_dquot(dquot) < 0) {
3851                 dqput(dquot);
3852                 return NODQUOT;
3853         }
3854  #ifdef __DQUOT_PARANOIA
3855 -       if (!dquot->dq_sb)      /* Has somebody invalidated entry under us? */
3856 +       if (!dquot->dq_dqh)     /* Has somebody invalidated entry under us? */
3857                 BUG();
3858  #endif
3859  
3860 @@ -657,9 +744,10 @@ static int dqinit_needed(struct inode *i
3861  }
3862  
3863  /* This routine is guarded by dqonoff_sem semaphore */
3864 -static void add_dquot_ref(struct super_block *sb, int type)
3865 +static void add_dquot_ref(struct dqhash *hash, int type)
3866  {
3867         struct list_head *p;
3868 +       struct super_block *sb = hash->dqh_sb;
3869  
3870  restart:
3871         file_list_lock();
3872 @@ -669,7 +757,7 @@ restart:
3873                 if (filp->f_mode & FMODE_WRITE && dqinit_needed(inode, type)) {
3874                         struct dentry *dentry = dget(filp->f_dentry);
3875                         file_list_unlock();
3876 -                       sb->dq_op->initialize(inode, type);
3877 +                       hash->dqh_qop->initialize(inode, type);
3878                         dput(dentry);
3879                         /* As we may have blocked we had better restart... */
3880                         goto restart;
3881 @@ -728,16 +816,16 @@ static void put_dquot_list(struct list_h
3882  }
3883  
3884  /* Gather all references from inodes and drop them */
3885 -static void drop_dquot_ref(struct super_block *sb, int type)
3886 +static void drop_dquot_ref(struct dqhash *hash, int type)
3887  {
3888         LIST_HEAD(tofree_head);
3889  
3890         /* We need to be guarded against prune_icache to reach all the
3891          * inodes - otherwise some can be on the local list of prune_icache */
3892         down(&iprune_sem);
3893 -       down_write(&sb_dqopt(sb)->dqptr_sem);
3894 -       remove_dquot_ref(sb, type, &tofree_head);
3895 -       up_write(&sb_dqopt(sb)->dqptr_sem);
3896 +       down_write(&dqh_dqopt(hash)->dqptr_sem);
3897 +       remove_dquot_ref(hash, type, &tofree_head);
3898 +       up_write(&dqh_dqopt(hash)->dqptr_sem);
3899         up(&iprune_sem);
3900         put_dquot_list(&tofree_head);
3901  }
3902 @@ -809,7 +897,7 @@ static void print_warning(struct dquot *
3903         if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags)))
3904                 return;
3905  
3906 -       tty_write_message(current->signal->tty, dquot->dq_sb->s_id);
3907 +       tty_write_message(current->signal->tty, dquot->dq_dqh->dqh_sb->s_id);
3908         if (warntype == ISOFTWARN || warntype == BSOFTWARN)
3909                 tty_write_message(current->signal->tty, ": warning, ");
3910         else
3911 @@ -849,7 +937,7 @@ static inline void flush_warnings(struct
3912  
3913  static inline char ignore_hardlimit(struct dquot *dquot)
3914  {
3915 -       struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
3916 +       struct mem_dqinfo *info = &dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type];
3917  
3918         return capable(CAP_SYS_RESOURCE) &&
3919             (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || !(info->dqi_flags & V1_DQF_RSQUASH));
3920 @@ -881,7 +969,7 @@ static int check_idq(struct dquot *dquot
3921            (dquot->dq_dqb.dqb_curinodes + inodes) > dquot->dq_dqb.dqb_isoftlimit &&
3922             dquot->dq_dqb.dqb_itime == 0) {
3923                 *warntype = ISOFTWARN;
3924 -               dquot->dq_dqb.dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
3925 +               dquot->dq_dqb.dqb_itime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_igrace;
3926         }
3927  
3928         return QUOTA_OK;
3929 @@ -916,7 +1004,7 @@ static int check_bdq(struct dquot *dquot
3930             dquot->dq_dqb.dqb_btime == 0) {
3931                 if (!prealloc) {
3932                         *warntype = BSOFTWARN;
3933 -                       dquot->dq_dqb.dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace;
3934 +                       dquot->dq_dqb.dqb_btime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_bgrace;
3935                 }
3936                 else
3937                         /*
3938 @@ -942,7 +1030,7 @@ int dquot_initialize(struct inode *inode
3939           * re-enter the quota code and are already holding the semaphore */
3940         if (IS_NOQUOTA(inode))
3941                 return 0;
3942 -       down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
3943 +       down_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3944         /* Having dqptr_sem we know NOQUOTA flags can't be altered... */
3945         if (IS_NOQUOTA(inode))
3946                 goto out_err;
3947 @@ -958,11 +1046,11 @@ int dquot_initialize(struct inode *inode
3948                                         id = inode->i_gid;
3949                                         break;
3950                         }
3951 -                       inode->i_dquot[cnt] = dqget(inode->i_sb, id, cnt);
3952 +                       inode->i_dquot[cnt] = dqget(inode->i_dqh, id, cnt);
3953                 }
3954         }
3955  out_err:
3956 -       up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
3957 +       up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3958         return ret;
3959  }
3960  
3961 @@ -974,14 +1062,14 @@ int dquot_drop(struct inode *inode)
3962  {
3963         int cnt;
3964  
3965 -       down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
3966 +       down_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3967         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
3968                 if (inode->i_dquot[cnt] != NODQUOT) {
3969                         dqput(inode->i_dquot[cnt]);
3970                         inode->i_dquot[cnt] = NODQUOT;
3971                 }
3972         }
3973 -       up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
3974 +       up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3975         return 0;
3976  }
3977  
3978 @@ -1012,9 +1100,9 @@ out_add:
3979         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
3980                 warntype[cnt] = NOWARN;
3981  
3982 -       down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
3983 +       down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3984         if (IS_NOQUOTA(inode)) {        /* Now we can do reliable test... */
3985 -               up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
3986 +               up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3987                 goto out_add;
3988         }
3989         spin_lock(&dq_data_lock);
3990 @@ -1039,7 +1127,7 @@ warn_put_all:
3991                         if (inode->i_dquot[cnt])
3992                                 mark_dquot_dirty(inode->i_dquot[cnt]);
3993         flush_warnings(inode->i_dquot, warntype);
3994 -       up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
3995 +       up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
3996         return ret;
3997  }
3998  
3999 @@ -1057,9 +1145,9 @@ int dquot_alloc_inode(const struct inode
4000                 return QUOTA_OK;
4001         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
4002                 warntype[cnt] = NOWARN;
4003 -       down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4004 +       down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4005         if (IS_NOQUOTA(inode)) {
4006 -               up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4007 +               up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4008                 return QUOTA_OK;
4009         }
4010         spin_lock(&dq_data_lock);
4011 @@ -1084,7 +1172,7 @@ warn_put_all:
4012                         if (inode->i_dquot[cnt])
4013                                 mark_dquot_dirty(inode->i_dquot[cnt]);
4014         flush_warnings((struct dquot **)inode->i_dquot, warntype);
4015 -       up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4016 +       up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4017         return ret;
4018  }
4019  
4020 @@ -1102,10 +1190,10 @@ out_sub:
4021                 inode_sub_bytes(inode, number);
4022                 return QUOTA_OK;
4023         }
4024 -       down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4025 +       down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4026         /* Now recheck reliably when holding dqptr_sem */
4027         if (IS_NOQUOTA(inode)) {
4028 -               up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4029 +               up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4030                 goto out_sub;
4031         }
4032         spin_lock(&dq_data_lock);
4033 @@ -1120,7 +1208,7 @@ out_sub:
4034         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
4035                 if (inode->i_dquot[cnt])
4036                         mark_dquot_dirty(inode->i_dquot[cnt]);
4037 -       up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4038 +       up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4039         return QUOTA_OK;
4040  }
4041  
4042 @@ -1135,10 +1223,10 @@ int dquot_free_inode(const struct inode 
4043           * re-enter the quota code and are already holding the semaphore */
4044         if (IS_NOQUOTA(inode))
4045                 return QUOTA_OK;
4046 -       down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4047 +       down_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4048         /* Now recheck reliably when holding dqptr_sem */
4049         if (IS_NOQUOTA(inode)) {
4050 -               up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4051 +               up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4052                 return QUOTA_OK;
4053         }
4054         spin_lock(&dq_data_lock);
4055 @@ -1152,7 +1240,7 @@ int dquot_free_inode(const struct inode 
4056         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
4057                 if (inode->i_dquot[cnt])
4058                         mark_dquot_dirty(inode->i_dquot[cnt]);
4059 -       up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
4060 +       up_read(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4061         return QUOTA_OK;
4062  }
4063  
4064 @@ -1167,6 +1255,7 @@ int dquot_transfer(struct inode *inode, 
4065         qsize_t space;
4066         struct dquot *transfer_from[MAXQUOTAS];
4067         struct dquot *transfer_to[MAXQUOTAS];
4068 +       struct dqhash *dqh = inode->i_sb->s_dqh;
4069         int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
4070             chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
4071         char warntype[MAXQUOTAS];
4072 @@ -1180,10 +1269,10 @@ int dquot_transfer(struct inode *inode, 
4073                 transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
4074                 warntype[cnt] = NOWARN;
4075         }
4076 -       down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
4077 +       down_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4078         /* Now recheck reliably when holding dqptr_sem */
4079         if (IS_NOQUOTA(inode)) {        /* File without quota accounting? */
4080 -               up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
4081 +               up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4082                 return QUOTA_OK;
4083         }
4084         /* First build the transfer_to list - here we can block on
4085 @@ -1194,12 +1283,12 @@ int dquot_transfer(struct inode *inode, 
4086                         case USRQUOTA:
4087                                 if (!chuid)
4088                                         continue;
4089 -                               transfer_to[cnt] = dqget(inode->i_sb, iattr->ia_uid, cnt);
4090 +                               transfer_to[cnt] = dqget(dqh, iattr->ia_uid, cnt);
4091                                 break;
4092                         case GRPQUOTA:
4093                                 if (!chgid)
4094                                         continue;
4095 -                               transfer_to[cnt] = dqget(inode->i_sb, iattr->ia_gid, cnt);
4096 +                               transfer_to[cnt] = dqget(dqh, iattr->ia_gid, cnt);
4097                                 break;
4098                 }
4099         }
4100 @@ -1254,20 +1343,20 @@ warn_put_all:
4101                 if (ret == NO_QUOTA && transfer_to[cnt] != NODQUOT)
4102                         dqput(transfer_to[cnt]);
4103         }
4104 -       up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
4105 +       up_write(&dqh_dqopt(inode->i_dqh)->dqptr_sem);
4106         return ret;
4107  }
4108  
4109  /*
4110   * Write info of quota file to disk
4111   */
4112 -int dquot_commit_info(struct super_block *sb, int type)
4113 +int dquot_commit_info(struct dqhash *hash, int type)
4114  {
4115         int ret;
4116 -       struct quota_info *dqopt = sb_dqopt(sb);
4117 +       struct quota_info *dqopt = dqh_dqopt(hash);
4118  
4119         down(&dqopt->dqio_sem);
4120 -       ret = dqopt->ops[type]->write_file_info(sb, type);
4121 +       ret = dqopt->ops[type]->write_file_info(hash, type);
4122         up(&dqopt->dqio_sem);
4123         return ret;
4124  }
4125 @@ -1317,10 +1406,10 @@ static inline void reset_enable_flags(st
4126  /*
4127   * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
4128   */
4129 -int vfs_quota_off(struct super_block *sb, int type)
4130 +int vfs_quota_off(struct dqhash *hash, int type)
4131  {
4132         int cnt;
4133 -       struct quota_info *dqopt = sb_dqopt(sb);
4134 +       struct quota_info *dqopt = dqh_dqopt(hash);
4135         struct inode *toputinode[MAXQUOTAS];
4136  
4137         /* We need to serialize quota_off() for device */
4138 @@ -1329,21 +1418,21 @@ int vfs_quota_off(struct super_block *sb
4139                 toputinode[cnt] = NULL;
4140                 if (type != -1 && cnt != type)
4141                         continue;
4142 -               if (!sb_has_quota_enabled(sb, cnt))
4143 +               if (!dqh_has_quota_enabled(hash, cnt))
4144                         continue;
4145                 reset_enable_flags(dqopt, cnt);
4146  
4147                 /* Note: these are blocking operations */
4148 -               drop_dquot_ref(sb, cnt);
4149 -               invalidate_dquots(sb, cnt);
4150 +               drop_dquot_ref(hash, cnt);
4151 +               invalidate_dquots(hash, cnt);
4152                 /*
4153                  * Now all dquots should be invalidated, all writes done so we should be only
4154                  * users of the info. No locks needed.
4155                  */
4156                 if (info_dirty(&dqopt->info[cnt]))
4157 -                       sb->dq_op->write_info(sb, cnt);
4158 +                       hash->dqh_qop->write_info(hash, cnt);
4159                 if (dqopt->ops[cnt]->free_file_info)
4160 -                       dqopt->ops[cnt]->free_file_info(sb, cnt);
4161 +                       dqopt->ops[cnt]->free_file_info(hash, cnt);
4162                 put_quota_format(dqopt->info[cnt].dqi_format);
4163  
4164                 toputinode[cnt] = dqopt->files[cnt];
4165 @@ -1356,9 +1445,9 @@ int vfs_quota_off(struct super_block *sb
4166         up(&dqopt->dqonoff_sem);
4167         /* Sync the superblock so that buffers with quota data are written to
4168          * disk (and so userspace sees correct data afterwards). */
4169 -       if (sb->s_op->sync_fs)
4170 -               sb->s_op->sync_fs(sb, 1);
4171 -       sync_blockdev(sb->s_bdev);
4172 +       if (hash->dqh_sb->s_op->sync_fs)
4173 +               hash->dqh_sb->s_op->sync_fs(hash->dqh_sb, 1);
4174 +       sync_blockdev(hash->dqh_sb->s_bdev);
4175         /* Now the quota files are just ordinary files and we can set the
4176          * inode flags back. Moreover we discard the pagecache so that
4177          * userspace sees the writes we did bypassing the pagecache. We
4178 @@ -1369,7 +1458,7 @@ int vfs_quota_off(struct super_block *sb
4179                         down(&dqopt->dqonoff_sem);
4180                         /* If quota was reenabled in the meantime, we have
4181                          * nothing to do */
4182 -                       if (!sb_has_quota_enabled(sb, cnt)) {
4183 +                       if (!dqh_has_quota_enabled(hash, cnt)) {
4184                                 mutex_lock(&toputinode[cnt]->i_mutex);
4185                                 toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
4186                                   S_NOATIME | S_NOQUOTA);
4187 @@ -1380,8 +1469,8 @@ int vfs_quota_off(struct super_block *sb
4188                         }
4189                         up(&dqopt->dqonoff_sem);
4190                 }
4191 -       if (sb->s_bdev)
4192 -               invalidate_bdev(sb->s_bdev, 0);
4193 +       if (hash->dqh_sb->s_bdev)
4194 +               invalidate_bdev(hash->dqh_sb->s_bdev, 0);
4195         return 0;
4196  }
4197  
4198 @@ -1394,7 +1483,8 @@ static int vfs_quota_on_inode(struct ino
4199  {
4200         struct quota_format_type *fmt = find_quota_format(format_id);
4201         struct super_block *sb = inode->i_sb;
4202 -       struct quota_info *dqopt = sb_dqopt(sb);
4203 +       struct dqhash *hash = inode->i_dqh;
4204 +       struct quota_info *dqopt = dqh_dqopt(hash);
4205         int error;
4206         int oldflags = -1;
4207  
4208 @@ -1420,7 +1510,7 @@ static int vfs_quota_on_inode(struct ino
4209         invalidate_bdev(sb->s_bdev, 0);
4210         mutex_lock(&inode->i_mutex);
4211         down(&dqopt->dqonoff_sem);
4212 -       if (sb_has_quota_enabled(sb, type)) {
4213 +       if (dqh_has_quota_enabled(hash, type)) {
4214                 error = -EBUSY;
4215                 goto out_lock;
4216         }
4217 @@ -1431,21 +1521,21 @@ static int vfs_quota_on_inode(struct ino
4218         oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
4219         inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
4220         up_write(&dqopt->dqptr_sem);
4221 -       sb->dq_op->drop(inode);
4222 +       hash->dqh_qop->drop(inode);
4223  
4224         error = -EIO;
4225         dqopt->files[type] = igrab(inode);
4226         if (!dqopt->files[type])
4227                 goto out_lock;
4228         error = -EINVAL;
4229 -       if (!fmt->qf_ops->check_quota_file(sb, type))
4230 +       if (!fmt->qf_ops->check_quota_file(hash, type))
4231                 goto out_file_init;
4232  
4233         dqopt->ops[type] = fmt->qf_ops;
4234         dqopt->info[type].dqi_format = fmt;
4235         INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list);
4236         down(&dqopt->dqio_sem);
4237 -       if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) {
4238 +       if ((error = dqopt->ops[type]->read_file_info(hash, type)) < 0) {
4239                 up(&dqopt->dqio_sem);
4240                 goto out_file_init;
4241         }
4242 @@ -1453,7 +1543,7 @@ static int vfs_quota_on_inode(struct ino
4243         mutex_unlock(&inode->i_mutex);
4244         set_enable_flags(dqopt, type);
4245  
4246 -       add_dquot_ref(sb, type);
4247 +       add_dquot_ref(hash, type);
4248         up(&dqopt->dqonoff_sem);
4249  
4250         return 0;
4251 @@ -1479,7 +1569,7 @@ out_fmt:
4252  }
4253  
4254  /* Actual function called from quotactl() */
4255 -int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path)
4256 +int vfs_quota_on(struct dqhash *hash, int type, int format_id, char *path)
4257  {
4258         struct nameidata nd;
4259         int error;
4260 @@ -1491,7 +1581,7 @@ int vfs_quota_on(struct super_block *sb,
4261         if (error)
4262                 goto out_path;
4263         /* Quota file not on the same filesystem? */
4264 -       if (nd.mnt->mnt_sb != sb)
4265 +       if (nd.mnt->mnt_sb != hash->dqh_sb)
4266                 error = -EXDEV;
4267         else
4268                 error = vfs_quota_on_inode(nd.dentry->d_inode, type, format_id);
4269 @@ -1504,13 +1594,13 @@ out_path:
4270   * This function is used when filesystem needs to initialize quotas
4271   * during mount time.
4272   */
4273 -int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
4274 +int vfs_quota_on_mount(struct dqhash *hash, char *qf_name,
4275                 int format_id, int type)
4276  {
4277         struct dentry *dentry;
4278         int error;
4279  
4280 -       dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
4281 +       dentry = lookup_one_len(qf_name, hash->dqh_sb->s_root, strlen(qf_name));
4282         if (IS_ERR(dentry))
4283                 return PTR_ERR(dentry);
4284  
4285 @@ -1546,18 +1636,18 @@ static void do_get_dqblk(struct dquot *d
4286         spin_unlock(&dq_data_lock);
4287  }
4288  
4289 -int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di)
4290 +int vfs_get_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di)
4291  {
4292         struct dquot *dquot;
4293  
4294 -       down(&sb_dqopt(sb)->dqonoff_sem);
4295 -       if (!(dquot = dqget(sb, id, type))) {
4296 -               up(&sb_dqopt(sb)->dqonoff_sem);
4297 +       down(&dqh_dqopt(hash)->dqonoff_sem);
4298 +       if (!(dquot = dqget(hash, id, type))) {
4299 +               up(&dqh_dqopt(hash)->dqonoff_sem);
4300                 return -ESRCH;
4301         }
4302         do_get_dqblk(dquot, di);
4303         dqput(dquot);
4304 -       up(&sb_dqopt(sb)->dqonoff_sem);
4305 +       up(&dqh_dqopt(hash)->dqonoff_sem);
4306         return 0;
4307  }
4308  
4309 @@ -1597,7 +1687,7 @@ static void do_set_dqblk(struct dquot *d
4310                         clear_bit(DQ_BLKS_B, &dquot->dq_flags);
4311                 }
4312                 else if (!(di->dqb_valid & QIF_BTIME))  /* Set grace only if user hasn't provided his own... */
4313 -                       dm->dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace;
4314 +                       dm->dqb_btime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_bgrace;
4315         }
4316         if (check_ilim) {
4317                 if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) {
4318 @@ -1605,7 +1695,7 @@ static void do_set_dqblk(struct dquot *d
4319                         clear_bit(DQ_INODES_B, &dquot->dq_flags);
4320                 }
4321                 else if (!(di->dqb_valid & QIF_ITIME))  /* Set grace only if user hasn't provided his own... */
4322 -                       dm->dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
4323 +                       dm->dqb_itime = get_seconds() + dqh_dqopt(dquot->dq_dqh)->info[dquot->dq_type].dqi_igrace;
4324         }
4325         if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit)
4326                 clear_bit(DQ_FAKE_B, &dquot->dq_flags);
4327 @@ -1615,53 +1705,53 @@ static void do_set_dqblk(struct dquot *d
4328         mark_dquot_dirty(dquot);
4329  }
4330  
4331 -int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di)
4332 +int vfs_set_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di)
4333  {
4334         struct dquot *dquot;
4335  
4336 -       down(&sb_dqopt(sb)->dqonoff_sem);
4337 -       if (!(dquot = dqget(sb, id, type))) {
4338 -               up(&sb_dqopt(sb)->dqonoff_sem);
4339 +       down(&dqh_dqopt(hash)->dqonoff_sem);
4340 +       if (!(dquot = dqget(hash, id, type))) {
4341 +               up(&dqh_dqopt(hash)->dqonoff_sem);
4342                 return -ESRCH;
4343         }
4344         do_set_dqblk(dquot, di);
4345         dqput(dquot);
4346 -       up(&sb_dqopt(sb)->dqonoff_sem);
4347 +       up(&dqh_dqopt(hash)->dqonoff_sem);
4348         return 0;
4349  }
4350  
4351  /* Generic routine for getting common part of quota file information */
4352 -int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
4353 +int vfs_get_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii)
4354  {
4355         struct mem_dqinfo *mi;
4356    
4357 -       down(&sb_dqopt(sb)->dqonoff_sem);
4358 -       if (!sb_has_quota_enabled(sb, type)) {
4359 -               up(&sb_dqopt(sb)->dqonoff_sem);
4360 +       down(&dqh_dqopt(hash)->dqonoff_sem);
4361 +       if (!dqh_has_quota_enabled(hash, type)) {
4362 +               up(&dqh_dqopt(hash)->dqonoff_sem);
4363                 return -ESRCH;
4364         }
4365 -       mi = sb_dqopt(sb)->info + type;
4366 +       mi = dqh_dqopt(hash)->info + type;
4367         spin_lock(&dq_data_lock);
4368         ii->dqi_bgrace = mi->dqi_bgrace;
4369         ii->dqi_igrace = mi->dqi_igrace;
4370         ii->dqi_flags = mi->dqi_flags & DQF_MASK;
4371         ii->dqi_valid = IIF_ALL;
4372         spin_unlock(&dq_data_lock);
4373 -       up(&sb_dqopt(sb)->dqonoff_sem);
4374 +       up(&dqh_dqopt(hash)->dqonoff_sem);
4375         return 0;
4376  }
4377  
4378  /* Generic routine for setting common part of quota file information */
4379 -int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
4380 +int vfs_set_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii)
4381  {
4382         struct mem_dqinfo *mi;
4383  
4384 -       down(&sb_dqopt(sb)->dqonoff_sem);
4385 -       if (!sb_has_quota_enabled(sb, type)) {
4386 -               up(&sb_dqopt(sb)->dqonoff_sem);
4387 +       down(&dqh_dqopt(hash)->dqonoff_sem);
4388 +       if (!dqh_has_quota_enabled(hash, type)) {
4389 +               up(&dqh_dqopt(hash)->dqonoff_sem);
4390                 return -ESRCH;
4391         }
4392 -       mi = sb_dqopt(sb)->info + type;
4393 +       mi = dqh_dqopt(hash)->info + type;
4394         spin_lock(&dq_data_lock);
4395         if (ii->dqi_valid & IIF_BGRACE)
4396                 mi->dqi_bgrace = ii->dqi_bgrace;
4397 @@ -1670,10 +1760,10 @@ int vfs_set_dqinfo(struct super_block *s
4398         if (ii->dqi_valid & IIF_FLAGS)
4399                 mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) | (ii->dqi_flags & DQF_MASK);
4400         spin_unlock(&dq_data_lock);
4401 -       mark_info_dirty(sb, type);
4402 +       mark_info_dirty(hash, type);
4403         /* Force write to disk */
4404 -       sb->dq_op->write_info(sb, type);
4405 -       up(&sb_dqopt(sb)->dqonoff_sem);
4406 +       hash->dqh_qop->write_info(hash, type);
4407 +       up(&dqh_dqopt(hash)->dqonoff_sem);
4408         return 0;
4409  }
4410  
4411 diff -NurpP --minimal linux-2.6.16.20/fs/exec.c linux-2.6.16.20-vs2.1.1-rc22/fs/exec.c
4412 --- linux-2.6.16.20/fs/exec.c   2006-04-09 13:49:53 +0200
4413 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/exec.c      2006-05-29 16:54:23 +0200
4414 @@ -49,6 +49,8 @@
4415  #include <linux/rmap.h>
4416  #include <linux/acct.h>
4417  #include <linux/cn_proc.h>
4418 +#include <linux/vs_cvirt.h>
4419 +#include <linux/vs_memory.h>
4420  
4421  #include <asm/uaccess.h>
4422  #include <asm/mmu_context.h>
4423 @@ -436,7 +438,8 @@ int setup_arg_pages(struct linux_binprm 
4424                         kmem_cache_free(vm_area_cachep, mpnt);
4425                         return ret;
4426                 }
4427 -               mm->stack_vm = mm->total_vm = vma_pages(mpnt);
4428 +               vx_vmpages_sub(mm, mm->total_vm - vma_pages(mpnt));
4429 +               mm->stack_vm = mm->total_vm;
4430         }
4431  
4432         for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
4433 @@ -1320,7 +1323,7 @@ static void format_corename(char *corena
4434                         /* UNIX time of coredump */
4435                         case 't': {
4436                                 struct timeval tv;
4437 -                               do_gettimeofday(&tv);
4438 +                               vx_gettimeofday(&tv);
4439                                 rc = snprintf(out_ptr, out_end - out_ptr,
4440                                               "%lu", tv.tv_sec);
4441                                 if (rc > out_end - out_ptr)
4442 @@ -1332,7 +1335,7 @@ static void format_corename(char *corena
4443                         case 'h':
4444                                 down_read(&uts_sem);
4445                                 rc = snprintf(out_ptr, out_end - out_ptr,
4446 -                                             "%s", system_utsname.nodename);
4447 +                                             "%s", vx_new_uts(nodename));
4448                                 up_read(&uts_sem);
4449                                 if (rc > out_end - out_ptr)
4450                                         goto out;
4451 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/balloc.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/balloc.c
4452 --- linux-2.6.16.20/fs/ext2/balloc.c    2006-04-09 13:49:53 +0200
4453 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/balloc.c       2006-04-26 19:07:00 +0200
4454 @@ -17,6 +17,8 @@
4455  #include <linux/sched.h>
4456  #include <linux/buffer_head.h>
4457  #include <linux/capability.h>
4458 +#include <linux/vs_dlimit.h>
4459 +#include <linux/vs_tag.h>
4460  
4461  /*
4462   * balloc.c contains the blocks allocation and deallocation routines
4463 @@ -109,6 +111,8 @@ static int reserve_blocks(struct super_b
4464         free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
4465         root_blocks = le32_to_cpu(es->s_r_blocks_count);
4466  
4467 +       DLIMIT_ADJUST_BLOCK(sb, dx_current_tag(), &free_blocks, &root_blocks);
4468 +
4469         if (free_blocks < count)
4470                 count = free_blocks;
4471  
4472 @@ -259,6 +263,7 @@ do_more:
4473         }
4474  error_return:
4475         brelse(bitmap_bh);
4476 +       DLIMIT_FREE_BLOCK(inode, freed);
4477         release_blocks(sb, freed);
4478         DQUOT_FREE_BLOCK(inode, freed);
4479  }
4480 @@ -362,6 +367,10 @@ int ext2_new_block(struct inode *inode, 
4481                 *err = -ENOSPC;
4482                 goto out_dquot;
4483         }
4484 +       if (DLIMIT_ALLOC_BLOCK(inode, es_alloc)) {
4485 +               *err = -ENOSPC;
4486 +               goto out_dlimit;
4487 +       }
4488  
4489         ext2_debug ("goal=%lu.\n", goal);
4490  
4491 @@ -509,6 +518,8 @@ got_block:
4492         *err = 0;
4493  out_release:
4494         group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
4495 +       DLIMIT_FREE_BLOCK(inode, es_alloc);
4496 +out_dlimit:
4497         release_blocks(sb, es_alloc);
4498  out_dquot:
4499         DQUOT_FREE_BLOCK(inode, dq_alloc);
4500 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/ext2.h linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/ext2.h
4501 --- linux-2.6.16.20/fs/ext2/ext2.h      2006-02-18 14:40:21 +0100
4502 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/ext2.h 2006-04-26 19:07:00 +0200
4503 @@ -162,6 +162,7 @@ extern struct file_operations ext2_xip_f
4504  extern struct address_space_operations ext2_aops;
4505  extern struct address_space_operations ext2_aops_xip;
4506  extern struct address_space_operations ext2_nobh_aops;
4507 +extern int ext2_sync_flags(struct inode *inode);
4508  
4509  /* namei.c */
4510  extern struct inode_operations ext2_dir_inode_operations;
4511 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/file.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/file.c
4512 --- linux-2.6.16.20/fs/ext2/file.c      2005-08-29 22:25:30 +0200
4513 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/file.c 2006-04-26 19:07:00 +0200
4514 @@ -53,6 +53,7 @@ struct file_operations ext2_file_operati
4515         .readv          = generic_file_readv,
4516         .writev         = generic_file_writev,
4517         .sendfile       = generic_file_sendfile,
4518 +       .sendpage       = generic_file_sendpage,
4519  };
4520  
4521  #ifdef CONFIG_EXT2_FS_XIP
4522 @@ -79,4 +80,5 @@ struct inode_operations ext2_file_inode_
4523  #endif
4524         .setattr        = ext2_setattr,
4525         .permission     = ext2_permission,
4526 +       .sync_flags     = ext2_sync_flags,
4527  };
4528 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/ialloc.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/ialloc.c
4529 --- linux-2.6.16.20/fs/ext2/ialloc.c    2006-02-18 14:40:21 +0100
4530 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/ialloc.c       2006-04-26 19:07:00 +0200
4531 @@ -18,6 +18,8 @@
4532  #include <linux/backing-dev.h>
4533  #include <linux/buffer_head.h>
4534  #include <linux/random.h>
4535 +#include <linux/vs_dlimit.h>
4536 +#include <linux/vs_tag.h>
4537  #include "ext2.h"
4538  #include "xattr.h"
4539  #include "acl.h"
4540 @@ -126,6 +128,7 @@ void ext2_free_inode (struct inode * ino
4541                 ext2_xattr_delete_inode(inode);
4542                 DQUOT_FREE_INODE(inode);
4543                 DQUOT_DROP(inode);
4544 +               DLIMIT_FREE_INODE(inode);
4545         }
4546  
4547         es = EXT2_SB(sb)->s_es;
4548 @@ -465,6 +468,11 @@ struct inode *ext2_new_inode(struct inod
4549         if (!inode)
4550                 return ERR_PTR(-ENOMEM);
4551  
4552 +       inode->i_tag = dx_current_fstag(sb);
4553 +       if (DLIMIT_ALLOC_INODE(inode)) {
4554 +               err = -ENOSPC;
4555 +               goto fail_dlim;
4556 +       }
4557         ei = EXT2_I(inode);
4558         sbi = EXT2_SB(sb);
4559         es = sbi->s_es;
4560 @@ -579,7 +587,8 @@ got:
4561         inode->i_blocks = 0;
4562         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
4563         memset(ei->i_data, 0, sizeof(ei->i_data));
4564 -       ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL;
4565 +       ei->i_flags = EXT2_I(dir)->i_flags &
4566 +               ~(EXT2_BTREE_FL|EXT2_IUNLINK_FL|EXT2_BARRIER_FL);
4567         if (S_ISLNK(mode))
4568                 ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL);
4569         /* dirsync is only applied to directories */
4570 @@ -627,12 +636,15 @@ fail_free_drop:
4571  
4572  fail_drop:
4573         DQUOT_DROP(inode);
4574 +       DLIMIT_FREE_INODE(inode);
4575         inode->i_flags |= S_NOQUOTA;
4576         inode->i_nlink = 0;
4577         iput(inode);
4578         return ERR_PTR(err);
4579  
4580  fail:
4581 +       DLIMIT_FREE_INODE(inode);
4582 +fail_dlim:
4583         make_bad_inode(inode);
4584         iput(inode);
4585         return ERR_PTR(err);
4586 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/inode.c
4587 --- linux-2.6.16.20/fs/ext2/inode.c     2006-02-18 14:40:21 +0100
4588 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/inode.c        2006-04-26 19:07:00 +0200
4589 @@ -31,6 +31,7 @@
4590  #include <linux/writeback.h>
4591  #include <linux/buffer_head.h>
4592  #include <linux/mpage.h>
4593 +#include <linux/vserver/tag.h>
4594  #include "ext2.h"
4595  #include "acl.h"
4596  #include "xip.h"
4597 @@ -1054,25 +1055,70 @@ void ext2_set_inode_flags(struct inode *
4598  {
4599         unsigned int flags = EXT2_I(inode)->i_flags;
4600  
4601 -       inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
4602 +       inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER |
4603 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
4604 +
4605 +       if (flags & EXT2_IMMUTABLE_FL)
4606 +               inode->i_flags |= S_IMMUTABLE;
4607 +       if (flags & EXT2_IUNLINK_FL)
4608 +               inode->i_flags |= S_IUNLINK;
4609 +       if (flags & EXT2_BARRIER_FL)
4610 +               inode->i_flags |= S_BARRIER;
4611 +
4612         if (flags & EXT2_SYNC_FL)
4613                 inode->i_flags |= S_SYNC;
4614         if (flags & EXT2_APPEND_FL)
4615                 inode->i_flags |= S_APPEND;
4616 -       if (flags & EXT2_IMMUTABLE_FL)
4617 -               inode->i_flags |= S_IMMUTABLE;
4618         if (flags & EXT2_NOATIME_FL)
4619                 inode->i_flags |= S_NOATIME;
4620         if (flags & EXT2_DIRSYNC_FL)
4621                 inode->i_flags |= S_DIRSYNC;
4622  }
4623  
4624 +int ext2_sync_flags(struct inode *inode)
4625 +{
4626 +       unsigned int oldflags, newflags;
4627 +       /* Fold the VFS inode flag state back into EXT2_I(inode)->i_flags. */
4628 +       oldflags = EXT2_I(inode)->i_flags;
4629 +       newflags = oldflags & ~(EXT2_APPEND_FL |
4630 +               EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL |
4631 +               EXT2_BARRIER_FL | EXT2_NOATIME_FL |
4632 +               EXT2_SYNC_FL | EXT2_DIRSYNC_FL);
4633 +       /* Rebuild the cleared bits: the inverse of ext2_set_inode_flags(). */
4634 +       if (IS_APPEND(inode))
4635 +               newflags |= EXT2_APPEND_FL;
4636 +       if (IS_IMMUTABLE(inode))
4637 +               newflags |= EXT2_IMMUTABLE_FL;
4638 +       if (IS_IUNLINK(inode))
4639 +               newflags |= EXT2_IUNLINK_FL;
4640 +       if (IS_BARRIER(inode))
4641 +               newflags |= EXT2_BARRIER_FL;
4642 +
4643 +       /* test i_flags directly: we do not want to copy superblock flags */
4644 +       if (inode->i_flags & S_NOATIME)
4645 +               newflags |= EXT2_NOATIME_FL;
4646 +       if (inode->i_flags & S_SYNC)
4647 +               newflags |= EXT2_SYNC_FL;
4648 +       if (inode->i_flags & S_DIRSYNC)
4649 +               newflags |= EXT2_DIRSYNC_FL;
4650 +       /* Only on a real change: stamp ctime in whole seconds and dirty. */
4651 +       if (oldflags ^ newflags) {
4652 +               EXT2_I(inode)->i_flags = newflags;
4653 +               inode->i_ctime = CURRENT_TIME_SEC;
4654 +               mark_inode_dirty(inode);
4655 +       }
4656 +       /* Always 0; the int return is for the ->sync_flags inode operation. */
4657 +       return 0;
4658 +}
4659 +
4660  void ext2_read_inode (struct inode * inode)
4661  {
4662         struct ext2_inode_info *ei = EXT2_I(inode);
4663         ino_t ino = inode->i_ino;
4664         struct buffer_head * bh;
4665         struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
4666 +       uid_t uid;
4667 +       gid_t gid;
4668         int n;
4669  
4670  #ifdef CONFIG_EXT2_FS_POSIX_ACL
4671 @@ -1083,12 +1129,17 @@ void ext2_read_inode (struct inode * ino
4672                 goto bad_inode;
4673  
4674         inode->i_mode = le16_to_cpu(raw_inode->i_mode);
4675 -       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
4676 -       inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
4677 +       uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
4678 +       gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
4679         if (!(test_opt (inode->i_sb, NO_UID32))) {
4680 -               inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
4681 -               inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
4682 +               uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
4683 +               gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
4684         }
4685 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
4686 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
4687 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
4688 +               le16_to_cpu(raw_inode->i_raw_tag));
4689 +
4690         inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
4691         inode->i_size = le32_to_cpu(raw_inode->i_size);
4692         inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
4693 @@ -1186,8 +1237,8 @@ static int ext2_update_inode(struct inod
4694         struct ext2_inode_info *ei = EXT2_I(inode);
4695         struct super_block *sb = inode->i_sb;
4696         ino_t ino = inode->i_ino;
4697 -       uid_t uid = inode->i_uid;
4698 -       gid_t gid = inode->i_gid;
4699 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
4700 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
4701         struct buffer_head * bh;
4702         struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
4703         int n;
4704 @@ -1222,6 +1273,9 @@ static int ext2_update_inode(struct inod
4705                 raw_inode->i_uid_high = 0;
4706                 raw_inode->i_gid_high = 0;
4707         }
4708 +#ifdef CONFIG_TAGGING_INTERN
4709 +       raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag);
4710 +#endif
4711         raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
4712         raw_inode->i_size = cpu_to_le32(inode->i_size);
4713         raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
4714 @@ -1308,7 +1362,8 @@ int ext2_setattr(struct dentry *dentry, 
4715         if (error)
4716                 return error;
4717         if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
4718 -           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
4719 +           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) ||
4720 +           (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) {
4721                 error = DQUOT_TRANSFER(inode, iattr) ? -EDQUOT : 0;
4722                 if (error)
4723                         return error;
4724 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/ioctl.c
4725 --- linux-2.6.16.20/fs/ext2/ioctl.c     2006-04-09 13:49:53 +0200
4726 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/ioctl.c        2006-04-26 19:07:00 +0200
4727 @@ -11,6 +11,7 @@
4728  #include <linux/capability.h>
4729  #include <linux/time.h>
4730  #include <linux/sched.h>
4731 +#include <linux/mount.h>
4732  #include <asm/current.h>
4733  #include <asm/uaccess.h>
4734  
4735 @@ -30,7 +31,8 @@ int ext2_ioctl (struct inode * inode, st
4736         case EXT2_IOC_SETFLAGS: {
4737                 unsigned int oldflags;
4738  
4739 -               if (IS_RDONLY(inode))
4740 +               if (IS_RDONLY(inode) ||
4741 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
4742                         return -EROFS;
4743  
4744                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
4745 @@ -50,7 +52,9 @@ int ext2_ioctl (struct inode * inode, st
4746                  *
4747                  * This test looks nicer. Thanks to Pauline Middelink
4748                  */
4749 -               if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
4750 +               if ((oldflags & EXT2_IMMUTABLE_FL) ||
4751 +                       ((flags ^ oldflags) & (EXT2_APPEND_FL |
4752 +                       EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL))) {
4753                         if (!capable(CAP_LINUX_IMMUTABLE))
4754                                 return -EPERM;
4755                 }
4756 @@ -69,7 +73,8 @@ int ext2_ioctl (struct inode * inode, st
4757         case EXT2_IOC_SETVERSION:
4758                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
4759                         return -EPERM;
4760 -               if (IS_RDONLY(inode))
4761 +               if (IS_RDONLY(inode) ||
4762 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
4763                         return -EROFS;
4764                 if (get_user(inode->i_generation, (int __user *) arg))
4765                         return -EFAULT; 
4766 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/namei.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/namei.c
4767 --- linux-2.6.16.20/fs/ext2/namei.c     2006-02-18 14:40:21 +0100
4768 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/namei.c        2006-04-26 19:07:00 +0200
4769 @@ -31,6 +31,7 @@
4770   */
4771  
4772  #include <linux/pagemap.h>
4773 +#include <linux/vserver/tag.h>
4774  #include "ext2.h"
4775  #include "xattr.h"
4776  #include "acl.h"
4777 @@ -82,6 +83,7 @@ static struct dentry *ext2_lookup(struct
4778                 inode = iget(dir->i_sb, ino);
4779                 if (!inode)
4780                         return ERR_PTR(-EACCES);
4781 +               dx_propagate_tag(nd, inode);
4782         }
4783         return d_splice_alias(inode, dentry);
4784  }
4785 @@ -407,6 +409,7 @@ struct inode_operations ext2_dir_inode_o
4786  #endif
4787         .setattr        = ext2_setattr,
4788         .permission     = ext2_permission,
4789 +       .sync_flags     = ext2_sync_flags,
4790  };
4791  
4792  struct inode_operations ext2_special_inode_operations = {
4793 @@ -418,4 +421,5 @@ struct inode_operations ext2_special_ino
4794  #endif
4795         .setattr        = ext2_setattr,
4796         .permission     = ext2_permission,
4797 +       .sync_flags     = ext2_sync_flags,
4798  };
4799 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/super.c
4800 --- linux-2.6.16.20/fs/ext2/super.c     2006-02-18 14:40:21 +0100
4801 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/super.c        2006-04-26 19:07:00 +0200
4802 @@ -230,8 +230,8 @@ static int ext2_show_options(struct seq_
4803  }
4804  
4805  #ifdef CONFIG_QUOTA
4806 -static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off);
4807 -static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
4808 +static ssize_t ext2_quota_read(struct dqhash *hash, int type, char *data, size_t len, loff_t off);
4809 +static ssize_t ext2_quota_write(struct dqhash *hash, int type, const char *data, size_t len, loff_t off);
4810  #endif
4811  
4812  static struct super_operations ext2_sops = {
4813 @@ -289,7 +289,7 @@ enum {
4814         Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
4815         Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
4816         Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota,
4817 -       Opt_usrquota, Opt_grpquota
4818 +       Opt_usrquota, Opt_grpquota, Opt_tag, Opt_notag, Opt_tagid
4819  };
4820  
4821  static match_table_t tokens = {
4822 @@ -317,6 +317,10 @@ static match_table_t tokens = {
4823         {Opt_acl, "acl"},
4824         {Opt_noacl, "noacl"},
4825         {Opt_xip, "xip"},
4826 +       {Opt_tag, "tag"},
4827 +       {Opt_notag, "notag"},
4828 +       {Opt_tagid, "tagid=%u"},
4829 +       {Opt_tag, "tagxid"},
4830         {Opt_grpquota, "grpquota"},
4831         {Opt_ignore, "noquota"},
4832         {Opt_quota, "quota"},
4833 @@ -380,6 +384,20 @@ static int parse_options (char * options
4834                 case Opt_nouid32:
4835                         set_opt (sbi->s_mount_opt, NO_UID32);
4836                         break;
4837 +#ifndef CONFIG_TAGGING_NONE
4838 +               case Opt_tag:
4839 +                       set_opt (sbi->s_mount_opt, TAGGED);
4840 +                       break;
4841 +               case Opt_notag:
4842 +                       clear_opt (sbi->s_mount_opt, TAGGED);
4843 +                       break;
4844 +#endif
4845 +#ifdef CONFIG_PROPAGATE
4846 +               case Opt_tagid:
4847 +                       /* use args[0] */
4848 +                       set_opt (sbi->s_mount_opt, TAGGED);
4849 +                       break;
4850 +#endif
4851                 case Opt_nocheck:
4852                         clear_opt (sbi->s_mount_opt, CHECK);
4853                         break;
4854 @@ -681,6 +699,8 @@ static int ext2_fill_super(struct super_
4855         if (!parse_options ((char *) data, sbi))
4856                 goto failed_mount;
4857  
4858 +       if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED)
4859 +               sb->s_flags |= MS_TAGGED;
4860         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
4861                 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
4862                  MS_POSIXACL : 0);
4863 @@ -990,6 +1010,13 @@ static int ext2_remount (struct super_bl
4864                 goto restore_opts;
4865         }
4866  
4867 +       if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) &&
4868 +               !(sb->s_flags & MS_TAGGED)) {
4869 +               printk("EXT2-fs: %s: tagging not permitted on remount.\n",
4870 +                      sb->s_id);
4871 +               return -EINVAL;
4872 +       }
4873 +
4874         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
4875                 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
4876  
4877 @@ -1101,10 +1128,11 @@ static struct super_block *ext2_get_sb(s
4878   * acquiring the locks... As quota files are never truncated and quota code
4879   * itself serializes the operations (and noone else should touch the files)
4880   * we don't have to be afraid of races */
4881 -static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
4882 +static ssize_t ext2_quota_read(struct dqhash *hash, int type, char *data,
4883                                size_t len, loff_t off)
4884  {
4885 -       struct inode *inode = sb_dqopt(sb)->files[type];
4886 +       struct inode *inode = dqh_dqopt(hash)->files[type];
4887 +       struct super_block *sb = hash->dqh_sb;
4888         sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb);
4889         int err = 0;
4890         int offset = off & (sb->s_blocksize - 1);
4891 @@ -1145,10 +1173,11 @@ static ssize_t ext2_quota_read(struct su
4892  }
4893  
4894  /* Write to quotafile */
4895 -static ssize_t ext2_quota_write(struct super_block *sb, int type,
4896 +static ssize_t ext2_quota_write(struct dqhash *hash, int type,
4897                                 const char *data, size_t len, loff_t off)
4898  {
4899 -       struct inode *inode = sb_dqopt(sb)->files[type];
4900 +       struct inode *inode = dqh_dqopt(hash)->files[type];
4901 +       struct super_block *sb = hash->dqh_sb;
4902         sector_t blk = off >> EXT2_BLOCK_SIZE_BITS(sb);
4903         int err = 0;
4904         int offset = off & (sb->s_blocksize - 1);
4905 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/symlink.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/symlink.c
4906 --- linux-2.6.16.20/fs/ext2/symlink.c   2005-08-29 22:25:30 +0200
4907 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/symlink.c      2006-04-26 19:07:00 +0200
4908 @@ -38,6 +38,7 @@ struct inode_operations ext2_symlink_ino
4909         .listxattr      = ext2_listxattr,
4910         .removexattr    = generic_removexattr,
4911  #endif
4912 +       .sync_flags     = ext2_sync_flags,
4913  };
4914   
4915  struct inode_operations ext2_fast_symlink_inode_operations = {
4916 @@ -49,4 +50,5 @@ struct inode_operations ext2_fast_symlin
4917         .listxattr      = ext2_listxattr,
4918         .removexattr    = generic_removexattr,
4919  #endif
4920 +       .sync_flags     = ext2_sync_flags,
4921  };
4922 diff -NurpP --minimal linux-2.6.16.20/fs/ext2/xattr.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/xattr.c
4923 --- linux-2.6.16.20/fs/ext2/xattr.c     2006-02-18 14:40:21 +0100
4924 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext2/xattr.c        2006-04-26 19:07:00 +0200
4925 @@ -60,6 +60,7 @@
4926  #include <linux/mbcache.h>
4927  #include <linux/quotaops.h>
4928  #include <linux/rwsem.h>
4929 +#include <linux/vs_dlimit.h>
4930  #include "ext2.h"
4931  #include "xattr.h"
4932  #include "acl.h"
4933 @@ -645,8 +646,12 @@ ext2_xattr_set2(struct inode *inode, str
4934                                    the inode.  */
4935                                 ea_bdebug(new_bh, "reusing block");
4936  
4937 +                               error = -ENOSPC;
4938 +                               if (DLIMIT_ALLOC_BLOCK(inode, 1))
4939 +                                       goto cleanup;
4940                                 error = -EDQUOT;
4941                                 if (DQUOT_ALLOC_BLOCK(inode, 1)) {
4942 +                                       DLIMIT_FREE_BLOCK(inode, 1);
4943                                         unlock_buffer(new_bh);
4944                                         goto cleanup;
4945                                 }
4946 @@ -740,6 +745,7 @@ ext2_xattr_set2(struct inode *inode, str
4947                                 le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
4948                         if (ce)
4949                                 mb_cache_entry_release(ce);
4950 +                       DLIMIT_FREE_BLOCK(inode, 1);
4951                         DQUOT_FREE_BLOCK(inode, 1);
4952                         mark_buffer_dirty(old_bh);
4953                         ea_bdebug(old_bh, "refcount now=%d",
4954 @@ -804,6 +810,7 @@ ext2_xattr_delete_inode(struct inode *in
4955                 mark_buffer_dirty(bh);
4956                 if (IS_SYNC(inode))
4957                         sync_dirty_buffer(bh);
4958 +               DLIMIT_FREE_BLOCK(inode, 1);
4959                 DQUOT_FREE_BLOCK(inode, 1);
4960         }
4961         EXT2_I(inode)->i_file_acl = 0;
4962 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/balloc.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/balloc.c
4963 --- linux-2.6.16.20/fs/ext3/balloc.c    2006-02-18 14:40:21 +0100
4964 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/balloc.c       2006-04-26 19:07:00 +0200
4965 @@ -20,6 +20,8 @@
4966  #include <linux/ext3_jbd.h>
4967  #include <linux/quotaops.h>
4968  #include <linux/buffer_head.h>
4969 +#include <linux/vs_dlimit.h>
4970 +#include <linux/vs_tag.h>
4971  
4972  /*
4973   * balloc.c contains the blocks allocation and deallocation routines
4974 @@ -504,8 +506,10 @@ void ext3_free_blocks(handle_t *handle, 
4975                 return;
4976         }
4977         ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
4978 -       if (dquot_freed_blocks)
4979 +       if (dquot_freed_blocks) {
4980 +               DLIMIT_FREE_BLOCK(inode, dquot_freed_blocks);
4981                 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
4982 +       }
4983         return;
4984  }
4985  
4986 @@ -1116,18 +1120,32 @@ out:
4987         return ret;
4988  }
4989  
4990 -static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
4991 +static int ext3_has_free_blocks(struct super_block *sb)
4992  {
4993 -       int free_blocks, root_blocks;
4994 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
4995 +       int free_blocks, root_blocks, cond;
4996  
4997         free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
4998         root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
4999 -       if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
5000 +
5001 +       vxdprintk(VXD_CBIT(dlim, 3),
5002 +               "ext3_has_free_blocks(%p): free=%u, root=%u",
5003 +               sb, free_blocks, root_blocks);
5004 +
5005 +       DLIMIT_ADJUST_BLOCK(sb, dx_current_tag(), &free_blocks, &root_blocks);
5006 +
5007 +       cond = (free_blocks < root_blocks + 1 &&
5008 +               !capable(CAP_SYS_RESOURCE) &&
5009                 sbi->s_resuid != current->fsuid &&
5010 -               (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
5011 -               return 0;
5012 -       }
5013 -       return 1;
5014 +               (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid)));
5015 +
5016 +       vxdprintk(VXD_CBIT(dlim, 3),
5017 +               "ext3_has_free_blocks(%p): %u<%u+1, %c, %u!=%u r=%d",
5018 +               sb, free_blocks, root_blocks,
5019 +               !capable(CAP_SYS_RESOURCE)?'1':'0',
5020 +               sbi->s_resuid, current->fsuid, cond?0:1);
5021 +
5022 +       return (cond ? 0 : 1);
5023  }
5024  
5025  /*
5026 @@ -1138,7 +1156,7 @@ static int ext3_has_free_blocks(struct e
5027   */
5028  int ext3_should_retry_alloc(struct super_block *sb, int *retries)
5029  {
5030 -       if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
5031 +       if (!ext3_has_free_blocks(sb) || (*retries)++ > 3)
5032                 return 0;
5033  
5034         jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
5035 @@ -1193,6 +1211,8 @@ int ext3_new_block(handle_t *handle, str
5036                 *errp = -EDQUOT;
5037                 return 0;
5038         }
5039 +       if (DLIMIT_ALLOC_BLOCK(inode, 1))
5040 +           goto out_dlimit;
5041  
5042         sbi = EXT3_SB(sb);
5043         es = EXT3_SB(sb)->s_es;
5044 @@ -1209,7 +1229,7 @@ int ext3_new_block(handle_t *handle, str
5045         if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
5046                 my_rsv = &block_i->rsv_window_node;
5047  
5048 -       if (!ext3_has_free_blocks(sbi)) {
5049 +       if (!ext3_has_free_blocks(sb)) {
5050                 *errp = -ENOSPC;
5051                 goto out;
5052         }
5053 @@ -1393,6 +1413,9 @@ allocated:
5054  io_error:
5055         *errp = -EIO;
5056  out:
5057 +       if (!performed_allocation)
5058 +               DLIMIT_FREE_BLOCK(inode, 1);
5059 +out_dlimit:
5060         if (fatal) {
5061                 *errp = fatal;
5062                 ext3_std_error(sb, fatal);
5063 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/file.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/file.c
5064 --- linux-2.6.16.20/fs/ext3/file.c      2005-08-29 22:25:30 +0200
5065 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/file.c 2006-04-26 19:07:00 +0200
5066 @@ -119,6 +119,7 @@ struct file_operations ext3_file_operati
5067         .release        = ext3_release_file,
5068         .fsync          = ext3_sync_file,
5069         .sendfile       = generic_file_sendfile,
5070 +       .sendpage       = generic_file_sendpage,
5071  };
5072  
5073  struct inode_operations ext3_file_inode_operations = {
5074 @@ -131,5 +132,6 @@ struct inode_operations ext3_file_inode_
5075         .removexattr    = generic_removexattr,
5076  #endif
5077         .permission     = ext3_permission,
5078 +       .sync_flags     = ext3_sync_flags,
5079  };
5080  
5081 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/ialloc.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/ialloc.c
5082 --- linux-2.6.16.20/fs/ext3/ialloc.c    2006-04-09 13:49:53 +0200
5083 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/ialloc.c       2006-04-26 19:07:00 +0200
5084 @@ -23,6 +23,8 @@
5085  #include <linux/buffer_head.h>
5086  #include <linux/random.h>
5087  #include <linux/bitops.h>
5088 +#include <linux/vs_dlimit.h>
5089 +#include <linux/vs_tag.h>
5090  
5091  #include <asm/byteorder.h>
5092  
5093 @@ -127,6 +129,7 @@ void ext3_free_inode (handle_t *handle, 
5094         ext3_xattr_delete_inode(handle, inode);
5095         DQUOT_FREE_INODE(inode);
5096         DQUOT_DROP(inode);
5097 +       DLIMIT_FREE_INODE(inode);
5098  
5099         is_directory = S_ISDIR(inode->i_mode);
5100  
5101 @@ -443,6 +446,12 @@ struct inode *ext3_new_inode(handle_t *h
5102         inode = new_inode(sb);
5103         if (!inode)
5104                 return ERR_PTR(-ENOMEM);
5105 +
5106 +       inode->i_tag = dx_current_fstag(sb);
5107 +       if (DLIMIT_ALLOC_INODE(inode)) {
5108 +               err = -ENOSPC;
5109 +               goto out_dlimit;
5110 +       }
5111         ei = EXT3_I(inode);
5112  
5113         sbi = EXT3_SB(sb);
5114 @@ -565,7 +574,8 @@ got:
5115         ei->i_dir_start_lookup = 0;
5116         ei->i_disksize = 0;
5117  
5118 -       ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL;
5119 +       ei->i_flags = EXT3_I(dir)->i_flags &
5120 +               ~(EXT3_INDEX_FL|EXT3_IUNLINK_FL|EXT3_BARRIER_FL);
5121         if (S_ISLNK(mode))
5122                 ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL);
5123         /* dirsync only applies to directories */
5124 @@ -620,6 +630,8 @@ got:
5125  fail:
5126         ext3_std_error(sb, err);
5127  out:
5128 +       DLIMIT_FREE_INODE(inode);
5129 +out_dlimit:
5130         iput(inode);
5131         ret = ERR_PTR(err);
5132  really_out:
5133 @@ -631,6 +643,7 @@ fail_free_drop:
5134  
5135  fail_drop:
5136         DQUOT_DROP(inode);
5137 +       DLIMIT_FREE_INODE(inode);
5138         inode->i_flags |= S_NOQUOTA;
5139         inode->i_nlink = 0;
5140         iput(inode);
5141 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/inode.c
5142 --- linux-2.6.16.20/fs/ext3/inode.c     2006-04-09 13:49:53 +0200
5143 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/inode.c        2006-04-26 19:07:00 +0200
5144 @@ -36,6 +36,7 @@
5145  #include <linux/writeback.h>
5146  #include <linux/mpage.h>
5147  #include <linux/uio.h>
5148 +#include <linux/vserver/tag.h>
5149  #include "xattr.h"
5150  #include "acl.h"
5151  
5152 @@ -2422,19 +2423,77 @@ void ext3_set_inode_flags(struct inode *
5153  {
5154         unsigned int flags = EXT3_I(inode)->i_flags;
5155  
5156 -       inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
5157 +       inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER |
5158 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
5159 +
5160 +       if (flags & EXT3_IMMUTABLE_FL)
5161 +               inode->i_flags |= S_IMMUTABLE;
5162 +       if (flags & EXT3_IUNLINK_FL)
5163 +               inode->i_flags |= S_IUNLINK;
5164 +       if (flags & EXT3_BARRIER_FL)
5165 +               inode->i_flags |= S_BARRIER;
5166 +
5167         if (flags & EXT3_SYNC_FL)
5168                 inode->i_flags |= S_SYNC;
5169         if (flags & EXT3_APPEND_FL)
5170                 inode->i_flags |= S_APPEND;
5171 -       if (flags & EXT3_IMMUTABLE_FL)
5172 -               inode->i_flags |= S_IMMUTABLE;
5173         if (flags & EXT3_NOATIME_FL)
5174                 inode->i_flags |= S_NOATIME;
5175         if (flags & EXT3_DIRSYNC_FL)
5176                 inode->i_flags |= S_DIRSYNC;
5177  }
5178  
5179 +int ext3_sync_flags(struct inode *inode)
5180 +{
5181 +       unsigned int oldflags, newflags;
5182 +       int err = 0;
5183 +
5184 +       oldflags = EXT3_I(inode)->i_flags;
5185 +       newflags = oldflags & ~(EXT3_APPEND_FL |
5186 +               EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL |
5187 +               EXT3_BARRIER_FL | EXT3_NOATIME_FL |
5188 +               EXT3_SYNC_FL | EXT3_DIRSYNC_FL);
5189 +       /* re-derive every bit cleared above from the in-core inode */
5190 +       if (IS_APPEND(inode))
5191 +               newflags |= EXT3_APPEND_FL;
5192 +       if (IS_IMMUTABLE(inode))
5193 +               newflags |= EXT3_IMMUTABLE_FL;
5194 +       if (IS_IUNLINK(inode))
5195 +               newflags |= EXT3_IUNLINK_FL;
5196 +       if (IS_BARRIER(inode))
5197 +               newflags |= EXT3_BARRIER_FL;
5198 +
5199 +       /* test i_flags directly so superblock flags are not copied */
5200 +       if (inode->i_flags & S_NOATIME)
5201 +               newflags |= EXT3_NOATIME_FL;
5202 +       if (inode->i_flags & S_SYNC)
5203 +               newflags |= EXT3_SYNC_FL;
5204 +       if (inode->i_flags & S_DIRSYNC)
5205 +               newflags |= EXT3_DIRSYNC_FL;
5206 +       /* start a journal handle only if some flag bit changed */
5207 +       if (oldflags ^ newflags) {
5208 +               handle_t *handle;
5209 +               struct ext3_iloc iloc;
5210 +
5211 +               handle = ext3_journal_start(inode, 1);
5212 +               if (IS_ERR(handle))
5213 +                       return PTR_ERR(handle);
5214 +               if (IS_SYNC(inode))
5215 +                       handle->h_sync = 1;
5216 +               err = ext3_reserve_inode_write(handle, inode, &iloc);
5217 +               if (err)
5218 +                       goto flags_err;
5219 +
5220 +               EXT3_I(inode)->i_flags = newflags;
5221 +               inode->i_ctime = CURRENT_TIME;
5222 +
5223 +               err = ext3_mark_iloc_dirty(handle, inode, &iloc);
5224 +       flags_err:
5225 +               ext3_journal_stop(handle);
5226 +       }
5227 +       return err;
5228 +}
5229 +
5230  void ext3_read_inode(struct inode * inode)
5231  {
5232         struct ext3_iloc iloc;
5233 @@ -2442,6 +2501,8 @@ void ext3_read_inode(struct inode * inod
5234         struct ext3_inode_info *ei = EXT3_I(inode);
5235         struct buffer_head *bh;
5236         int block;
5237 +       uid_t uid;
5238 +       gid_t gid;
5239  
5240  #ifdef CONFIG_EXT3_FS_POSIX_ACL
5241         ei->i_acl = EXT3_ACL_NOT_CACHED;
5242 @@ -2454,12 +2515,17 @@ void ext3_read_inode(struct inode * inod
5243         bh = iloc.bh;
5244         raw_inode = ext3_raw_inode(&iloc);
5245         inode->i_mode = le16_to_cpu(raw_inode->i_mode);
5246 -       inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
5247 -       inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
5248 +       uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
5249 +       gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
5250         if(!(test_opt (inode->i_sb, NO_UID32))) {
5251 -               inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
5252 -               inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
5253 +               uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
5254 +               gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
5255         }
5256 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
5257 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
5258 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid,
5259 +               le16_to_cpu(raw_inode->i_raw_tag));
5260 +
5261         inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
5262         inode->i_size = le32_to_cpu(raw_inode->i_size);
5263         inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
5264 @@ -2586,6 +2652,8 @@ static int ext3_do_update_inode(handle_t
5265         struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
5266         struct ext3_inode_info *ei = EXT3_I(inode);
5267         struct buffer_head *bh = iloc->bh;
5268 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
5269 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
5270         int err = 0, rc, block;
5271  
5272         /* For fields not not tracking in the in-memory inode,
5273 @@ -2595,29 +2663,32 @@ static int ext3_do_update_inode(handle_t
5274  
5275         raw_inode->i_mode = cpu_to_le16(inode->i_mode);
5276         if(!(test_opt(inode->i_sb, NO_UID32))) {
5277 -               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
5278 -               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
5279 +               raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
5280 +               raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid));
5281  /*
5282   * Fix up interoperability with old kernels. Otherwise, old inodes get
5283   * re-used with the upper 16 bits of the uid/gid intact
5284   */
5285                 if(!ei->i_dtime) {
5286                         raw_inode->i_uid_high =
5287 -                               cpu_to_le16(high_16_bits(inode->i_uid));
5288 +                               cpu_to_le16(high_16_bits(uid));
5289                         raw_inode->i_gid_high =
5290 -                               cpu_to_le16(high_16_bits(inode->i_gid));
5291 +                               cpu_to_le16(high_16_bits(gid));
5292                 } else {
5293                         raw_inode->i_uid_high = 0;
5294                         raw_inode->i_gid_high = 0;
5295                 }
5296         } else {
5297                 raw_inode->i_uid_low =
5298 -                       cpu_to_le16(fs_high2lowuid(inode->i_uid));
5299 +                       cpu_to_le16(fs_high2lowuid(uid));
5300                 raw_inode->i_gid_low =
5301 -                       cpu_to_le16(fs_high2lowgid(inode->i_gid));
5302 +                       cpu_to_le16(fs_high2lowgid(gid));
5303                 raw_inode->i_uid_high = 0;
5304                 raw_inode->i_gid_high = 0;
5305         }
5306 +#ifdef CONFIG_TAGGING_INTERN
5307 +       raw_inode->i_raw_tag = cpu_to_le16(inode->i_tag);
5308 +#endif
5309         raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
5310         raw_inode->i_size = cpu_to_le32(ei->i_disksize);
5311         raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
5312 @@ -2770,7 +2841,8 @@ int ext3_setattr(struct dentry *dentry, 
5313                 return error;
5314  
5315         if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
5316 -               (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
5317 +               (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
5318 +               (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) {
5319                 handle_t *handle;
5320  
5321                 /* (user+group)*(old+new) structure, inode write (sb,
5322 @@ -2792,6 +2864,8 @@ int ext3_setattr(struct dentry *dentry, 
5323                         inode->i_uid = attr->ia_uid;
5324                 if (attr->ia_valid & ATTR_GID)
5325                         inode->i_gid = attr->ia_gid;
5326 +               if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
5327 +                       inode->i_tag = attr->ia_tag;
5328                 error = ext3_mark_inode_dirty(handle, inode);
5329                 ext3_journal_stop(handle);
5330         }
5331 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/ioctl.c
5332 --- linux-2.6.16.20/fs/ext3/ioctl.c     2006-02-18 14:40:22 +0100
5333 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/ioctl.c        2006-04-29 02:58:07 +0200
5334 @@ -8,11 +8,13 @@
5335   */
5336  
5337  #include <linux/fs.h>
5338 +#include <linux/mount.h>
5339  #include <linux/jbd.h>
5340  #include <linux/capability.h>
5341  #include <linux/ext3_fs.h>
5342  #include <linux/ext3_jbd.h>
5343  #include <linux/time.h>
5344 +#include <linux/vserver/tag.h>
5345  #include <asm/uaccess.h>
5346  
5347  
5348 @@ -36,7 +38,8 @@ int ext3_ioctl (struct inode * inode, st
5349                 unsigned int oldflags;
5350                 unsigned int jflag;
5351  
5352 -               if (IS_RDONLY(inode))
5353 +               if (IS_RDONLY(inode) ||
5354 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
5355                         return -EROFS;
5356  
5357                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
5358 @@ -59,7 +62,9 @@ int ext3_ioctl (struct inode * inode, st
5359                  *
5360                  * This test looks nicer. Thanks to Pauline Middelink
5361                  */
5362 -               if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
5363 +               if ((oldflags & EXT3_IMMUTABLE_FL) ||
5364 +                       ((flags ^ oldflags) & (EXT3_APPEND_FL |
5365 +                       EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL))) {
5366                         if (!capable(CAP_LINUX_IMMUTABLE))
5367                                 return -EPERM;
5368                 }
5369 @@ -112,7 +117,8 @@ flags_err:
5370  
5371                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
5372                         return -EPERM;
5373 -               if (IS_RDONLY(inode))
5374 +               if (IS_RDONLY(inode) ||
5375 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
5376                         return -EROFS;
5377                 if (get_user(generation, (int __user *) arg))
5378                         return -EFAULT;
5379 @@ -166,7 +172,8 @@ flags_err:
5380                 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
5381                         return -ENOTTY;
5382  
5383 -               if (IS_RDONLY(inode))
5384 +               if (IS_RDONLY(inode) ||
5385 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
5386                         return -EROFS;
5387  
5388                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
5389 @@ -201,7 +208,8 @@ flags_err:
5390                 if (!capable(CAP_SYS_RESOURCE))
5391                         return -EPERM;
5392  
5393 -               if (IS_RDONLY(inode))
5394 +               if (IS_RDONLY(inode) ||
5395 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
5396                         return -EROFS;
5397  
5398                 if (get_user(n_blocks_count, (__u32 __user *)arg))
5399 @@ -222,7 +230,8 @@ flags_err:
5400                 if (!capable(CAP_SYS_RESOURCE))
5401                         return -EPERM;
5402  
5403 -               if (IS_RDONLY(inode))
5404 +               if (IS_RDONLY(inode) ||
5405 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
5406                         return -EROFS;
5407  
5408                 if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg,
5409 @@ -237,6 +246,38 @@ flags_err:
5410                 return err;
5411         }
5412  
5413 +#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_TAGGING_NONE)
5414 +       case EXT3_IOC_SETTAG: {
5415 +               handle_t *handle;
5416 +               struct ext3_iloc iloc;
5417 +               int tag;
5418 +               int err;
5419 +
5420 +               /* fixme: if stealth, return -ENOTTY */
5421 +               if (!capable(CAP_CONTEXT))
5422 +                       return -EPERM;
5423 +               if (IS_RDONLY(inode))
5424 +                       return -EROFS;
5425 +               if (!(inode->i_sb->s_flags & MS_TAGGED))
5426 +                       return -ENOSYS;
5427 +               if (get_user(tag, (int __user *) arg))
5428 +                       return -EFAULT;
5429 +
5430 +               handle = ext3_journal_start(inode, 1);
5431 +               if (IS_ERR(handle))
5432 +                       return PTR_ERR(handle);
5433 +               err = ext3_reserve_inode_write(handle, inode, &iloc);
5434 +               if (!err) {
5435 +                       /* only the low 16 bits of the tag are kept */
5436 +                       inode->i_tag = (tag & 0xFFFF);
5437 +                       inode->i_ctime = CURRENT_TIME;
5438 +                       err = ext3_mark_iloc_dirty(handle, inode, &iloc);
5439 +               }
5440 +               /* stop the handle on every path, also when reserve failed */
5441 +               ext3_journal_stop(handle);
5442 +               return err;
5443 +       }
5444 +#endif
5445  
5446         default:
5447                 return -ENOTTY;
5448 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/namei.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/namei.c
5449 --- linux-2.6.16.20/fs/ext3/namei.c     2006-04-09 13:49:53 +0200
5450 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/namei.c        2006-04-26 19:07:00 +0200
5451 @@ -36,6 +36,7 @@
5452  #include <linux/quotaops.h>
5453  #include <linux/buffer_head.h>
5454  #include <linux/smp_lock.h>
5455 +#include <linux/vserver/tag.h>
5456  
5457  #include "namei.h"
5458  #include "xattr.h"
5459 @@ -1004,6 +1005,7 @@ static struct dentry *ext3_lookup(struct
5460  
5461                 if (!inode)
5462                         return ERR_PTR(-EACCES);
5463 +               dx_propagate_tag(nd, inode);
5464         }
5465         return d_splice_alias(inode, dentry);
5466  }
5467 @@ -2373,6 +2375,7 @@ struct inode_operations ext3_dir_inode_o
5468         .removexattr    = generic_removexattr,
5469  #endif
5470         .permission     = ext3_permission,
5471 +       .sync_flags     = ext3_sync_flags,
5472  };
5473  
5474  struct inode_operations ext3_special_inode_operations = {
5475 @@ -2384,4 +2387,5 @@ struct inode_operations ext3_special_ino
5476         .removexattr    = generic_removexattr,
5477  #endif
5478         .permission     = ext3_permission,
5479 +       .sync_flags     = ext3_sync_flags,
5480  }; 
5481 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/super.c
5482 --- linux-2.6.16.20/fs/ext3/super.c     2006-04-09 13:49:53 +0200
5483 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/super.c        2006-04-26 19:07:00 +0200
5484 @@ -563,12 +563,12 @@ static int ext3_write_dquot(struct dquot
5485  static int ext3_acquire_dquot(struct dquot *dquot);
5486  static int ext3_release_dquot(struct dquot *dquot);
5487  static int ext3_mark_dquot_dirty(struct dquot *dquot);
5488 -static int ext3_write_info(struct super_block *sb, int type);
5489 -static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path);
5490 -static int ext3_quota_on_mount(struct super_block *sb, int type);
5491 -static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
5492 +static int ext3_write_info(struct dqhash *hash, int type);
5493 +static int ext3_quota_on(struct dqhash *hash, int type, int format_id, char *path);
5494 +static int ext3_quota_on_mount(struct dqhash *hash, int type);
5495 +static ssize_t ext3_quota_read(struct dqhash *hash, int type, char *data,
5496                                size_t len, loff_t off);
5497 -static ssize_t ext3_quota_write(struct super_block *sb, int type,
5498 +static ssize_t ext3_quota_write(struct dqhash *hash, int type,
5499                                 const char *data, size_t len, loff_t off);
5500  
5501  static struct dquot_operations ext3_quota_operations = {
5502 @@ -634,7 +634,7 @@ enum {
5503         Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
5504         Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
5505         Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
5506 -       Opt_grpquota
5507 +       Opt_grpquota, Opt_tag, Opt_notag, Opt_tagid
5508  };
5509  
5510  static match_table_t tokens = {
5511 @@ -683,6 +683,10 @@ static match_table_t tokens = {
5512         {Opt_quota, "quota"},
5513         {Opt_usrquota, "usrquota"},
5514         {Opt_barrier, "barrier=%u"},
5515 +       {Opt_tag, "tag"},
5516 +       {Opt_notag, "notag"},
5517 +       {Opt_tagid, "tagid=%u"},
5518 +       {Opt_tag, "tagxid"},
5519         {Opt_err, NULL},
5520         {Opt_resize, "resize"},
5521  };
5522 @@ -775,6 +779,20 @@ static int parse_options (char *options,
5523                 case Opt_nouid32:
5524                         set_opt (sbi->s_mount_opt, NO_UID32);
5525                         break;
5526 +#ifndef CONFIG_TAGGING_NONE
5527 +               case Opt_tag:
5528 +                       set_opt (sbi->s_mount_opt, TAGGED);
5529 +                       break;
5530 +               case Opt_notag:
5531 +                       clear_opt (sbi->s_mount_opt, TAGGED);
5532 +                       break;
5533 +#endif
5534 +#ifdef CONFIG_PROPAGATE
5535 +               case Opt_tagid:
5536 +                       /* use args[0] */
5537 +                       set_opt (sbi->s_mount_opt, TAGGED);
5538 +                       break;
5539 +#endif
5540                 case Opt_nocheck:
5541                         clear_opt (sbi->s_mount_opt, CHECK);
5542                         break;
5543 @@ -893,7 +911,7 @@ static int parse_options (char *options,
5544                 case Opt_grpjquota:
5545                         qtype = GRPQUOTA;
5546  set_qf_name:
5547 -                       if (sb_any_quota_enabled(sb)) {
5548 +                       if (dqh_any_quota_enabled(sb->s_dqh)) {
5549                                 printk(KERN_ERR
5550                                         "EXT3-fs: Cannot change journalled "
5551                                         "quota options when quota turned on.\n");
5552 @@ -931,7 +949,7 @@ set_qf_name:
5553                 case Opt_offgrpjquota:
5554                         qtype = GRPQUOTA;
5555  clear_qf_name:
5556 -                       if (sb_any_quota_enabled(sb)) {
5557 +                       if (dqh_any_quota_enabled(sb->s_dqh)) {
5558                                 printk(KERN_ERR "EXT3-fs: Cannot change "
5559                                         "journalled quota options when "
5560                                         "quota turned on.\n");
5561 @@ -959,7 +977,7 @@ clear_qf_name:
5562                         set_opt(sbi->s_mount_opt, GRPQUOTA);
5563                         break;
5564                 case Opt_noquota:
5565 -                       if (sb_any_quota_enabled(sb)) {
5566 +                       if (dqh_any_quota_enabled(sb->s_dqh)) {
5567                                 printk(KERN_ERR "EXT3-fs: Cannot change quota "
5568                                         "options when quota turned on.\n");
5569                                 return 0;
5570 @@ -1231,7 +1249,7 @@ static void ext3_orphan_cleanup (struct 
5571         /* Turn on quotas so that they are updated correctly */
5572         for (i = 0; i < MAXQUOTAS; i++) {
5573                 if (EXT3_SB(sb)->s_qf_names[i]) {
5574 -                       int ret = ext3_quota_on_mount(sb, i);
5575 +                       int ret = ext3_quota_on_mount(sb->s_dqh, i);
5576                         if (ret < 0)
5577                                 printk(KERN_ERR
5578                                         "EXT3-fs: Cannot turn on journalled "
5579 @@ -1281,8 +1299,8 @@ static void ext3_orphan_cleanup (struct 
5580  #ifdef CONFIG_QUOTA
5581         /* Turn quotas off */
5582         for (i = 0; i < MAXQUOTAS; i++) {
5583 -               if (sb_dqopt(sb)->files[i])
5584 -                       vfs_quota_off(sb, i);
5585 +               if (dqh_dqopt(sb->s_dqh)->files[i])
5586 +                       vfs_quota_off(sb->s_dqh, i);
5587         }
5588  #endif
5589         sb->s_flags = s_flags; /* Restore MS_RDONLY status */
5590 @@ -1429,6 +1447,9 @@ static int ext3_fill_super (struct super
5591                             NULL, 0))
5592                 goto failed_mount;
5593  
5594 +       if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAGGED)
5595 +               sb->s_flags |= MS_TAGGED;
5596 +
5597         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
5598                 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
5599  
5600 @@ -1620,8 +1641,8 @@ static int ext3_fill_super (struct super
5601         sb->s_export_op = &ext3_export_ops;
5602         sb->s_xattr = ext3_xattr_handlers;
5603  #ifdef CONFIG_QUOTA
5604 -       sb->s_qcop = &ext3_qctl_operations;
5605 -       sb->dq_op = &ext3_quota_operations;
5606 +       sb->s_dqh->dqh_qop = &ext3_quota_operations;
5607 +       sb->s_dqh->dqh_qcop = &ext3_qctl_operations;
5608  #endif
5609         INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
5610  
5611 @@ -2238,6 +2259,12 @@ static int ext3_remount (struct super_bl
5612  
5613         if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
5614                 ext3_abort(sb, __FUNCTION__, "Abort forced by user");
5615 +       if ((sbi->s_mount_opt & EXT3_MOUNT_TAGGED) &&
5616 +               !(sb->s_flags & MS_TAGGED)) {
5617 +               printk(KERN_ERR "EXT3-fs: %s: tagging not permitted "
5618 +                       "on remount.\n", sb->s_id);
5619 +               return -EINVAL;
5620 +       }
5621  
5622         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
5623                 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
5624 @@ -2391,7 +2418,7 @@ static int ext3_statfs (struct super_blo
5625  
5626  static inline struct inode *dquot_to_inode(struct dquot *dquot)
5627  {
5628 -       return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
5629 +       return dqh_dqopt(dquot->dq_dqh)->files[dquot->dq_type];
5630  }
5631  
5632  static int ext3_dquot_initialize(struct inode *inode, int type)
5633 @@ -2434,7 +2461,7 @@ static int ext3_write_dquot(struct dquot
5634  
5635         inode = dquot_to_inode(dquot);
5636         handle = ext3_journal_start(inode,
5637 -                                       EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
5638 +               EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_dqh->dqh_sb));
5639         if (IS_ERR(handle))
5640                 return PTR_ERR(handle);
5641         ret = dquot_commit(dquot);
5642 @@ -2450,7 +2477,7 @@ static int ext3_acquire_dquot(struct dqu
5643         handle_t *handle;
5644  
5645         handle = ext3_journal_start(dquot_to_inode(dquot),
5646 -                                       EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
5647 +               EXT3_QUOTA_INIT_BLOCKS(dquot->dq_dqh->dqh_sb));
5648         if (IS_ERR(handle))
5649                 return PTR_ERR(handle);
5650         ret = dquot_acquire(dquot);
5651 @@ -2466,7 +2493,7 @@ static int ext3_release_dquot(struct dqu
5652         handle_t *handle;
5653  
5654         handle = ext3_journal_start(dquot_to_inode(dquot),
5655 -                                       EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
5656 +               EXT3_QUOTA_DEL_BLOCKS(dquot->dq_dqh->dqh_sb));
5657         if (IS_ERR(handle))
5658                 return PTR_ERR(handle);
5659         ret = dquot_release(dquot);
5660 @@ -2479,8 +2506,8 @@ static int ext3_release_dquot(struct dqu
5661  static int ext3_mark_dquot_dirty(struct dquot *dquot)
5662  {
5663         /* Are we journalling quotas? */
5664 -       if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
5665 -           EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
5666 +       if (EXT3_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[USRQUOTA] ||
5667 +           EXT3_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[GRPQUOTA]) {
5668                 dquot_mark_dquot_dirty(dquot);
5669                 return ext3_write_dquot(dquot);
5670         } else {
5671 @@ -2488,8 +2515,9 @@ static int ext3_mark_dquot_dirty(struct 
5672         }
5673  }
5674  
5675 -static int ext3_write_info(struct super_block *sb, int type)
5676 +static int ext3_write_info(struct dqhash *hash, int type)
5677  {
5678 +       struct super_block *sb = hash->dqh_sb;
5679         int ret, err;
5680         handle_t *handle;
5681  
5682 @@ -2497,7 +2525,7 @@ static int ext3_write_info(struct super_
5683         handle = ext3_journal_start(sb->s_root->d_inode, 2);
5684         if (IS_ERR(handle))
5685                 return PTR_ERR(handle);
5686 -       ret = dquot_commit_info(sb, type);
5687 +       ret = dquot_commit_info(hash, type);
5688         err = ext3_journal_stop(handle);
5689         if (!ret)
5690                 ret = err;
5691 @@ -2508,18 +2536,20 @@ static int ext3_write_info(struct super_
5692   * Turn on quotas during mount time - we need to find
5693   * the quota file and such...
5694   */
5695 -static int ext3_quota_on_mount(struct super_block *sb, int type)
5696 +static int ext3_quota_on_mount(struct dqhash *hash, int type)
5697  {
5698 -       return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
5699 -                       EXT3_SB(sb)->s_jquota_fmt, type);
5700 +       return vfs_quota_on_mount(hash,
5701 +               EXT3_SB(hash->dqh_sb)->s_qf_names[type],
5702 +               EXT3_SB(hash->dqh_sb)->s_jquota_fmt, type);
5703  }
5704  
5705  /*
5706   * Standard function to be called on quota_on
5707   */
5708 -static int ext3_quota_on(struct super_block *sb, int type, int format_id,
5709 +static int ext3_quota_on(struct dqhash *hash, int type, int format_id,
5710                          char *path)
5711  {
5712 +       struct super_block *sb = hash->dqh_sb;
5713         int err;
5714         struct nameidata nd;
5715  
5716 @@ -2528,7 +2558,7 @@ static int ext3_quota_on(struct super_bl
5717         /* Not journalling quota? */
5718         if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
5719             !EXT3_SB(sb)->s_qf_names[GRPQUOTA])
5720 -               return vfs_quota_on(sb, type, format_id, path);
5721 +               return vfs_quota_on(hash, type, format_id, path);
5722         err = path_lookup(path, LOOKUP_FOLLOW, &nd);
5723         if (err)
5724                 return err;
5725 @@ -2543,17 +2573,18 @@ static int ext3_quota_on(struct super_bl
5726                         "EXT3-fs: Quota file not on filesystem root. "
5727                         "Journalled quota will not work.\n");
5728         path_release(&nd);
5729 -       return vfs_quota_on(sb, type, format_id, path);
5730 +       return vfs_quota_on(hash, type, format_id, path);
5731  }
5732  
5733  /* Read data from quotafile - avoid pagecache and such because we cannot afford
5734   * acquiring the locks... As quota files are never truncated and quota code
5735   * itself serializes the operations (and noone else should touch the files)
5736   * we don't have to be afraid of races */
5737 -static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
5738 +static ssize_t ext3_quota_read(struct dqhash *hash, int type, char *data,
5739                                size_t len, loff_t off)
5740  {
5741 -       struct inode *inode = sb_dqopt(sb)->files[type];
5742 +       struct inode *inode = dqh_dqopt(hash)->files[type];
5743 +       struct super_block *sb = hash->dqh_sb;
5744         sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
5745         int err = 0;
5746         int offset = off & (sb->s_blocksize - 1);
5747 @@ -2588,10 +2619,11 @@ static ssize_t ext3_quota_read(struct su
5748  
5749  /* Write to quotafile (we know the transaction is already started and has
5750   * enough credits) */
5751 -static ssize_t ext3_quota_write(struct super_block *sb, int type,
5752 +static ssize_t ext3_quota_write(struct dqhash *hash, int type,
5753                                 const char *data, size_t len, loff_t off)
5754  {
5755 -       struct inode *inode = sb_dqopt(sb)->files[type];
5756 +       struct inode *inode = dqh_dqopt(hash)->files[type];
5757 +       struct super_block *sb = hash->dqh_sb;
5758         sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
5759         int err = 0;
5760         int offset = off & (sb->s_blocksize - 1);
5761 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/symlink.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/symlink.c
5762 --- linux-2.6.16.20/fs/ext3/symlink.c   2005-08-29 22:25:30 +0200
5763 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/symlink.c      2006-04-26 19:07:00 +0200
5764 @@ -40,6 +40,7 @@ struct inode_operations ext3_symlink_ino
5765         .listxattr      = ext3_listxattr,
5766         .removexattr    = generic_removexattr,
5767  #endif
5768 +       .sync_flags     = ext3_sync_flags,
5769  };
5770  
5771  struct inode_operations ext3_fast_symlink_inode_operations = {
5772 @@ -51,4 +52,5 @@ struct inode_operations ext3_fast_symlin
5773         .listxattr      = ext3_listxattr,
5774         .removexattr    = generic_removexattr,
5775  #endif
5776 +       .sync_flags     = ext3_sync_flags,
5777  };
5778 diff -NurpP --minimal linux-2.6.16.20/fs/ext3/xattr.c linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/xattr.c
5779 --- linux-2.6.16.20/fs/ext3/xattr.c     2006-04-09 13:49:53 +0200
5780 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ext3/xattr.c        2006-04-26 19:07:00 +0200
5781 @@ -58,6 +58,7 @@
5782  #include <linux/mbcache.h>
5783  #include <linux/quotaops.h>
5784  #include <linux/rwsem.h>
5785 +#include <linux/vs_dlimit.h>
5786  #include "xattr.h"
5787  #include "acl.h"
5788  
5789 @@ -495,6 +496,7 @@ ext3_xattr_release_block(handle_t *handl
5790                         ext3_journal_dirty_metadata(handle, bh);
5791                         if (IS_SYNC(inode))
5792                                 handle->h_sync = 1;
5793 +                       DLIMIT_FREE_BLOCK(inode, 1);
5794                         DQUOT_FREE_BLOCK(inode, 1);
5795                         unlock_buffer(bh);
5796                         ea_bdebug(bh, "refcount now=%d; releasing",
5797 @@ -763,11 +765,14 @@ inserted:
5798                         if (new_bh == bs->bh)
5799                                 ea_bdebug(new_bh, "keeping");
5800                         else {
5801 +                               error = -ENOSPC;
5802 +                               if (DLIMIT_ALLOC_BLOCK(inode, 1))
5803 +                                       goto cleanup;
5804                                 /* The old block is released after updating
5805                                    the inode. */
5806                                 error = -EDQUOT;
5807                                 if (DQUOT_ALLOC_BLOCK(inode, 1))
5808 -                                       goto cleanup;
5809 +                                       goto cleanup_dlimit;
5810                                 error = ext3_journal_get_write_access(handle,
5811                                                                       new_bh);
5812                                 if (error)
5813 @@ -843,6 +848,8 @@ cleanup:
5814  
5815  cleanup_dquot:
5816         DQUOT_FREE_BLOCK(inode, 1);
5817 +cleanup_dlimit:
5818 +       DLIMIT_FREE_BLOCK(inode, 1);
5819         goto cleanup;
5820  
5821  bad_block:
5822 diff -NurpP --minimal linux-2.6.16.20/fs/fcntl.c linux-2.6.16.20-vs2.1.1-rc22/fs/fcntl.c
5823 --- linux-2.6.16.20/fs/fcntl.c  2006-02-18 14:40:22 +0100
5824 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/fcntl.c     2006-04-26 19:07:00 +0200
5825 @@ -18,6 +18,7 @@
5826  #include <linux/ptrace.h>
5827  #include <linux/signal.h>
5828  #include <linux/rcupdate.h>
5829 +#include <linux/vs_limit.h>
5830  
5831  #include <asm/poll.h>
5832  #include <asm/siginfo.h>
5833 @@ -85,6 +86,8 @@ repeat:
5834         error = -EMFILE;
5835         if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
5836                 goto out;
5837 +       if (!vx_files_avail(1))
5838 +               goto out;
5839  
5840         error = expand_files(files, newfd);
5841         if (error < 0)
5842 @@ -126,6 +129,7 @@ static int dupfd(struct file *file, unsi
5843                 FD_SET(fd, fdt->open_fds);
5844                 FD_CLR(fd, fdt->close_on_exec);
5845                 spin_unlock(&files->file_lock);
5846 +               vx_openfd_inc(fd);
5847                 fd_install(fd, file);
5848         } else {
5849                 spin_unlock(&files->file_lock);
5850 @@ -178,6 +182,9 @@ asmlinkage long sys_dup2(unsigned int ol
5851  
5852         if (tofree)
5853                 filp_close(tofree, files);
5854 +       else
5855 +               vx_openfd_inc(newfd);   /* fd was unused */
5856 +
5857         err = newfd;
5858  out:
5859         return err;
5860 @@ -481,7 +488,7 @@ void send_sigio(struct fown_struct *fown
5861         
5862         read_lock(&tasklist_lock);
5863         if (pid > 0) {
5864 -               p = find_task_by_pid(pid);
5865 +               p = find_task_by_real_pid(pid);
5866                 if (p) {
5867                         send_sigio_to_task(p, fown, fd, band);
5868                 }
5869 @@ -516,7 +523,7 @@ int send_sigurg(struct fown_struct *fown
5870         
5871         read_lock(&tasklist_lock);
5872         if (pid > 0) {
5873 -               p = find_task_by_pid(pid);
5874 +               p = find_task_by_real_pid(pid);
5875                 if (p) {
5876                         send_sigurg_to_task(p, fown);
5877                 }
5878 diff -NurpP --minimal linux-2.6.16.20/fs/file_table.c linux-2.6.16.20-vs2.1.1-rc22/fs/file_table.c
5879 --- linux-2.6.16.20/fs/file_table.c     2006-04-09 13:49:53 +0200
5880 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/file_table.c        2006-04-26 19:07:00 +0200
5881 @@ -22,6 +22,8 @@
5882  #include <linux/fsnotify.h>
5883  #include <linux/sysctl.h>
5884  #include <linux/percpu_counter.h>
5885 +#include <linux/vs_limit.h>
5886 +#include <linux/vs_context.h>
5887  
5888  #include <asm/atomic.h>
5889  
5890 @@ -119,6 +121,8 @@ struct file *get_empty_filp(void)
5891         rwlock_init(&f->f_owner.lock);
5892         /* f->f_version: 0 */
5893         INIT_LIST_HEAD(&f->f_u.fu_list);
5894 +       f->f_xid = vx_current_xid();
5895 +       vx_files_inc(f);
5896         return f;
5897  
5898  over:
5899 @@ -173,6 +177,8 @@ void fastcall __fput(struct file *file)
5900         fops_put(file->f_op);
5901         if (file->f_mode & FMODE_WRITE)
5902                 put_write_access(inode);
5903 +       vx_files_dec(file);
5904 +       file->f_xid = 0;
5905         file_kill(file);
5906         file->f_dentry = NULL;
5907         file->f_vfsmnt = NULL;
5908 @@ -238,6 +244,8 @@ void put_filp(struct file *file)
5909  {
5910         if (atomic_dec_and_test(&file->f_count)) {
5911                 security_file_free(file);
5912 +               vx_files_dec(file);
5913 +               file->f_xid = 0;
5914                 file_kill(file);
5915                 file_free(file);
5916         }
5917 diff -NurpP --minimal linux-2.6.16.20/fs/hfsplus/ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/hfsplus/ioctl.c
5918 --- linux-2.6.16.20/fs/hfsplus/ioctl.c  2006-04-09 13:49:53 +0200
5919 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/hfsplus/ioctl.c     2006-04-26 19:07:00 +0200
5920 @@ -16,6 +16,7 @@
5921  #include <linux/fs.h>
5922  #include <linux/sched.h>
5923  #include <linux/xattr.h>
5924 +#include <linux/mount.h>
5925  #include <asm/uaccess.h>
5926  #include "hfsplus_fs.h"
5927  
5928 @@ -35,7 +36,8 @@ int hfsplus_ioctl(struct inode *inode, s
5929                         flags |= EXT2_FLAG_NODUMP; /* EXT2_NODUMP_FL */
5930                 return put_user(flags, (int __user *)arg);
5931         case HFSPLUS_IOC_EXT2_SETFLAGS: {
5932 -               if (IS_RDONLY(inode))
5933 +               if (IS_RDONLY(inode) ||
5934 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
5935                         return -EROFS;
5936  
5937                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
5938 diff -NurpP --minimal linux-2.6.16.20/fs/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/inode.c
5939 --- linux-2.6.16.20/fs/inode.c  2006-02-18 14:40:22 +0100
5940 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/inode.c     2006-04-26 19:07:00 +0200
5941 @@ -116,6 +116,9 @@ static struct inode *alloc_inode(struct 
5942                 struct address_space * const mapping = &inode->i_data;
5943  
5944                 inode->i_sb = sb;
5945 +
5946 +               /* essential because of inode slab reuse */
5947 +               inode->i_tag = 0;
5948                 inode->i_blkbits = sb->s_blocksize_bits;
5949                 inode->i_flags = 0;
5950                 atomic_set(&inode->i_count, 1);
5951 @@ -128,6 +131,7 @@ static struct inode *alloc_inode(struct 
5952                 inode->i_bytes = 0;
5953                 inode->i_generation = 0;
5954  #ifdef CONFIG_QUOTA
5955 +               inode->i_dqh = dqhget(sb->s_dqh);
5956                 memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
5957  #endif
5958                 inode->i_pipe = NULL;
5959 @@ -175,6 +179,8 @@ void destroy_inode(struct inode *inode) 
5960         if (inode_has_buffers(inode))
5961                 BUG();
5962         security_inode_free(inode);
5963 +       if (dqhash_valid(inode->i_dqh))
5964 +               dqhput(inode->i_dqh);
5965         if (inode->i_sb->s_op->destroy_inode)
5966                 inode->i_sb->s_op->destroy_inode(inode);
5967         else
5968 @@ -236,6 +242,8 @@ void __iget(struct inode * inode)
5969         inodes_stat.nr_unused--;
5970  }
5971  
5972 +EXPORT_SYMBOL_GPL(__iget);
5973 +
5974  /**
5975   * clear_inode - clear an inode
5976   * @inode: inode to clear
5977 @@ -1272,12 +1280,13 @@ EXPORT_SYMBOL(inode_needs_sync);
5978  /* Function back in dquot.c */
5979  int remove_inode_dquot_ref(struct inode *, int, struct list_head *);
5980  
5981 -void remove_dquot_ref(struct super_block *sb, int type,
5982 +void remove_dquot_ref(struct dqhash *hash, int type,
5983                         struct list_head *tofree_head)
5984  {
5985         struct inode *inode;
5986 +       struct super_block *sb = hash->dqh_sb;
5987  
5988 -       if (!sb->dq_op)
5989 +       if (!hash->dqh_qop)
5990                 return; /* nothing to do */
5991         spin_lock(&inode_lock); /* This lock is for inodes code */
5992  
5993 diff -NurpP --minimal linux-2.6.16.20/fs/ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/ioctl.c
5994 --- linux-2.6.16.20/fs/ioctl.c  2006-04-09 13:49:53 +0200
5995 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ioctl.c     2006-04-29 02:58:07 +0200
5996 @@ -13,10 +13,19 @@
5997  #include <linux/fs.h>
5998  #include <linux/security.h>
5999  #include <linux/module.h>
6000 +#include <linux/proc_fs.h>
6001 +#include <linux/vserver/inode.h>
6002 +#include <linux/vserver/tag.h>
6003  
6004  #include <asm/uaccess.h>
6005  #include <asm/ioctls.h>
6006  
6007 +
6008 +#ifdef CONFIG_VSERVER_LEGACY
6009 +extern int vx_proc_ioctl(struct inode *, struct file *,
6010 +       unsigned int, unsigned long);
6011 +#endif
6012 +
6013  static long do_ioctl(struct file *filp, unsigned int cmd,
6014                 unsigned long arg)
6015  {
6016 @@ -147,6 +156,48 @@ int vfs_ioctl(struct file *filp, unsigne
6017                         else
6018                                 error = -ENOTTY;
6019                         break;
6020 +#ifdef CONFIG_VSERVER_LEGACY
6021 +#ifndef CONFIG_TAGGING_NONE
6022 +               case FIOC_GETTAG: {
6023 +                       struct inode *inode = filp->f_dentry->d_inode;
6024 +
6025 +                       /* fixme: if stealth, return -ENOTTY */
6026 +                       error = -EPERM;
6027 +                       if (capable(CAP_CONTEXT))
6028 +                               error = put_user(inode->i_tag, (int __user *) arg);
6029 +                       break;
6030 +               }
6031 +               case FIOC_SETTAG: {
6032 +                       struct inode *inode = filp->f_dentry->d_inode;
6033 +                       int tag;
6034 +
6035 +                       /* fixme: if stealth, return -ENOTTY */
6036 +                       error = -EPERM;
6037 +                       if (!capable(CAP_CONTEXT))
6038 +                               break;
6039 +                       error = -EROFS;
6040 +                       if (IS_RDONLY(inode))
6041 +                               break;
6042 +                       error = -ENOSYS;
6043 +                       if (!(inode->i_sb->s_flags & MS_TAGGED))
6044 +                               break;
6045 +                       error = -EFAULT;
6046 +                       if (get_user(tag, (int __user *) arg))
6047 +                               break;
6048 +                       error = 0;
6049 +                       inode->i_tag = (tag & 0xFFFF);
6050 +                       inode->i_ctime = CURRENT_TIME;
6051 +                       mark_inode_dirty(inode);
6052 +                       break;
6053 +               }
6054 +#endif
6055 +               case FIOC_GETXFLG:
6056 +               case FIOC_SETXFLG:
6057 +                       error = -ENOTTY;
6058 +                       if (filp->f_dentry->d_inode->i_sb->s_magic == PROC_SUPER_MAGIC)
6059 +                               error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg);
6060 +                       break;
6061 +#endif
6062                 default:
6063                         if (S_ISREG(filp->f_dentry->d_inode->i_mode))
6064                                 error = file_ioctl(filp, cmd, arg);
6065 diff -NurpP --minimal linux-2.6.16.20/fs/ioprio.c linux-2.6.16.20-vs2.1.1-rc22/fs/ioprio.c
6066 --- linux-2.6.16.20/fs/ioprio.c 2006-04-09 13:49:53 +0200
6067 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ioprio.c    2006-04-26 19:07:00 +0200
6068 @@ -24,6 +24,7 @@
6069  #include <linux/blkdev.h>
6070  #include <linux/capability.h>
6071  #include <linux/syscalls.h>
6072 +#include <linux/vs_pid.h>
6073  
6074  static int set_task_ioprio(struct task_struct *task, int ioprio)
6075  {
6076 @@ -95,7 +96,7 @@ asmlinkage long sys_ioprio_set(int which
6077                         if (!who)
6078                                 user = current->user;
6079                         else
6080 -                               user = find_user(who);
6081 +                               user = find_user(vx_current_xid(), who);
6082  
6083                         if (!user)
6084                                 break;
6085 @@ -149,7 +150,7 @@ asmlinkage long sys_ioprio_get(int which
6086                         if (!who)
6087                                 user = current->user;
6088                         else
6089 -                               user = find_user(who);
6090 +                               user = find_user(vx_current_xid(), who);
6091  
6092                         if (!user)
6093                                 break;
6094 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/Makefile linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/Makefile
6095 --- linux-2.6.16.20/fs/jfs/Makefile     2004-08-14 12:56:09 +0200
6096 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/Makefile        2006-04-26 19:07:00 +0200
6097 @@ -8,7 +8,8 @@ jfs-y    := super.o file.o inode.o namei
6098             jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \
6099             jfs_unicode.o jfs_dtree.o jfs_inode.o \
6100             jfs_extent.o symlink.o jfs_metapage.o \
6101 -           jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o resize.o xattr.o
6102 +           jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \
6103 +           resize.o xattr.o ioctl.o
6104  
6105  jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o
6106  
6107 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/acl.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/acl.c
6108 --- linux-2.6.16.20/fs/jfs/acl.c        2005-10-28 20:49:44 +0200
6109 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/acl.c   2006-04-26 19:07:00 +0200
6110 @@ -229,7 +229,8 @@ int jfs_setattr(struct dentry *dentry, s
6111                 return rc;
6112  
6113         if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
6114 -           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
6115 +           (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) ||
6116 +           (iattr->ia_valid & ATTR_TAG && iattr->ia_tag != inode->i_tag)) {
6117                 if (DQUOT_TRANSFER(inode, iattr))
6118                         return -EDQUOT;
6119         }
6120 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/file.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/file.c
6121 --- linux-2.6.16.20/fs/jfs/file.c       2005-08-29 22:25:31 +0200
6122 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/file.c  2006-04-26 19:07:00 +0200
6123 @@ -98,6 +98,7 @@ struct inode_operations jfs_file_inode_o
6124         .setattr        = jfs_setattr,
6125         .permission     = jfs_permission,
6126  #endif
6127 +       .sync_flags     = jfs_sync_flags,
6128  };
6129  
6130  struct file_operations jfs_file_operations = {
6131 @@ -111,6 +112,8 @@ struct file_operations jfs_file_operatio
6132         .readv          = generic_file_readv,
6133         .writev         = generic_file_writev,
6134         .sendfile       = generic_file_sendfile,
6135 +       .sendpage       = generic_file_sendpage,
6136         .fsync          = jfs_fsync,
6137         .release        = jfs_release,
6138 +       .ioctl          = jfs_ioctl,
6139  };
6140 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/inode.c
6141 --- linux-2.6.16.20/fs/jfs/inode.c      2005-10-28 20:49:44 +0200
6142 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/inode.c 2006-04-26 19:07:00 +0200
6143 @@ -22,6 +22,7 @@
6144  #include <linux/buffer_head.h>
6145  #include <linux/pagemap.h>
6146  #include <linux/quotaops.h>
6147 +#include <linux/vs_dlimit.h>
6148  #include "jfs_incore.h"
6149  #include "jfs_inode.h"
6150  #include "jfs_filsys.h"
6151 @@ -55,6 +56,7 @@ void jfs_read_inode(struct inode *inode)
6152                 inode->i_op = &jfs_file_inode_operations;
6153                 init_special_inode(inode, inode->i_mode, inode->i_rdev);
6154         }
6155 +       jfs_set_inode_flags(inode);
6156  }
6157  
6158  /*
6159 @@ -143,6 +145,7 @@ void jfs_delete_inode(struct inode *inod
6160                 DQUOT_INIT(inode);
6161                 DQUOT_FREE_INODE(inode);
6162                 DQUOT_DROP(inode);
6163 +               DLIMIT_FREE_INODE(inode);
6164         }
6165  
6166         clear_inode(inode);
6167 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/ioctl.c
6168 --- linux-2.6.16.20/fs/jfs/ioctl.c      1970-01-01 01:00:00 +0100
6169 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/ioctl.c 2006-04-26 19:07:00 +0200
6170 @@ -0,0 +1,111 @@
6171 +/*
6172 + * linux/fs/jfs/ioctl.c
6173 + *
6174 + * Copyright (C) 2006 Herbert Poetzl
6175 + * adapted from Remy Card's ext2/ioctl.c
6176 + */
6177 +
6178 +#include <linux/fs.h>
6179 +#include <linux/ext2_fs.h>
6180 +#include <linux/ctype.h>
6181 +#include <linux/capability.h>
6182 +#include <linux/time.h>
6183 +#include <linux/mount.h>
6184 +#include <asm/current.h>
6185 +#include <asm/uaccess.h>
6186 +
6187 +#include "jfs_incore.h"
6188 +#include "jfs_dinode.h"
6189 +#include "jfs_inode.h"
6190 +
6191 +/* JFS <-> ext2 flag pairs used by jfs_map_ext2(); a zero jfs_flag ends the table */
6192 +static struct {
6193 +       long jfs_flag;
6194 +       long ext2_flag;
6195 +} jfs_map[] = {
6196 +       {JFS_NOATIME_FL, EXT2_NOATIME_FL},
6197 +       {JFS_DIRSYNC_FL, EXT2_DIRSYNC_FL},
6198 +       {JFS_SYNC_FL, EXT2_SYNC_FL},
6199 +       {JFS_SECRM_FL, EXT2_SECRM_FL},
6200 +       {JFS_UNRM_FL, EXT2_UNRM_FL},
6201 +       {JFS_APPEND_FL, EXT2_APPEND_FL},
6202 +       {JFS_IMMUTABLE_FL, EXT2_IMMUTABLE_FL},
6203 +       {0, 0},
6204 +};
6205 +
6206 +static long jfs_map_ext2(unsigned long flags, int from)
6207 +{
6208 +       long result = 0;
6209 +       int i;
6210 +
6211 +       /* from != 0: ext2 bits -> jfs bits; from == 0: jfs bits -> ext2 bits */
6212 +       for (i = 0; jfs_map[i].jfs_flag; i++) {
6213 +               long src, dst;
6214 +
6215 +               src = from ? jfs_map[i].ext2_flag : jfs_map[i].jfs_flag;
6216 +               dst = from ? jfs_map[i].jfs_flag : jfs_map[i].ext2_flag;
6217 +               if (src & flags)
6218 +                       result |= dst;
6219 +       }
6220 +
6221 +       return result;
6222 +}
6223 +
6224 +/* jfs_ioctl: get or set the per-inode JFS flags, exchanged in ext2 flag encoding */
6225 +int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd,
6226 +               unsigned long arg)
6227 +{
6228 +       struct jfs_inode_info *jfs_inode = JFS_IP(inode);
6229 +       unsigned int flags;
6230 +
6231 +       switch (cmd) {
6232 +       case JFS_IOC_GETFLAGS:
6233 +               flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE;
6234 +               flags = jfs_map_ext2(flags, 0);
6235 +               return put_user(flags, (int __user *) arg);
6236 +       case JFS_IOC_SETFLAGS: {
6237 +               unsigned int oldflags;
6238 +
6239 +               if (IS_RDONLY(inode) ||
6240 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
6241 +                       return -EROFS;
6242 +
6243 +               if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
6244 +                       return -EACCES;
6245 +
6246 +               if (get_user(flags, (int __user *) arg))
6247 +                       return -EFAULT;
6248 +               /* translate the ext2 bits; DIRSYNC is kept only on directories */
6249 +               flags = jfs_map_ext2(flags, 1);
6250 +               if (!S_ISDIR(inode->i_mode))
6251 +                       flags &= ~JFS_DIRSYNC_FL;
6252 +
6253 +               oldflags = jfs_inode->mode2;
6254 +
6255 +               /*
6256 +                * CAP_LINUX_IMMUTABLE is required if IMMUTABLE is currently
6257 +                * set, or if APPEND, IMMUTABLE or IUNLINK would change.
6258 +                *
6259 +                * This test looks nicer. Thanks to Pauline Middelink
6260 +                */
6261 +               if ((oldflags & JFS_IMMUTABLE_FL) ||
6262 +                       ((flags ^ oldflags) & (JFS_APPEND_FL |
6263 +                       JFS_IMMUTABLE_FL | JFS_IUNLINK_FL))) {
6264 +                       if (!capable(CAP_LINUX_IMMUTABLE))
6265 +                               return -EPERM;
6266 +               }
6267 +               /* merge: modifiable bits from the request, the rest from mode2 */
6268 +               flags = flags & JFS_FL_USER_MODIFIABLE;
6269 +               flags |= oldflags & ~JFS_FL_USER_MODIFIABLE;
6270 +               jfs_inode->mode2 = flags;
6271 +
6272 +               jfs_set_inode_flags(inode);
6273 +               inode->i_ctime = CURRENT_TIME_SEC;
6274 +               mark_inode_dirty(inode);
6275 +               return 0;
6276 +       }
6277 +       default:
6278 +               return -ENOTTY;
6279 +       }
6280 +}
6281 +
6282 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_dinode.h linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_dinode.h
6283 --- linux-2.6.16.20/fs/jfs/jfs_dinode.h 2004-12-25 01:55:20 +0100
6284 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_dinode.h    2006-04-26 19:07:00 +0200
6285 @@ -139,13 +139,39 @@ struct dinode {
6286  
6287  /* more extended mode bits: attributes for OS/2 */
6288  #define IREADONLY      0x02000000      /* no write access to file */
6289 -#define IARCHIVE       0x40000000      /* file archive bit */
6290 -#define ISYSTEM                0x08000000      /* system file */
6291  #define IHIDDEN                0x04000000      /* hidden file */
6292 -#define IRASH          0x4E000000      /* mask for changeable attributes */
6293 -#define INEWNAME       0x80000000      /* non-8.3 filename format */
6294 +#define ISYSTEM                0x08000000      /* system file */
6295 +
6296  #define IDIRECTORY     0x20000000      /* directory (shadow of real bit) */
6297 +#define IARCHIVE       0x40000000      /* file archive bit */
6298 +#define INEWNAME       0x80000000      /* non-8.3 filename format */
6299 +
6300 +#define IRASH          0x4E000000      /* mask for changeable attributes */
6301  #define ATTRSHIFT      25      /* bits to shift to move attribute
6302                                    specification to mode position */
6303  
6304 +/* extended attributes for Linux */
6305 +
6306 +#define JFS_NOATIME_FL         0x00080000 /* do not update atime */
6307 +
6308 +#define JFS_DIRSYNC_FL         0x00100000 /* dirsync behaviour */
6309 +#define JFS_SYNC_FL            0x00200000 /* Synchronous updates */
6310 +#define JFS_SECRM_FL           0x00400000 /* Secure deletion */
6311 +#define JFS_UNRM_FL            0x00800000 /* allow for undelete */
6312 +
6313 +#define        JFS_APPEND_FL           0x01000000 /* writes to file may only append */
6314 +#define        JFS_IMMUTABLE_FL        0x02000000 /* Immutable file */
6315 +
6316 +#define        JFS_BARRIER_FL          0x04000000 /* Barrier for chroot() */
6317 +#define        JFS_IUNLINK_FL          0x08000000 /* Immutable unlink */
6318 +
6319 +#define JFS_FL_USER_VISIBLE    0x0FF80000
6320 +#define JFS_FL_USER_MODIFIABLE 0x03F80000
6321 +#define JFS_FL_INHERIT         0x0BC80000
6322 +
6323 +
6324 +#define JFS_IOC_GETFLAGS       _IOR('f', 1, long)
6325 +#define JFS_IOC_SETFLAGS       _IOW('f', 2, long)
6326 +
6327 +
6328  #endif /*_H_JFS_DINODE */
6329 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_dtree.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_dtree.c
6330 --- linux-2.6.16.20/fs/jfs/jfs_dtree.c  2005-08-29 22:25:31 +0200
6331 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_dtree.c     2006-04-26 19:07:00 +0200
6332 @@ -102,6 +102,7 @@
6333  
6334  #include <linux/fs.h>
6335  #include <linux/quotaops.h>
6336 +#include <linux/vs_dlimit.h>
6337  #include "jfs_incore.h"
6338  #include "jfs_superblock.h"
6339  #include "jfs_filsys.h"
6340 @@ -383,10 +384,10 @@ static u32 add_index(tid_t tid, struct i
6341                  */
6342                 if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage))
6343                         goto clean_up;
6344 -               if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
6345 -                       DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
6346 -                       goto clean_up;
6347 -               }
6348 +               if (DLIMIT_ALLOC_BLOCK(ip, sbi->nbperpage))
6349 +                       goto clean_up_dquot;
6350 +               if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr))
6351 +                       goto clean_up_dlimit;
6352  
6353                 /*
6354                  * Save the table, we're going to overwrite it with the
6355 @@ -479,6 +480,12 @@ static u32 add_index(tid_t tid, struct i
6356  
6357         return index;
6358  
6359 +      clean_up_dlimit:
6360 +       DLIMIT_FREE_BLOCK(ip, sbi->nbperpage);
6361 +
6362 +      clean_up_dquot:
6363 +       DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
6364 +
6365        clean_up:
6366  
6367         jfs_ip->next_index--;
6368 @@ -930,7 +937,8 @@ int dtInsert(tid_t tid, struct inode *ip
6369  static int dtSplitUp(tid_t tid,
6370           struct inode *ip, struct dtsplit * split, struct btstack * btstack)
6371  {
6372 -       struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
6373 +       struct super_block *sb = ip->i_sb;
6374 +       struct jfs_sb_info *sbi = JFS_SBI(sb);
6375         int rc = 0;
6376         struct metapage *smp;
6377         dtpage_t *sp;           /* split page */
6378 @@ -952,6 +960,7 @@ static int dtSplitUp(tid_t tid,
6379         struct tlock *tlck;
6380         struct lv *lv;
6381         int quota_allocation = 0;
6382 +       int dlimit_allocation = 0;
6383  
6384         /* get split page */
6385         smp = split->mp;
6386 @@ -1033,6 +1042,12 @@ static int dtSplitUp(tid_t tid,
6387                 }
6388                 quota_allocation += n;
6389  
6390 +               if (DLIMIT_ALLOC_BLOCK(ip, n)) {
6391 +                       rc = -ENOSPC;
6392 +                       goto extendOut;
6393 +               }
6394 +               dlimit_allocation += n;
6395 +
6396                 if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
6397                                     (s64) n, &nxaddr)))
6398                         goto extendOut;
6399 @@ -1301,6 +1316,9 @@ static int dtSplitUp(tid_t tid,
6400        freeKeyName:
6401         kfree(key.name);
6402  
6403 +       /* Rollback dlimit allocation */
6404 +       if (rc && dlimit_allocation)
6405 +               DLIMIT_FREE_BLOCK(ip, dlimit_allocation);
6406         /* Rollback quota allocation */
6407         if (rc && quota_allocation)
6408                 DQUOT_FREE_BLOCK(ip, quota_allocation);
6409 @@ -1368,6 +1386,12 @@ static int dtSplitPage(tid_t tid, struct
6410                 release_metapage(rmp);
6411                 return -EDQUOT;
6412         }
6413 +       /* Allocate blocks to dlimit. */
6414 +       if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
6415 +               DQUOT_FREE_BLOCK(ip, lengthPXD(pxd));
6416 +               release_metapage(rmp);
6417 +               return -ENOSPC;
6418 +       }
6419  
6420         jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
6421  
6422 @@ -1918,6 +1942,12 @@ static int dtSplitRoot(tid_t tid,
6423                 release_metapage(rmp);
6424                 return -EDQUOT;
6425         }
6426 +       /* Allocate blocks to dlimit. */
6427 +       if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
6428 +               DQUOT_FREE_BLOCK(ip, lengthPXD(pxd));
6429 +               release_metapage(rmp);
6430 +               return -ENOSPC;
6431 +       }
6432  
6433         BT_MARK_DIRTY(rmp, ip);
6434         /*
6435 @@ -2284,6 +2314,8 @@ static int dtDeleteUp(tid_t tid, struct 
6436  
6437         xlen = lengthPXD(&fp->header.self);
6438  
6439 +       /* Free dlimit allocation. */
6440 +       DLIMIT_FREE_BLOCK(ip, xlen);
6441         /* Free quota allocation. */
6442         DQUOT_FREE_BLOCK(ip, xlen);
6443  
6444 @@ -2360,6 +2392,8 @@ static int dtDeleteUp(tid_t tid, struct 
6445  
6446                                 xlen = lengthPXD(&p->header.self);
6447  
6448 +                               /* Free dlimit allocation */
6449 +                               DLIMIT_FREE_BLOCK(ip, xlen);
6450                                 /* Free quota allocation */
6451                                 DQUOT_FREE_BLOCK(ip, xlen);
6452  
6453 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_extent.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_extent.c
6454 --- linux-2.6.16.20/fs/jfs/jfs_extent.c 2005-08-29 22:25:32 +0200
6455 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_extent.c    2006-04-26 19:07:00 +0200
6456 @@ -18,6 +18,7 @@
6457  
6458  #include <linux/fs.h>
6459  #include <linux/quotaops.h>
6460 +#include <linux/vs_dlimit.h>
6461  #include "jfs_incore.h"
6462  #include "jfs_inode.h"
6463  #include "jfs_superblock.h"
6464 @@ -146,6 +147,13 @@ extAlloc(struct inode *ip, s64 xlen, s64
6465                 up(&JFS_IP(ip)->commit_sem);
6466                 return -EDQUOT;
6467         }
6468 +       /* Allocate blocks to dlimit. */
6469 +       if (DLIMIT_ALLOC_BLOCK(ip, nxlen)) {
6470 +               DQUOT_FREE_BLOCK(ip, nxlen);
6471 +               dbFree(ip, nxaddr, (s64) nxlen);
6472 +               up(&JFS_IP(ip)->commit_sem);
6473 +               return -ENOSPC;
6474 +       }
6475  
6476         /* determine the value of the extent flag */
6477         xflag = (abnr == TRUE) ? XAD_NOTRECORDED : 0;
6478 @@ -164,6 +172,7 @@ extAlloc(struct inode *ip, s64 xlen, s64
6479          */
6480         if (rc) {
6481                 dbFree(ip, nxaddr, nxlen);
6482 +               DLIMIT_FREE_BLOCK(ip, nxlen);
6483                 DQUOT_FREE_BLOCK(ip, nxlen);
6484                 up(&JFS_IP(ip)->commit_sem);
6485                 return (rc);
6486 @@ -261,6 +270,13 @@ int extRealloc(struct inode *ip, s64 nxl
6487                 up(&JFS_IP(ip)->commit_sem);
6488                 return -EDQUOT;
6489         }
6490 +       /* Allocate blocks to dlimit. */
6491 +       if (DLIMIT_ALLOC_BLOCK(ip, nxlen)) {
6492 +               DQUOT_FREE_BLOCK(ip, nxlen);
6493 +               dbFree(ip, nxaddr, (s64) nxlen);
6494 +               up(&JFS_IP(ip)->commit_sem);
6495 +               return -ENOSPC;
6496 +       }
6497  
6498         delta = nxlen - xlen;
6499  
6500 @@ -297,6 +313,7 @@ int extRealloc(struct inode *ip, s64 nxl
6501                 /* extend the extent */
6502                 if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) {
6503                         dbFree(ip, xaddr + xlen, delta);
6504 +                       DLIMIT_FREE_BLOCK(ip, nxlen);
6505                         DQUOT_FREE_BLOCK(ip, nxlen);
6506                         goto exit;
6507                 }
6508 @@ -308,6 +325,7 @@ int extRealloc(struct inode *ip, s64 nxl
6509                  */
6510                 if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) {
6511                         dbFree(ip, nxaddr, nxlen);
6512 +                       DLIMIT_FREE_BLOCK(ip, nxlen);
6513                         DQUOT_FREE_BLOCK(ip, nxlen);
6514                         goto exit;
6515                 }
6516 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_filsys.h linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_filsys.h
6517 --- linux-2.6.16.20/fs/jfs/jfs_filsys.h 2005-10-28 20:49:44 +0200
6518 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_filsys.h    2006-04-26 19:07:00 +0200
6519 @@ -84,6 +84,7 @@
6520  #define JFS_DIR_INDEX          0x00200000      /* Persistant index for */
6521                                                 /* directory entries    */
6522  
6523 +#define JFS_TAGGED             0x00800000      /* Context Tagging */
6524  
6525  /*
6526   *     buffer cache configuration
6527 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_imap.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_imap.c
6528 --- linux-2.6.16.20/fs/jfs/jfs_imap.c   2006-04-09 13:49:53 +0200
6529 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_imap.c      2006-04-26 19:07:00 +0200
6530 @@ -45,6 +45,7 @@
6531  #include <linux/buffer_head.h>
6532  #include <linux/pagemap.h>
6533  #include <linux/quotaops.h>
6534 +#include <linux/vserver/tag.h>
6535  
6536  #include "jfs_incore.h"
6537  #include "jfs_inode.h"
6538 @@ -3074,14 +3075,21 @@ static void duplicateIXtree(struct super
6539  static int copy_from_dinode(struct dinode * dip, struct inode *ip)
6540  {
6541         struct jfs_inode_info *jfs_ip = JFS_IP(ip);
6542 +       uid_t uid;
6543 +       gid_t gid;
6544  
6545         jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
6546         jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
6547  
6548         ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
6549         ip->i_nlink = le32_to_cpu(dip->di_nlink);
6550 -       ip->i_uid = le32_to_cpu(dip->di_uid);
6551 -       ip->i_gid = le32_to_cpu(dip->di_gid);
6552 +
6553 +       uid = le32_to_cpu(dip->di_uid);
6554 +       gid = le32_to_cpu(dip->di_gid);
6555 +       ip->i_uid = INOTAG_UID(DX_TAG(ip), uid, gid);
6556 +       ip->i_gid = INOTAG_GID(DX_TAG(ip), uid, gid);
6557 +       ip->i_tag = INOTAG_TAG(DX_TAG(ip), uid, gid, 0);
6558 +
6559         ip->i_size = le64_to_cpu(dip->di_size);
6560         ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec);
6561         ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
6562 @@ -3132,6 +3140,8 @@ static int copy_from_dinode(struct dinod
6563  static void copy_to_dinode(struct dinode * dip, struct inode *ip)
6564  {
6565         struct jfs_inode_info *jfs_ip = JFS_IP(ip);
6566 +       uid_t uid;
6567 +       gid_t gid;
6568  
6569         dip->di_fileset = cpu_to_le32(jfs_ip->fileset);
6570         dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp);
6571 @@ -3140,8 +3150,11 @@ static void copy_to_dinode(struct dinode
6572         dip->di_size = cpu_to_le64(ip->i_size);
6573         dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
6574         dip->di_nlink = cpu_to_le32(ip->i_nlink);
6575 -       dip->di_uid = cpu_to_le32(ip->i_uid);
6576 -       dip->di_gid = cpu_to_le32(ip->i_gid);
6577 +
6578 +       uid = TAGINO_UID(DX_TAG(ip), ip->i_uid, ip->i_tag);
6579 +       gid = TAGINO_GID(DX_TAG(ip), ip->i_gid, ip->i_tag);
6580 +       dip->di_uid = cpu_to_le32(uid);
6581 +       dip->di_gid = cpu_to_le32(gid);
6582         /*
6583          * mode2 is only needed for storing the higher order bits.
6584          * Trust i_mode for the lower order ones
6585 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_inode.c
6586 --- linux-2.6.16.20/fs/jfs/jfs_inode.c  2005-08-29 22:25:32 +0200
6587 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_inode.c     2006-04-26 19:07:00 +0200
6588 @@ -18,6 +18,8 @@
6589  
6590  #include <linux/fs.h>
6591  #include <linux/quotaops.h>
6592 +#include <linux/vs_dlimit.h>
6593 +#include <linux/vs_tag.h>
6594  #include "jfs_incore.h"
6595  #include "jfs_inode.h"
6596  #include "jfs_filsys.h"
6597 @@ -25,6 +27,66 @@
6598  #include "jfs_dinode.h"
6599  #include "jfs_debug.h"
6600  
6601 +
6602 +void jfs_set_inode_flags(struct inode *inode)
6603 +{
6604 +       unsigned int mode2 = JFS_IP(inode)->mode2;
6605 +       unsigned int vfs_flags = 0;
6606 +
6607 +       /* translate the JFS attribute bits of mode2 into VFS S_* flags */
6608 +       vfs_flags |= (mode2 & JFS_IMMUTABLE_FL) ? S_IMMUTABLE : 0;
6609 +       vfs_flags |= (mode2 & JFS_IUNLINK_FL) ? S_IUNLINK : 0;
6610 +       vfs_flags |= (mode2 & JFS_BARRIER_FL) ? S_BARRIER : 0;
6611 +
6612 +       vfs_flags |= (mode2 & JFS_SYNC_FL) ? S_SYNC : 0;
6613 +       vfs_flags |= (mode2 & JFS_APPEND_FL) ? S_APPEND : 0;
6614 +       vfs_flags |= (mode2 & JFS_NOATIME_FL) ? S_NOATIME : 0;
6615 +       vfs_flags |= (mode2 & JFS_DIRSYNC_FL) ? S_DIRSYNC : 0;
6616 +
6617 +       /*
6618 +        * Replace only the flags handled above; every other bit of
6619 +        * i_flags keeps its current value.
6620 +        */
6621 +       inode->i_flags &= ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER |
6622 +               S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
6623 +       inode->i_flags |= vfs_flags;
6624 +}
6625 +
6626 +int jfs_sync_flags(struct inode *inode)
6627 +{
6628 +       struct jfs_inode_info *jfs_ip = JFS_IP(inode);
6629 +       const unsigned int before = jfs_ip->mode2;
6630 +       unsigned int after = before & ~(JFS_APPEND_FL |
6631 +               JFS_IMMUTABLE_FL | JFS_IUNLINK_FL | JFS_BARRIER_FL |
6632 +               JFS_NOATIME_FL | JFS_SYNC_FL | JFS_DIRSYNC_FL);
6633 +
6634 +       if (IS_APPEND(inode))
6635 +               after |= JFS_APPEND_FL;
6636 +       if (IS_IMMUTABLE(inode))
6637 +               after |= JFS_IMMUTABLE_FL;
6638 +       if (IS_IUNLINK(inode))
6639 +               after |= JFS_IUNLINK_FL;
6640 +       if (IS_BARRIER(inode))
6641 +               after |= JFS_BARRIER_FL;
6642 +
6643 +       /* read i_flags directly: superblock flags must not be copied */
6644 +       if (inode->i_flags & S_NOATIME)
6645 +               after |= JFS_NOATIME_FL;
6646 +       if (inode->i_flags & S_SYNC)
6647 +               after |= JFS_SYNC_FL;
6648 +       if (inode->i_flags & S_DIRSYNC)
6649 +               after |= JFS_DIRSYNC_FL;
6650 +
6651 +       /* only touch the inode when a flag actually changed */
6652 +       if (before == after)
6653 +               return 0;
6654 +
6655 +       jfs_ip->mode2 = after;
6656 +       inode->i_ctime = CURRENT_TIME;
6657 +       mark_inode_dirty(inode);
6658 +       return 0;
6659 +}
6660 +
6661  /*
6662   * NAME:       ialloc()
6663   *
6664 @@ -62,10 +124,17 @@ struct inode *ialloc(struct inode *paren
6665         } else
6666                 inode->i_gid = current->fsgid;
6667  
6668 +       inode->i_tag = dx_current_fstag(sb);
6669 +       if (DLIMIT_ALLOC_INODE(inode)) {
6670 +               iput(inode);
6671 +               return NULL;
6672 +       }
6673 +
6674         /*
6675          * Allocate inode to quota.
6676          */
6677         if (DQUOT_ALLOC_INODE(inode)) {
6678 +               DLIMIT_FREE_INODE(inode);
6679                 DQUOT_DROP(inode);
6680                 inode->i_flags |= S_NOQUOTA;
6681                 inode->i_nlink = 0;
6682 @@ -74,10 +143,20 @@ struct inode *ialloc(struct inode *paren
6683         }
6684  
6685         inode->i_mode = mode;
6686 -       if (S_ISDIR(mode))
6687 -               jfs_inode->mode2 = IDIRECTORY | mode;
6688 +       /* inherit flags from parent */
6689 +       jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT;
6690 +
6691 +       if (S_ISDIR(mode)) {
6692 +               jfs_inode->mode2 |= IDIRECTORY;
6693 +               jfs_inode->mode2 &= ~JFS_DIRSYNC_FL;
6694 +       }
6695 +       else if (S_ISLNK(mode))
6696 +               jfs_inode->mode2 &=
6697 +                       ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL);
6698         else
6699 -               jfs_inode->mode2 = INLINEEA | ISPARSE | mode;
6700 +               jfs_inode->mode2 |= INLINEEA | ISPARSE;
6701 +       jfs_inode->mode2 |= mode;
6702 +
6703         inode->i_blksize = sb->s_blocksize;
6704         inode->i_blocks = 0;
6705         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
6706 @@ -98,6 +177,7 @@ struct inode *ialloc(struct inode *paren
6707         jfs_inode->atlhead = 0;
6708         jfs_inode->atltail = 0;
6709         jfs_inode->xtlid = 0;
6710 +       jfs_set_inode_flags(inode);
6711  
6712         jfs_info("ialloc returns inode = 0x%p\n", inode);
6713  
6714 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_inode.h linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_inode.h
6715 --- linux-2.6.16.20/fs/jfs/jfs_inode.h  2005-08-29 22:25:32 +0200
6716 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_inode.h     2006-04-26 19:07:00 +0200
6717 @@ -20,6 +20,8 @@
6718  
6719  extern struct inode *ialloc(struct inode *, umode_t);
6720  extern int jfs_fsync(struct file *, struct dentry *, int);
6721 +extern int jfs_ioctl(struct inode *, struct file *,
6722 +                       unsigned int, unsigned long);
6723  extern void jfs_read_inode(struct inode *);
6724  extern int jfs_commit_inode(struct inode *, int);
6725  extern int jfs_write_inode(struct inode*, int);
6726 @@ -29,6 +31,8 @@ extern void jfs_truncate(struct inode *)
6727  extern void jfs_truncate_nolock(struct inode *, loff_t);
6728  extern void jfs_free_zero_link(struct inode *);
6729  extern struct dentry *jfs_get_parent(struct dentry *dentry);
6730 +extern int jfs_sync_flags(struct inode *);
6731 +extern void jfs_set_inode_flags(struct inode *);
6732  
6733  extern struct address_space_operations jfs_aops;
6734  extern struct inode_operations jfs_dir_inode_operations;
6735 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/jfs_xtree.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_xtree.c
6736 --- linux-2.6.16.20/fs/jfs/jfs_xtree.c  2006-01-03 17:29:57 +0100
6737 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/jfs_xtree.c     2006-04-26 19:07:00 +0200
6738 @@ -21,6 +21,7 @@
6739  
6740  #include <linux/fs.h>
6741  #include <linux/quotaops.h>
6742 +#include <linux/vs_dlimit.h>
6743  #include "jfs_incore.h"
6744  #include "jfs_filsys.h"
6745  #include "jfs_metapage.h"
6746 @@ -841,7 +842,12 @@ int xtInsert(tid_t tid,            /* transaction 
6747                         hint = 0;
6748                 if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen)))
6749                         goto out;
6750 +               if ((rc = DLIMIT_ALLOC_BLOCK(ip, xlen))) {
6751 +                       DQUOT_FREE_BLOCK(ip, xlen);
6752 +                       goto out;
6753 +               }
6754                 if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
6755 +                       DLIMIT_FREE_BLOCK(ip, xlen);
6756                         DQUOT_FREE_BLOCK(ip, xlen);
6757                         goto out;
6758                 }
6759 @@ -871,6 +877,7 @@ int xtInsert(tid_t tid,             /* transaction 
6760                         /* undo data extent allocation */
6761                         if (*xaddrp == 0) {
6762                                 dbFree(ip, xaddr, (s64) xlen);
6763 +                               DLIMIT_FREE_BLOCK(ip, xlen);
6764                                 DQUOT_FREE_BLOCK(ip, xlen);
6765                         }
6766                         return rc;
6767 @@ -1231,6 +1238,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
6768         struct tlock *tlck;
6769         struct xtlock *sxtlck = NULL, *rxtlck = NULL;
6770         int quota_allocation = 0;
6771 +       int dlimit_allocation = 0;
6772  
6773         smp = split->mp;
6774         sp = XT_PAGE(ip, smp);
6775 @@ -1243,13 +1251,20 @@ xtSplitPage(tid_t tid, struct inode *ip,
6776         rbn = addressPXD(pxd);
6777  
6778         /* Allocate blocks to quota. */
6779 -       if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
6780 +       if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
6781                rc = -EDQUOT;
6782                goto clean_up;
6783         }
6784  
6785         quota_allocation += lengthPXD(pxd);
6786  
6787 +       /* Allocate blocks to dlimit. */
6788 +       if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
6789 +              rc = -ENOSPC;
6790 +              goto clean_up;
6791 +       }
6792 +       dlimit_allocation += lengthPXD(pxd);
6793 +
6794         /*
6795          * allocate the new right page for the split
6796          */
6797 @@ -1451,6 +1466,9 @@ xtSplitPage(tid_t tid, struct inode *ip,
6798  
6799        clean_up:
6800  
6801 +       /* Rollback dlimit allocation. */
6802 +       if (dlimit_allocation)
6803 +               DLIMIT_FREE_BLOCK(ip, dlimit_allocation);
6804         /* Rollback quota allocation. */
6805         if (quota_allocation)
6806                 DQUOT_FREE_BLOCK(ip, quota_allocation);
6807 @@ -1515,6 +1533,12 @@ xtSplitRoot(tid_t tid,
6808                 release_metapage(rmp);
6809                 return -EDQUOT;
6810         }
6811 +       /* Allocate blocks to dlimit. */
6812 +       if (DLIMIT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
6813 +               DQUOT_FREE_BLOCK(ip, lengthPXD(pxd));
6814 +               release_metapage(rmp);
6815 +               return -ENOSPC;
6816 +       }
6817  
6818         jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
6819  
6820 @@ -3941,6 +3965,8 @@ s64 xtTruncate(tid_t tid, struct inode *
6821         else
6822                 ip->i_size = newsize;
6823  
6824 +       /* update dlimit allocation to reflect freed blocks */
6825 +       DLIMIT_FREE_BLOCK(ip, nfreed);
6826         /* update quota allocation to reflect freed blocks */
6827         DQUOT_FREE_BLOCK(ip, nfreed);
6828  
6829 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/namei.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/namei.c
6830 --- linux-2.6.16.20/fs/jfs/namei.c      2006-01-03 17:29:57 +0100
6831 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/namei.c 2006-04-26 19:07:00 +0200
6832 @@ -20,6 +20,7 @@
6833  #include <linux/fs.h>
6834  #include <linux/ctype.h>
6835  #include <linux/quotaops.h>
6836 +#include <linux/vserver/tag.h>
6837  #include "jfs_incore.h"
6838  #include "jfs_superblock.h"
6839  #include "jfs_inode.h"
6840 @@ -1465,6 +1466,7 @@ static struct dentry *jfs_lookup(struct 
6841                 return ERR_PTR(-EACCES);
6842         }
6843  
6844 +       dx_propagate_tag(nd, ip);
6845         dentry = d_splice_alias(ip, dentry);
6846  
6847         if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2))
6848 @@ -1517,12 +1519,14 @@ struct inode_operations jfs_dir_inode_op
6849         .setattr        = jfs_setattr,
6850         .permission     = jfs_permission,
6851  #endif
6852 +       .sync_flags     = jfs_sync_flags,
6853  };
6854  
6855  struct file_operations jfs_dir_operations = {
6856         .read           = generic_read_dir,
6857         .readdir        = jfs_readdir,
6858         .fsync          = jfs_fsync,
6859 +       .ioctl          = jfs_ioctl,
6860  };
6861  
6862  static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
6863 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/super.c
6864 --- linux-2.6.16.20/fs/jfs/super.c      2006-02-18 14:40:22 +0100
6865 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/super.c 2006-04-26 19:07:00 +0200
6866 @@ -195,7 +195,7 @@ static void jfs_put_super(struct super_b
6867  enum {
6868         Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
6869         Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
6870 -       Opt_usrquota, Opt_grpquota
6871 +       Opt_usrquota, Opt_grpquota, Opt_tag, Opt_notag, Opt_tagid
6872  };
6873  
6874  static match_table_t tokens = {
6875 @@ -205,6 +205,10 @@ static match_table_t tokens = {
6876         {Opt_resize, "resize=%u"},
6877         {Opt_resize_nosize, "resize"},
6878         {Opt_errors, "errors=%s"},
6879 +       {Opt_tag, "tag"},
6880 +       {Opt_notag, "notag"},
6881 +       {Opt_tagid, "tagid=%u"},
6882 +       {Opt_tag, "tagxid"},
6883         {Opt_ignore, "noquota"},
6884         {Opt_ignore, "quota"},
6885         {Opt_usrquota, "usrquota"},
6886 @@ -313,6 +317,20 @@ static int parse_options(char *options, 
6887                                "JFS: quota operations not supported\n");
6888                         break;
6889  #endif
6890 +#ifndef CONFIG_TAGGING_NONE
6891 +               case Opt_tag:           /* "tag" and the "tagxid" alias */
6892 +                       *flag |= JFS_TAGGED;
6893 +                       break;
6894 +               case Opt_notag:         /* clear only the tag bit */
6895 +                       *flag &= ~JFS_TAGGED;
6896 +                       break;
6897 +#endif
6898 +#ifdef CONFIG_PROPAGATE
6899 +               case Opt_tagid:
6900 +                       /* use args[0] */
6901 +                       *flag |= JFS_TAGGED;
6902 +                       break;
6903 +#endif
6904  
6905                 default:
6906                         printk("jfs: Unrecognized mount option \"%s\" "
6907 @@ -344,6 +362,13 @@ static int jfs_remount(struct super_bloc
6908         if (!parse_options(data, sb, &newLVSize, &flag)) {
6909                 return -EINVAL;
6910         }
6911 +
6912 +       if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) {
6913 +               printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n",
6914 +                       sb->s_id);
6915 +               return -EINVAL;
6916 +       }
6917 +
6918         if (newLVSize) {
6919                 if (sb->s_flags & MS_RDONLY) {
6920                         printk(KERN_ERR
6921 @@ -415,6 +440,9 @@ static int jfs_fill_super(struct super_b
6922  #ifdef CONFIG_JFS_POSIX_ACL
6923         sb->s_flags |= MS_POSIXACL;
6924  #endif
6925 +       /* map mount option tagxid */
6926 +       if (sbi->flag & JFS_TAGGED)
6927 +               sb->s_flags |= MS_TAGGED;
6928  
6929         if (newLVSize) {
6930                 printk(KERN_ERR "resize option for remount only\n");
6931 diff -NurpP --minimal linux-2.6.16.20/fs/jfs/xattr.c linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/xattr.c
6932 --- linux-2.6.16.20/fs/jfs/xattr.c      2006-02-18 14:40:22 +0100
6933 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/jfs/xattr.c 2006-04-26 19:07:00 +0200
6934 @@ -23,6 +23,7 @@
6935  #include <linux/posix_acl_xattr.h>
6936  #include <linux/quotaops.h>
6937  #include <linux/security.h>
6938 +#include <linux/vs_dlimit.h>
6939  #include "jfs_incore.h"
6940  #include "jfs_superblock.h"
6941  #include "jfs_dmap.h"
6942 @@ -263,9 +264,16 @@ static int ea_write(struct inode *ip, st
6943         if (DQUOT_ALLOC_BLOCK(ip, nblocks)) {
6944                 return -EDQUOT;
6945         }
6946 +       /* Allocate new blocks to dlimit. */
6947 +       if (DLIMIT_ALLOC_BLOCK(ip, nblocks)) {
6948 +               DQUOT_FREE_BLOCK(ip, nblocks);
6949 +               return -ENOSPC;
6950 +       }
6951  
6952         rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno);
6953         if (rc) {
6954 +               /*Rollback dlimit allocation. */
6955 +               DLIMIT_FREE_BLOCK(ip, nblocks);
6956                 /*Rollback quota allocation. */
6957                 DQUOT_FREE_BLOCK(ip, nblocks);
6958                 return rc;
6959 @@ -332,6 +340,8 @@ static int ea_write(struct inode *ip, st
6960  
6961        failed:
6962 +       /* Rollback dlimit allocation. */
6963 +       DLIMIT_FREE_BLOCK(ip, nblocks);
6964         /* Rollback quota allocation. */
6965         DQUOT_FREE_BLOCK(ip, nblocks);
6966  
6967         dbFree(ip, blkno, nblocks);
6968 @@ -468,6 +478,7 @@ static int ea_get(struct inode *inode, s
6969         s64 blkno;
6970         int rc;
6971         int quota_allocation = 0;
6972 +       int dlimit_allocation = 0;
6973  
6974         /* When fsck.jfs clears a bad ea, it doesn't clear the size */
6975         if (ji->ea.flag == 0)
6976 @@ -543,6 +554,12 @@ static int ea_get(struct inode *inode, s
6977  
6978                 quota_allocation = blocks_needed;
6979  
6980 +               /* Allocate new blocks to dlimit. */
6981 +               rc = -ENOSPC;
6982 +               if (DLIMIT_ALLOC_BLOCK(inode, blocks_needed))
6983 +                       goto clean_up;
6984 +               dlimit_allocation = blocks_needed;
6985 +
6986                 rc = dbAlloc(inode, INOHINT(inode), (s64) blocks_needed,
6987                              &blkno);
6988                 if (rc)
6989 @@ -599,6 +616,9 @@ static int ea_get(struct inode *inode, s
6990         return ea_size;
6991  
6992        clean_up:
6993 +       /* Rollback dlimit allocation */
6994 +       if (dlimit_allocation)
6995 +               DLIMIT_FREE_BLOCK(inode, dlimit_allocation);
6996         /* Rollback quota allocation */
6997         if (quota_allocation)
6998                 DQUOT_FREE_BLOCK(inode, quota_allocation);
6999 @@ -675,8 +695,10 @@ static int ea_put(tid_t tid, struct inod
7000         }
7001  
7002         /* If old blocks exist, they must be removed from quota allocation. */
7003 -       if (old_blocks)
7004 +       if (old_blocks) {
7005 +               DLIMIT_FREE_BLOCK(inode, old_blocks);
7006                 DQUOT_FREE_BLOCK(inode, old_blocks);
7007 +       }
7008  
7009         inode->i_ctime = CURRENT_TIME;
7010  
7011 diff -NurpP --minimal linux-2.6.16.20/fs/libfs.c linux-2.6.16.20-vs2.1.1-rc22/fs/libfs.c
7012 --- linux-2.6.16.20/fs/libfs.c  2006-02-18 14:40:22 +0100
7013 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/libfs.c     2006-04-26 19:07:00 +0200
7014 @@ -122,7 +122,8 @@ static inline unsigned char dt_type(stru
7015   * both impossible due to the lock on directory.
7016   */
7017  
7018 -int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
7019 +static inline int do_dcache_readdir_filter(struct file * filp,
7020 +       void * dirent, filldir_t filldir, int (*filter)(struct dentry *dentry))
7021  {
7022         struct dentry *dentry = filp->f_dentry;
7023         struct dentry *cursor = filp->private_data;
7024 @@ -156,6 +157,8 @@ int dcache_readdir(struct file * filp, v
7025                                 next = list_entry(p, struct dentry, d_u.d_child);
7026                                 if (d_unhashed(next) || !next->d_inode)
7027                                         continue;
7028 +                               if (filter && !filter(next))
7029 +                                       continue;
7030  
7031                                 spin_unlock(&dcache_lock);
7032                                 if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0)
7033 @@ -172,6 +175,18 @@ int dcache_readdir(struct file * filp, v
7034         return 0;
7035  }
7036  
7037 +int dcache_readdir(struct file *filp, void *dirent, filldir_t filldir)
7038 +{
7039 +       return do_dcache_readdir_filter(filp, dirent, filldir, NULL);
7040 +}
7041 +
7042 +/* dcache_readdir() variant: skips children for which filter() returns 0 */
7043 +int dcache_readdir_filter(struct file *filp, void *dirent,
7044 +       filldir_t filldir, int (*filter)(struct dentry *))
7045 +{
7046 +       return do_dcache_readdir_filter(filp, dirent, filldir, filter);
7047 +}
7048 +
7049  ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
7050  {
7051         return -EISDIR;
7052 @@ -621,6 +636,7 @@ EXPORT_SYMBOL(dcache_dir_close);
7053  EXPORT_SYMBOL(dcache_dir_lseek);
7054  EXPORT_SYMBOL(dcache_dir_open);
7055  EXPORT_SYMBOL(dcache_readdir);
7056 +EXPORT_SYMBOL(dcache_readdir_filter);
7057  EXPORT_SYMBOL(generic_read_dir);
7058  EXPORT_SYMBOL(get_sb_pseudo);
7059  EXPORT_SYMBOL(simple_commit_write);
7060 diff -NurpP --minimal linux-2.6.16.20/fs/lockd/clntproc.c linux-2.6.16.20-vs2.1.1-rc22/fs/lockd/clntproc.c
7061 --- linux-2.6.16.20/fs/lockd/clntproc.c 2006-04-09 13:49:53 +0200
7062 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/lockd/clntproc.c    2006-04-26 19:07:00 +0200
7063 @@ -14,6 +14,7 @@
7064  #include <linux/nfs_fs.h>
7065  #include <linux/utsname.h>
7066  #include <linux/smp_lock.h>
7067 +#include <linux/vs_cvirt.h>
7068  #include <linux/sunrpc/clnt.h>
7069  #include <linux/sunrpc/svc.h>
7070  #include <linux/lockd/lockd.h>
7071 @@ -130,10 +131,10 @@ static void nlmclnt_setlockargs(struct n
7072         nlmclnt_next_cookie(&argp->cookie);
7073         argp->state   = nsm_local_state;
7074         memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
7075 -       lock->caller  = system_utsname.nodename;
7076 +       lock->caller  = vx_new_uts(nodename);
7077         lock->oh.data = req->a_owner;
7078         lock->oh.len  = sprintf(req->a_owner, "%d@%s",
7079 -                               current->pid, system_utsname.nodename);
7080 +                               current->pid, vx_new_uts(nodename));
7081         locks_copy_lock(&lock->fl, fl);
7082  }
7083  
7084 @@ -154,7 +155,7 @@ nlmclnt_setgrantargs(struct nlm_rqst *ca
7085  {
7086         locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
7087         memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
7088 -       call->a_args.lock.caller = system_utsname.nodename;
7089 +       call->a_args.lock.caller = vx_new_uts(nodename);
7090         call->a_args.lock.oh.len = lock->oh.len;
7091  
7092         /* set default data area */
7093 diff -NurpP --minimal linux-2.6.16.20/fs/lockd/mon.c linux-2.6.16.20-vs2.1.1-rc22/fs/lockd/mon.c
7094 --- linux-2.6.16.20/fs/lockd/mon.c      2006-01-18 06:08:30 +0100
7095 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/lockd/mon.c 2006-04-26 19:07:00 +0200
7096 @@ -13,6 +13,7 @@
7097  #include <linux/sunrpc/svc.h>
7098  #include <linux/lockd/lockd.h>
7099  #include <linux/lockd/sm_inter.h>
7100 +#include <linux/vs_cvirt.h>
7101  
7102  
7103  #define NLMDBG_FACILITY                NLMDBG_MONITOR
7104 @@ -147,7 +148,7 @@ xdr_encode_common(struct rpc_rqst *rqstp
7105          */
7106         sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
7107         if (!(p = xdr_encode_string(p, buffer))
7108 -        || !(p = xdr_encode_string(p, system_utsname.nodename)))
7109 +        || !(p = xdr_encode_string(p, vx_new_uts(nodename))))
7110                 return ERR_PTR(-EIO);
7111         *p++ = htonl(argp->prog);
7112         *p++ = htonl(argp->vers);
7113 diff -NurpP --minimal linux-2.6.16.20/fs/locks.c linux-2.6.16.20-vs2.1.1-rc22/fs/locks.c
7114 --- linux-2.6.16.20/fs/locks.c  2006-05-22 16:25:40 +0200
7115 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/locks.c     2006-06-02 17:45:59 +0200
7116 @@ -125,6 +125,7 @@
7117  #include <linux/syscalls.h>
7118  #include <linux/time.h>
7119  #include <linux/rcupdate.h>
7120 +#include <linux/vs_limit.h>
7121  
7122  #include <asm/semaphore.h>
7123  #include <asm/uaccess.h>
7124 @@ -150,6 +151,8 @@ static kmem_cache_t *filelock_cache;
7125  /* Allocate an empty lock structure. */
7126  static struct file_lock *locks_alloc_lock(void)
7127  {
7128 +       if (!vx_locks_avail(1))
7129 +               return NULL;
7130         return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
7131  }
7132  
7133 @@ -160,6 +163,9 @@ static void locks_free_lock(struct file_
7134                 BUG();
7135                 return;
7136         }
7137 +
7138 +       vx_locks_dec(fl);
7139 +
7140         if (waitqueue_active(&fl->fl_wait))
7141                 panic("Attempting to free lock with active wait queue");
7142  
7143 @@ -199,6 +205,7 @@ void locks_init_lock(struct file_lock *f
7144         fl->fl_start = fl->fl_end = 0;
7145         fl->fl_ops = NULL;
7146         fl->fl_lmops = NULL;
7147 +       fl->fl_xid = -1;
7148  }
7149  
7150  EXPORT_SYMBOL(locks_init_lock);
7151 @@ -236,6 +243,8 @@ void locks_copy_lock(struct file_lock *n
7152                 fl->fl_ops->fl_copy_lock(new, fl);
7153         if (fl->fl_lmops && fl->fl_lmops->fl_copy_lock)
7154                 fl->fl_lmops->fl_copy_lock(new, fl);
7155 +
7156 +       new->fl_xid = fl->fl_xid;
7157  }
7158  
7159  EXPORT_SYMBOL(locks_copy_lock);
7160 @@ -272,6 +281,11 @@ static int flock_make_lock(struct file *
7161         fl->fl_flags = FL_FLOCK;
7162         fl->fl_type = type;
7163         fl->fl_end = OFFSET_MAX;
7164 +
7165 +       vxd_assert(filp->f_xid == vx_current_xid(),
7166 +               "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
7167 +       fl->fl_xid = filp->f_xid;
7168 +       vx_locks_inc(fl);
7169         
7170         *lock = fl;
7171         return 0;
7172 @@ -437,6 +451,7 @@ static int lease_init(struct file *filp,
7173  
7174         fl->fl_owner = current->files;
7175         fl->fl_pid = current->tgid;
7176 +       fl->fl_xid = vx_current_xid();
7177  
7178         fl->fl_file = filp;
7179         fl->fl_flags = FL_LEASE;
7180 @@ -456,6 +471,11 @@ static int lease_alloc(struct file *filp
7181         if (fl == NULL)
7182                 goto out;
7183  
7184 +       fl->fl_xid = vx_current_xid();
7185 +       if (filp)
7186 +               vxd_assert(filp->f_xid == fl->fl_xid,
7187 +                       "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid);
7188 +       vx_locks_inc(fl);
7189         error = lease_init(filp, type, fl);
7190         if (error) {
7191                 locks_free_lock(fl);
7192 @@ -765,6 +785,7 @@ static int flock_lock_file(struct file *
7193                 goto out;
7194         }
7195         locks_copy_lock(new_fl, request);
7196 +       vx_locks_inc(new_fl);
7197         locks_insert_lock(&inode->i_flock, new_fl);
7198         new_fl = NULL;
7199  
7200 @@ -777,7 +798,7 @@ out:
7201  
7202  EXPORT_SYMBOL(posix_lock_file);
7203  
7204 -static int __posix_lock_file(struct inode *inode, struct file_lock *request)
7205 +static int __posix_lock_file(struct inode *inode, struct file_lock *request, xid_t xid)
7206  {
7207         struct file_lock *fl;
7208         struct file_lock *new_fl, *new_fl2;
7209 @@ -786,12 +807,18 @@ static int __posix_lock_file(struct inod
7210         struct file_lock **before;
7211         int error, added = 0;
7212  
7213 +       vxd_assert(xid == vx_current_xid(),
7214 +               "xid(%d) == current(%d)", xid, vx_current_xid());
7215         /*
7216          * We may need two file_lock structures for this operation,
7217          * so we get them in advance to avoid races.
7218          */
7219         new_fl = locks_alloc_lock();
7220 +       /* locks_alloc_lock() can return NULL: tag and account real locks only */
7221 +       if (new_fl) { new_fl->fl_xid = xid; vx_locks_inc(new_fl); }
7222         new_fl2 = locks_alloc_lock();
7223 +       /* same guard for the second preallocated lock */
7224 +       if (new_fl2) { new_fl2->fl_xid = xid; vx_locks_inc(new_fl2); }
7225  
7226         lock_kernel();
7227         if (request->fl_type != F_UNLCK) {
7228 @@ -969,7 +996,7 @@ static int __posix_lock_file(struct inod
7229   */
7230  int posix_lock_file(struct file *filp, struct file_lock *fl)
7231  {
7232 -       return __posix_lock_file(filp->f_dentry->d_inode, fl);
7233 +       return __posix_lock_file(filp->f_dentry->d_inode, fl, filp->f_xid);
7234  }
7235  
7236  /**
7237 @@ -986,7 +1013,8 @@ int posix_lock_file_wait(struct file *fi
7238         int error;
7239         might_sleep ();
7240         for (;;) {
7241 -               error = __posix_lock_file(filp->f_dentry->d_inode, fl);
7242 +               error = __posix_lock_file(filp->f_dentry->d_inode,
7243 +                       fl, filp->f_xid);
7244                 if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
7245                         break;
7246                 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
7247 @@ -1058,7 +1086,7 @@ int locks_mandatory_area(int read_write,
7248         fl.fl_end = offset + count - 1;
7249  
7250         for (;;) {
7251 -               error = __posix_lock_file(inode, &fl);
7252 +               error = __posix_lock_file(inode, &fl, filp ? filp->f_xid : vx_current_xid());
7253                 if (error != -EAGAIN)
7254                         break;
7255                 if (!(fl.fl_flags & FL_SLEEP))
7256 @@ -1618,6 +1646,11 @@ int fcntl_setlk(unsigned int fd, struct 
7257         if (file_lock == NULL)
7258                 return -ENOLCK;
7259  
7260 +       vxd_assert(filp->f_xid == vx_current_xid(),
7261 +               "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
7262 +       file_lock->fl_xid = filp->f_xid;
7263 +       vx_locks_inc(file_lock);
7264 +
7265         /*
7266          * This might block, so we do it before checking the inode.
7267          */
7268 @@ -1670,7 +1703,8 @@ again:
7269                 error = filp->f_op->lock(filp, cmd, file_lock);
7270         else {
7271                 for (;;) {
7272 -                       error = __posix_lock_file(inode, file_lock);
7273 +                       error = __posix_lock_file(inode, file_lock,
7274 +                               filp->f_xid);
7275                         if ((error != -EAGAIN) || (cmd == F_SETLK))
7276                                 break;
7277                         error = wait_event_interruptible(file_lock->fl_wait,
7278 @@ -1761,6 +1795,11 @@ int fcntl_setlk64(unsigned int fd, struc
7279         if (file_lock == NULL)
7280                 return -ENOLCK;
7281  
7282 +       vxd_assert(filp->f_xid == vx_current_xid(),
7283 +               "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
7284 +       file_lock->fl_xid = filp->f_xid;
7285 +       vx_locks_inc(file_lock);
7286 +
7287         /*
7288          * This might block, so we do it before checking the inode.
7289          */
7290 @@ -1813,7 +1852,8 @@ again:
7291                 error = filp->f_op->lock(filp, cmd, file_lock);
7292         else {
7293                 for (;;) {
7294 -                       error = __posix_lock_file(inode, file_lock);
7295 +                       error = __posix_lock_file(inode, file_lock,
7296 +                               filp->f_xid);
7297                         if ((error != -EAGAIN) || (cmd == F_SETLK64))
7298                                 break;
7299                         error = wait_event_interruptible(file_lock->fl_wait,
7300 @@ -2086,6 +2126,10 @@ int get_locks_status(char *buffer, char 
7301         list_for_each(tmp, &file_lock_list) {
7302                 struct list_head *btmp;
7303                 struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
7304 +
7305 +               if (!vx_check(fl->fl_xid, VX_IDENT|VX_WATCH))
7306 +                       continue;
7307 +
7308                 lock_get_status(q, fl, ++i, "");
7309                 move_lock_status(&q, &pos, offset);
7310  
7311 --- linux-2.6.16.59/fs/namei.c~ 2008-01-19 19:52:57.000000000 +0200
7312 +++ linux-2.6.16.59/fs/namei.c  2008-01-19 19:55:48.031266357 +0200
7313 @@ -32,6 +32,10 @@
7314  #include <linux/file.h>
7315  #include <linux/fcntl.h>
7316  #include <linux/namei.h>
7317 +#include <linux/proc_fs.h>
7318 +#include <linux/vserver/inode.h>
7319 +#include <linux/vs_tag.h>
7320 +#include <linux/vserver/debug.h>
7321  #include <asm/namei.h>
7322  #include <asm/uaccess.h>
7323  
7324 @@ -225,6 +229,24 @@ int generic_permission(struct inode *ino
7325         return -EACCES;
7326  }
7327  
7328 +/* Barrier/tag gate; permission() calls it before the inode's own handler. */
7329 +static inline int dx_permission(struct inode *inode, int mask, struct nameidata *nd)
7330 +{
7331 +       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) {
7332 +               vxwprintk(1, "xid=%d did hit the barrier.", vx_current_xid());
7333 +               return -EACCES;
7334 +       }
7335 +       /* untagged inodes, and tags dx_check() accepts, pass */
7336 +       if (inode->i_tag == 0 ||
7337 +           dx_check(inode->i_tag, DX_ADMIN|DX_WATCH|DX_IDENT))
7338 +               return 0;
7339 +
7340 +       vxwprintk(1, "xid=%d denied access to %p[#%d,%lu] »%s«.",
7341 +               vx_current_xid(), inode, inode->i_tag, inode->i_ino,
7342 +               vxd_cond_path(nd));
7343 +       return -EACCES;
7344 +}
7345 +
7346  int permission(struct inode *inode, int mask, struct nameidata *nd)
7347  {
7348         int retval, submask;
7349 @@ -235,7 +257,7 @@ int permission(struct inode *inode, int 
7350                 /*
7351                  * Nobody gets write access to a read-only fs.
7352                  */
7353 -               if (IS_RDONLY(inode) &&
7354 +               if ((IS_RDONLY(inode) || (nd && MNT_IS_RDONLY(nd->mnt))) &&
7355                     (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
7356                         return -EROFS;
7357  
7358 @@ -249,6 +271,8 @@ int permission(struct inode *inode, int 
7359  
7360         /* Ordinary permission routines do not understand MAY_APPEND. */
7361         submask = mask & ~MAY_APPEND;
7362 +       if ((retval = dx_permission(inode, mask, nd)))
7363 +               return retval;
7364         if (inode->i_op && inode->i_op->permission)
7365                 retval = inode->i_op->permission(inode, submask, nd);
7366         else
7367 @@ -702,7 +726,8 @@ static __always_inline void follow_dotdo
7368                 if (nd->dentry == current->fs->root &&
7369                     nd->mnt == current->fs->rootmnt) {
7370                          read_unlock(&current->fs->lock);
7371 -                       break;
7372 +                       /* for sane '/' avoid follow_mount() */
7373 +                       return;
7374                 }
7375                  read_unlock(&current->fs->lock);
7376                 spin_lock(&dcache_lock);
7377 @@ -739,16 +764,34 @@ static int do_lookup(struct nameidata *n
7378  {
7379         struct vfsmount *mnt = nd->mnt;
7380         struct dentry *dentry = __d_lookup(nd->dentry, name);
7381 +       struct inode *inode;
7382  
7383         if (!dentry)
7384                 goto need_lookup;
7385         if (dentry->d_op && dentry->d_op->d_revalidate)
7386                 goto need_revalidate;
7387 +       inode = dentry->d_inode;
7388 +       if (!inode)
7389 +               goto done;
7390 +       if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
7391 +               struct proc_dir_entry *de = PDE(inode);
7392 +
7393 +               if (de && !vx_hide_check(0, de->vx_flags))
7394 +                       goto hidden;
7395 +       }
7396 +       if (!dx_check(inode->i_tag, DX_WATCH|DX_ADMIN|DX_HOSTID|DX_IDENT))
7397 +               goto hidden;
7398  done:
7399         path->mnt = mnt;
7400         path->dentry = dentry;
7401         __follow_mount(path);
7402         return 0;
7403 +hidden:
7404 +       vxwprintk(1, "xid=%d did lookup hidden %p[#%d,%lu] »%s«.",
7405 +               vx_current_xid(), inode, inode->i_tag, inode->i_ino,
7406 +               vxd_path(dentry, mnt));
7407 +       dput(dentry);
7408 +       return -ENOENT;
7409  
7410  need_lookup:
7411         dentry = real_lookup(nd->dentry, name, nd);
7412 @@ -1345,7 +1388,8 @@ static inline int check_sticky(struct in
7413   * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
7414   *     nfs_async_unlink().
7415   */
7416 -static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
7417 +static int may_delete(struct inode *dir, struct dentry *victim,
7418 +       int isdir, struct nameidata *nd)
7419  {
7420         int error;
7421  
7422 @@ -1354,13 +1398,13 @@ static int may_delete(struct inode *dir,
7423  
7424         BUG_ON(victim->d_parent->d_inode != dir);
7425  
7426 -       error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
7427 +       error = permission(dir,MAY_WRITE | MAY_EXEC, nd);
7428         if (error)
7429                 return error;
7430         if (IS_APPEND(dir))
7431                 return -EPERM;
7432         if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
7433 -           IS_IMMUTABLE(victim->d_inode))
7434 +               IS_IXORUNLINK(victim->d_inode))
7435                 return -EPERM;
7436         if (isdir) {
7437                 if (!S_ISDIR(victim->d_inode->i_mode))
7438 @@ -1491,6 +1535,14 @@ int may_open(struct nameidata *nd, int a
7439         if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
7440                 return -EISDIR;
7441  
7442 +#ifdef CONFIG_VSERVER_COWBL
7443 +       if (IS_COW(inode) && (flag & FMODE_WRITE)) {
7444 +               if (IS_COW_LINK(inode))
7445 +                       return -EMLINK;
7446 +               inode->i_flags &= ~(S_IUNLINK|S_IMMUTABLE);
7447 +               mark_inode_dirty(inode);
7448 +       }
7449 +#endif
7450         error = vfs_permission(nd, acc_mode);
7451         if (error)
7452                 return error;
7453 @@ -1560,7 +1560,7 @@
7454                         return -EACCES;
7455  
7456                 flag &= ~O_TRUNC;
7457 -       } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE))
7458 +       } else if ((IS_RDONLY(inode) || MNT_IS_RDONLY(nd->mnt)) && (acc_mode & MAY_WRITE))
7459                 return -EROFS;
7460         /*
7461          * An append-only file must be opened in append mode for writing.
7462 @@ -1555,6 +1608,8 @@ int may_open(struct nameidata *nd, int a
7463         return 0;
7464  }
7465  
7466 +int cow_break_link(struct dentry *dentry, const char *pathname);
7467 +
7468  /*
7469   *     open_namei()
7470   *
7471 @@ -1577,6 +1632,11 @@ int open_namei(int dfd, const char *path
7472         struct dentry *dir;
7473         int count = 0;
7474  
7475 +#ifdef CONFIG_VSERVER_COWBL
7476 +       int rflag = flag;
7477 +       int rmode = mode;
7478 +restart:
7479 +#endif
7480         acc_mode = ACC_MODE(flag);
7481  
7482         /* O_TRUNC implies we need access checks for write permissions */
7483 @@ -1670,6 +1730,18 @@ do_last:
7484                 goto exit;
7485  ok:
7486         error = may_open(nd, acc_mode, flag);
7487 +#ifdef CONFIG_VSERVER_COWBL
7488 +       if (error == -EMLINK) {
7489 +               error = cow_break_link(path.dentry, pathname);
7490 +               if (error)
7491 +                       goto exit;
7492 +               path_release(nd);
7493 +               vxdprintk(VXD_CBIT(misc, 2), "restarting open_namei() ...");
7494 +               flag = rflag;
7495 +               mode = rmode;
7496 +               goto restart;
7497 +       }
7498 +#endif
7499         if (error)
7500                 goto exit;
7501         return 0;
7502 @@ -1773,9 +1845,10 @@ fail:
7503  }
7504  EXPORT_SYMBOL_GPL(lookup_create);
7505  
7506 -int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
7507 +int vfs_mknod(struct inode *dir, struct dentry *dentry,
7508 +       int mode, dev_t dev, struct nameidata *nd)
7509  {
7510 -       int error = may_create(dir, dentry, NULL);
7511 +       int error = may_create(dir, dentry, nd);
7512  
7513         if (error)
7514                 return error;
7515 @@ -1825,11 +1898,12 @@ asmlinkage long sys_mknodat(int dfd, con
7516                         error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd);
7517                         break;
7518                 case S_IFCHR: case S_IFBLK:
7519 -                       error = vfs_mknod(nd.dentry->d_inode,dentry,mode,
7520 -                                       new_decode_dev(dev));
7521 +                       error = vfs_mknod(nd.dentry->d_inode, dentry, mode,
7522 +                                       new_decode_dev(dev), &nd);
7523                         break;
7524                 case S_IFIFO: case S_IFSOCK:
7525 -                       error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0);
7526 +                       error = vfs_mknod(nd.dentry->d_inode, dentry, mode,
7527 +                                       0, &nd);
7528                         break;
7529                 case S_IFDIR:
7530                         error = -EPERM;
7531 @@ -1852,9 +1926,10 @@ asmlinkage long sys_mknod(const char __u
7532         return sys_mknodat(AT_FDCWD, filename, mode, dev);
7533  }
7534  
7535 -int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
7536 +int vfs_mkdir(struct inode *dir, struct dentry *dentry,
7537 +       int mode, struct nameidata *nd)
7538  {
7539 -       int error = may_create(dir, dentry, NULL);
7540 +       int error = may_create(dir, dentry, nd);
7541  
7542         if (error)
7543                 return error;
7544 @@ -1893,7 +1968,8 @@ asmlinkage long sys_mkdirat(int dfd, con
7545                 if (!IS_ERR(dentry)) {
7546                         if (!IS_POSIXACL(nd.dentry->d_inode))
7547                                 mode &= ~current->fs->umask;
7548 -                       error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
7549 +                       error = vfs_mkdir(nd.dentry->d_inode, dentry,
7550 +                               mode, &nd);
7551                         dput(dentry);
7552                 }
7553                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
7554 @@ -1938,9 +2014,10 @@ void dentry_unhash(struct dentry *dentry
7555         spin_unlock(&dcache_lock);
7556  }
7557  
7558 -int vfs_rmdir(struct inode *dir, struct dentry *dentry)
7559 +int vfs_rmdir(struct inode *dir, struct dentry *dentry,
7560 +       struct nameidata *nd)
7561  {
7562 -       int error = may_delete(dir, dentry, 1);
7563 +       int error = may_delete(dir, dentry, 1, nd);
7564  
7565         if (error)
7566                 return error;
7567 @@ -2001,7 +2078,7 @@ static long do_rmdir(int dfd, const char
7568         dentry = lookup_hash(&nd);
7569         error = PTR_ERR(dentry);
7570         if (!IS_ERR(dentry)) {
7571 -               error = vfs_rmdir(nd.dentry->d_inode, dentry);
7572 +               error = vfs_rmdir(nd.dentry->d_inode, dentry, &nd);
7573                 dput(dentry);
7574         }
7575         mutex_unlock(&nd.dentry->d_inode->i_mutex);
7576 @@ -2017,9 +2094,10 @@ asmlinkage long sys_rmdir(const char __u
7577         return do_rmdir(AT_FDCWD, pathname);
7578  }
7579  
7580 -int vfs_unlink(struct inode *dir, struct dentry *dentry)
7581 +int vfs_unlink(struct inode *dir, struct dentry *dentry,
7582 +       struct nameidata *nd)
7583  {
7584 -       int error = may_delete(dir, dentry, 0);
7585 +       int error = may_delete(dir, dentry, 0, nd);
7586  
7587         if (error)
7588                 return error;
7589 @@ -2081,7 +2159,7 @@ static long do_unlinkat(int dfd, const c
7590                 inode = dentry->d_inode;
7591                 if (inode)
7592                         atomic_inc(&inode->i_count);
7593 -               error = vfs_unlink(nd.dentry->d_inode, dentry);
7594 +               error = vfs_unlink(nd.dentry->d_inode, dentry, &nd);
7595         exit2:
7596                 dput(dentry);
7597         }
7598 @@ -2116,9 +2194,10 @@ asmlinkage long sys_unlink(const char __
7599         return do_unlinkat(AT_FDCWD, pathname);
7600  }
7601  
7602 -int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
7603 +int vfs_symlink(struct inode *dir, struct dentry *dentry,
7604 +       const char *oldname, int mode, struct nameidata *nd)
7605  {
7606 -       int error = may_create(dir, dentry, NULL);
7607 +       int error = may_create(dir, dentry, nd);
7608  
7609         if (error)
7610                 return error;
7611 @@ -2159,7 +2238,8 @@ asmlinkage long sys_symlinkat(const char
7612                 dentry = lookup_create(&nd, 0);
7613                 error = PTR_ERR(dentry);
7614                 if (!IS_ERR(dentry)) {
7615 -                       error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
7616 +                       error = vfs_symlink(nd.dentry->d_inode, dentry,
7617 +                               from, S_IALLUGO, &nd);
7618                         dput(dentry);
7619                 }
7620                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
7621 @@ -2176,7 +2256,8 @@ asmlinkage long sys_symlink(const char _
7622         return sys_symlinkat(oldname, AT_FDCWD, newname);
7623  }
7624  
7625 -int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
7626 +int vfs_link(struct dentry *old_dentry, struct inode *dir,
7627 +       struct dentry *new_dentry, struct nameidata *nd)
7628  {
7629         struct inode *inode = old_dentry->d_inode;
7630         int error;
7631 @@ -2184,7 +2265,7 @@ int vfs_link(struct dentry *old_dentry, 
7632         if (!inode)
7633                 return -ENOENT;
7634  
7635 -       error = may_create(dir, new_dentry, NULL);
7636 +       error = may_create(dir, new_dentry, nd);
7637         if (error)
7638                 return error;
7639  
7640 @@ -2194,7 +2275,7 @@ int vfs_link(struct dentry *old_dentry, 
7641         /*
7642          * A link to an append-only or immutable file cannot be created.
7643          */
7644 -       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
7645 +       if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
7646                 return -EPERM;
7647         if (!dir->i_op || !dir->i_op->link)
7648                 return -EPERM;
7649 @@ -2251,7 +2332,8 @@ asmlinkage long sys_linkat(int olddfd, c
7650         new_dentry = lookup_create(&nd, 0);
7651         error = PTR_ERR(new_dentry);
7652         if (!IS_ERR(new_dentry)) {
7653 -               error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
7654 +               error = vfs_link(old_nd.dentry, nd.dentry->d_inode,
7655 +                       new_dentry, &nd);
7656                 dput(new_dentry);
7657         }
7658         mutex_unlock(&nd.dentry->d_inode->i_mutex);
7659 @@ -2383,14 +2465,14 @@ int vfs_rename(struct inode *old_dir, st
7660         if (old_dentry->d_inode == new_dentry->d_inode)
7661                 return 0;
7662   
7663 -       error = may_delete(old_dir, old_dentry, is_dir);
7664 +       error = may_delete(old_dir, old_dentry, is_dir, NULL);
7665         if (error)
7666                 return error;
7667  
7668         if (!new_dentry->d_inode)
7669                 error = may_create(new_dir, new_dentry, NULL);
7670         else
7671 -               error = may_delete(new_dir, new_dentry, is_dir);
7672 +               error = may_delete(new_dir, new_dentry, is_dir, NULL);
7673         if (error)
7674                 return error;
7675  
7676 @@ -2468,6 +2550,9 @@ static int do_rename(int olddfd, const c
7677         error = -EINVAL;
7678         if (old_dentry == trap)
7679                 goto exit4;
7680 +       error = -EROFS;
7681 +       if (MNT_IS_RDONLY(newnd.mnt))
7682 +               goto exit4;
7683         new_dentry = lookup_hash(&newnd);
7684         error = PTR_ERR(new_dentry);
7685         if (IS_ERR(new_dentry))
7686 @@ -2561,6 +2646,125 @@ int vfs_follow_link(struct nameidata *nd
7687         return __vfs_follow_link(nd, link);
7688  }
7689  
7690 +
7691 +#ifdef CONFIG_VSERVER_COWBL
7692 +
7693 +#include <linux/file.h>
7694 +
7695 +/*
7696 + * Replace the file at @pathname by a fresh copy: the data is copied to a
7697 + * temporary sibling (the path plus one pad character), which is then
7698 + * renamed over the original.  @dentry is only used for debug output.
7699 + * Returns 0 if the rename succeeded, -EMLINK otherwise.
7700 + */
7701 +int cow_break_link(struct dentry *dentry, const char *pathname)
7702 +{
7703 +       int err = -EMLINK;
7704 +       int ret, mode, pathlen;
7705 +       struct nameidata old_nd, dir_nd;
7706 +       struct dentry *old_dentry, *new_dentry;
7707 +       struct vfsmount *old_mnt, *new_mnt;
7708 +       struct file *old_file, *new_file;
7709 +       char *to, *path, pad='\251';
7710 +       loff_t size;
7711 +
7712 +       vxdprintk(VXD_CBIT(misc, 2),
7713 +               "cow_break_link(%p,»%s«)", dentry, pathname);
7714 +       path = kmalloc(PATH_MAX, GFP_KERNEL);
7715 +       if (!path)
7716 +               return err;
7717 +       ret = path_lookup(pathname, LOOKUP_FOLLOW, &old_nd);
7718 +       vxdprintk(VXD_CBIT(misc, 2), "path_lookup(old): %d", ret);
7719 +       if (ret)
7720 +               goto out_free;
7721 +       old_dentry = old_nd.dentry;
7722 +       old_mnt = old_nd.mnt;
7723 +       mode = old_dentry->d_inode->i_mode;
7724 +
7725 +       to = d_path(old_dentry, old_mnt, path, PATH_MAX-2);
7726 +       if (IS_ERR(to))
7727 +               goto out_rel_old;
7728 +       pathlen = strlen(to);
7729 +       vxdprintk(VXD_CBIT(misc, 2), "old path »%s«", to);
7730 +       to[pathlen+1] = 0;
7731 +retry:
7732 +       to[pathlen] = pad--;
7733 +       if (pad <= '\240')
7734 +               goto out_rel_old;
7735 +
7736 +       vxdprintk(VXD_CBIT(misc, 2), "temp copy »%s«", to);
7737 +       ret = path_lookup(to, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, &dir_nd);
7738 +       if (ret)
7739 +               goto out_rel_old;
7740 +
7741 +       /* this puppy downs the inode sem, even when it fails */
7742 +       new_dentry = lookup_create(&dir_nd, 0);
7743 +       vxdprintk(VXD_CBIT(misc, 2), "lookup_create(new): %p", new_dentry);
7744 +       if (IS_ERR(new_dentry)) {
7745 +               mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
7746 +               path_release(&dir_nd);
7747 +               goto retry;
7748 +       }
7749 +
7750 +       ret = vfs_create(dir_nd.dentry->d_inode, new_dentry, mode, &dir_nd);
7751 +       vxdprintk(VXD_CBIT(misc, 2), "vfs_create(new): %d", ret);
7752 +       if (ret == -EEXIST) {
7753 +               mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
7754 +               dput(new_dentry);
7755 +               path_release(&dir_nd);
7756 +               goto retry;
7757 +       }
7758 +       if (ret)
7759 +               goto out_rel_both;
7760 +
7761 +       new_mnt = dir_nd.mnt;
7762 +       dget(old_dentry);
7763 +       mntget(old_mnt);
7764 +       /* this one cleans up the dentry in case of failure */
7765 +       old_file = dentry_open(old_dentry, old_mnt, O_RDONLY);
7766 +       vxdprintk(VXD_CBIT(misc, 2), "dentry_open(old): %p", old_file);
7767 +       if (IS_ERR(old_file))
7768 +               goto out_rel_both;
7769 +
7770 +       dget(new_dentry);
7771 +       mntget(new_mnt);
7772 +       /* this one cleans up the dentry in case of failure */
7773 +       new_file = dentry_open(new_dentry, new_mnt, O_WRONLY);
7774 +       vxdprintk(VXD_CBIT(misc, 2), "dentry_open(new): %p", new_file);
7775 +       if (IS_ERR(new_file))
7776 +               goto out_fput_old;
7777 +
7778 +       size = i_size_read(old_file->f_dentry->d_inode);
7779 +       ret = vfs_sendfile(new_file, old_file, NULL, size, 0);
7780 +       vxdprintk(VXD_CBIT(misc, 2), "vfs_sendfile: %d", ret);
7781 +       if (ret < 0)
7782 +               goto out_fput_both;
7783 +       ret = vfs_rename(dir_nd.dentry->d_inode, new_dentry,
7784 +               old_nd.dentry->d_parent->d_inode, old_dentry);
7785 +       vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
7786 +       if (!ret)
7787 +               err = 0;
7788 +out_fput_both:
7789 +       vxdprintk(VXD_CBIT(misc, 3), "fput(new_file=%p[#%d])",
7790 +               new_file, atomic_read(&new_file->f_count));
7791 +       fput(new_file);
7792 +out_fput_old:
7793 +       vxdprintk(VXD_CBIT(misc, 3), "fput(old_file=%p[#%d])",
7794 +               old_file, atomic_read(&old_file->f_count));
7795 +       fput(old_file);
7796 +out_rel_both:
7797 +       mutex_unlock(&dir_nd.dentry->d_inode->i_mutex);
7798 +       dput(new_dentry);
7799 +       path_release(&dir_nd);
7800 +out_rel_old:
7801 +       path_release(&old_nd);
7802 +out_free:
7803 +       kfree(path);
7804 +       return err;
7805 +}
7806 +
7807 +#endif
7808 +
7809  /* get the link contents into pagecache */
7810  static char *page_getlink(struct dentry * dentry, struct page **ppage)
7811  {
7812 diff -NurpP --minimal linux-2.6.16.20/fs/namespace.c linux-2.6.16.20-vs2.1.1-rc22/fs/namespace.c
7813 --- linux-2.6.16.20/fs/namespace.c      2006-04-09 13:49:53 +0200
7814 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/namespace.c 2006-05-29 19:20:26 +0200
7815 @@ -23,6 +23,8 @@
7816  #include <linux/namei.h>
7817  #include <linux/security.h>
7818  #include <linux/mount.h>
7819 +#include <linux/vserver/namespace.h>
7820 +#include <linux/vserver/tag.h>
7821  #include <asm/uaccess.h>
7822  #include <asm/unistd.h>
7823  #include "pnode.h"
7824 @@ -241,6 +243,7 @@ static struct vfsmount *clone_mnt(struct
7825                 mnt->mnt_root = dget(root);
7826                 mnt->mnt_mountpoint = mnt->mnt_root;
7827                 mnt->mnt_parent = mnt;
7828 +               mnt->mnt_tag = old->mnt_tag;
7829  
7830                 if (flag & CL_SLAVE) {
7831                         list_add(&mnt->mnt_slave, &old->mnt_slave_list);
7832 @@ -349,43 +352,85 @@ static inline void mangle(struct seq_fil
7833         seq_escape(m, s, " \t\n\\");
7834  }
7835  
7836 +static int mnt_is_reachable(struct vfsmount *mnt)
7837 +{
7838 +       struct vfsmount *root_mnt;
7839 +       struct dentry *root, *point;
7840 +       int ret;
7841 +       /* nonzero if mnt is the namespace root or sits under current's fs root */
7842 +       if (mnt == mnt->mnt_namespace->root)
7843 +               return 1;
7844 +
7845 +       spin_lock(&dcache_lock);
7846 +       root_mnt = current->fs->rootmnt;
7847 +       root = current->fs->root;
7848 +       point = root;
7849 +       /* climb the parents until the topmost mount or current's root mount */
7850 +       while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) {
7851 +               point = mnt->mnt_mountpoint;
7852 +               mnt = mnt->mnt_parent;
7853 +       }
7854 +       /* reachable iff the climb ended at root_mnt and is_subdir() agrees */
7855 +       ret = (mnt == root_mnt) && is_subdir(point, root);
7856 +
7857 +       spin_unlock(&dcache_lock);
7858 +
7859 +       return ret;
7860 +}
7861 +
7862  static int show_vfsmnt(struct seq_file *m, void *v)
7863  {
7864         struct vfsmount *mnt = v;
7865         int err = 0;
7866         static struct proc_fs_info {
7867 -               int flag;
7868 -               char *str;
7869 +               int s_flag;
7870 +               int mnt_flag;
7871 +               char *set_str;
7872 +               char *unset_str;
7873         } fs_info[] = {
7874 -               { MS_SYNCHRONOUS, ",sync" },
7875 -               { MS_DIRSYNC, ",dirsync" },
7876 -               { MS_MANDLOCK, ",mand" },
7877 -               { 0, NULL }
7878 -       };
7879 -       static struct proc_fs_info mnt_info[] = {
7880 -               { MNT_NOSUID, ",nosuid" },
7881 -               { MNT_NODEV, ",nodev" },
7882 -               { MNT_NOEXEC, ",noexec" },
7883 -               { MNT_NOATIME, ",noatime" },
7884 -               { MNT_NODIRATIME, ",nodiratime" },
7885 -               { 0, NULL }
7886 +               { MS_RDONLY, MNT_RDONLY, "ro", "rw" },
7887 +               { MS_SYNCHRONOUS, 0, ",sync", NULL },
7888 +               { MS_DIRSYNC, 0, ",dirsync", NULL },
7889 +               { MS_MANDLOCK, 0, ",mand", NULL },
7890 +               { MS_TAGGED, 0, ",tag", NULL },
7891 +               { MS_NOATIME, MNT_NOATIME, ",noatime", NULL },
7892 +               { MS_NODIRATIME, MNT_NODIRATIME, ",nodiratime", NULL },
7893 +               { 0, MNT_NOSUID, ",nosuid", NULL },
7894 +               { 0, MNT_NODEV, ",nodev", NULL },
7895 +               { 0, MNT_NOEXEC, ",noexec", NULL },
7896 +               { 0, 0, NULL, NULL }
7897         };
7898 -       struct proc_fs_info *fs_infop;
7899 +       struct proc_fs_info *p;
7900 +       unsigned long s_flags = mnt->mnt_sb->s_flags;
7901 +       int mnt_flags = mnt->mnt_flags;
7902  
7903 -       mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
7904 -       seq_putc(m, ' ');
7905 -       seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
7906 -       seq_putc(m, ' ');
7907 -       mangle(m, mnt->mnt_sb->s_type->name);
7908 -       seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
7909 -       for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
7910 -               if (mnt->mnt_sb->s_flags & fs_infop->flag)
7911 -                       seq_puts(m, fs_infop->str);
7912 +       if (vx_flags(VXF_HIDE_MOUNT, 0))
7913 +               return 0;
7914 +       if (!mnt_is_reachable(mnt) && !vx_check(0, VX_WATCH))
7915 +               return 0;
7916 +
7917 +       if (!vx_check(0, VX_ADMIN|VX_WATCH) &&
7918 +               mnt == current->fs->rootmnt) {
7919 +               seq_puts(m, "/dev/root / ");
7920 +       } else {
7921 +               mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
7922 +               seq_putc(m, ' ');
7923 +               seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
7924 +               seq_putc(m, ' ');
7925         }
7926 -       for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
7927 -               if (mnt->mnt_flags & fs_infop->flag)
7928 -                       seq_puts(m, fs_infop->str);
7929 +       mangle(m, mnt->mnt_sb->s_type->name);
7930 +       seq_putc(m, ' ');
7931 +       for (p = fs_info; (p->s_flag | p->mnt_flag) ; p++) {
7932 +               if ((s_flags & p->s_flag) || (mnt_flags & p->mnt_flag)) {
7933 +                       if (p->set_str)
7934 +                               seq_puts(m, p->set_str);
7935 +               } else {
7936 +                       if (p->unset_str)
7937 +                               seq_puts(m, p->unset_str);
7938 +               }
7939         }
7940 +       if (mnt->mnt_flags & MNT_TAGID)
7941 +               seq_printf(m, ",tag=%d", mnt->mnt_tag);
7942         if (mnt->mnt_sb->s_op->show_options)
7943                 err = mnt->mnt_sb->s_op->show_options(m, mnt);
7944         seq_puts(m, " 0 0\n");
7945 @@ -475,15 +520,11 @@ void release_mounts(struct list_head *he
7946         }
7947  }
7948  
7949 -void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
7950 +static inline void __umount_list(struct vfsmount *mnt,
7951 +       int propagate, struct list_head *kill)
7952  {
7953         struct vfsmount *p;
7954  
7955 -       for (p = mnt; p; p = next_mnt(p, mnt)) {
7956 -               list_del(&p->mnt_hash);
7957 -               list_add(&p->mnt_hash, kill);
7958 -       }
7959 -
7960         if (propagate)
7961                 propagate_umount(kill);
7962  
7963 @@ -499,6 +540,33 @@ void umount_tree(struct vfsmount *mnt, i
7964         }
7965  }
7966  
7967 +void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
7968 +{
7969 +       struct vfsmount *p;
7970 +       /* requeue each mount that next_mnt() visits from mnt onto kill */
7971 +       for (p = mnt; p; p = next_mnt(p, mnt)) {
7972 +               list_del(&p->mnt_hash);
7973 +               list_add(&p->mnt_hash, kill);
7974 +               // p->mnt_namespace = NULL;
7975 +       }
7976 +       __umount_list(mnt, propagate, kill);
7977 +}
7978 +/* hand all mounts but fs's root and pwd mounts to __umount_list() */
7979 +void umount_unused(struct vfsmount *mnt, struct fs_struct *fs)
7980 +{
7981 +       struct vfsmount *p;
7982 +       LIST_HEAD(kill);
7983 +       /* NOTE(review): umount_tree() queues by mnt_hash - confirm mnt_list */
7984 +       for (p = mnt; p; p = next_mnt(p, mnt)) {
7985 +               if (p == fs->rootmnt || p == fs->pwdmnt)
7986 +                       continue;
7987 +               list_del(&p->mnt_list);
7988 +               list_add(&p->mnt_list, &kill);
7989 +               p->mnt_namespace = NULL;
7990 +       }
7991 +       __umount_list(mnt, 0, &kill);
7992 +}
7993 +
7994  static int do_umount(struct vfsmount *mnt, int flags)
7995  {
7996         struct super_block *sb = mnt->mnt_sb;
7997 @@ -559,7 +627,7 @@ static int do_umount(struct vfsmount *mn
7998                 down_write(&sb->s_umount);
7999                 if (!(sb->s_flags & MS_RDONLY)) {
8000                         lock_kernel();
8001 -                       DQUOT_OFF(sb);
8002 +                       DQUOT_OFF(sb->s_dqh);
8003                         retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
8004                         unlock_kernel();
8005                 }
8006 @@ -608,7 +676,7 @@ asmlinkage long sys_umount(char __user *
8007                 goto dput_and_out;
8008  
8009         retval = -EPERM;
8010 -       if (!capable(CAP_SYS_ADMIN))
8011 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
8012                 goto dput_and_out;
8013  
8014         retval = do_umount(nd.mnt, flags);
8015 @@ -632,7 +700,7 @@ asmlinkage long sys_oldumount(char __use
8016  
8017  static int mount_is_safe(struct nameidata *nd)
8018  {
8019 -       if (capable(CAP_SYS_ADMIN))
8020 +       if (vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
8021                 return 0;
8022         return -EPERM;
8023  #ifdef notyet
8024 @@ -861,11 +929,13 @@ static int do_change_type(struct nameida
8025  /*
8026   * do loopback mount.
8027   */
8028 -static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
8029 +static int do_loopback(struct nameidata *nd, char *old_name, tag_t tag,
8030 +       unsigned long flags, int mnt_flags)
8031  {
8032         struct nameidata old_nd;
8033         struct vfsmount *mnt = NULL;
8034         int err = mount_is_safe(nd);
8035 +       int recurse = flags & MS_REC;
8036         if (err)
8037                 return err;
8038         if (!old_name || !*old_name)
8039 @@ -891,6 +961,12 @@ static int do_loopback(struct nameidata 
8040         if (!mnt)
8041                 goto out;
8042  
8043 +       mnt->mnt_flags = mnt_flags;
8044 +       if (flags & MS_TAGID) {
8045 +               mnt->mnt_tag = tag;
8046 +               mnt->mnt_flags |= MNT_TAGID;
8047 +       }
8048 +
8049         err = graft_tree(mnt, nd);
8050         if (err) {
8051                 LIST_HEAD(umount_list);
8052 @@ -899,6 +975,7 @@ static int do_loopback(struct nameidata 
8053                 spin_unlock(&vfsmount_lock);
8054                 release_mounts(&umount_list);
8055         }
8056 +       mnt->mnt_flags = mnt_flags;
8057  
8058  out:
8059         up_write(&namespace_sem);
8060 @@ -912,12 +989,12 @@ out:
8061   * on it - tough luck.
8062   */
8063  static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
8064 -                     void *data)
8065 +                     void *data, xid_t xid)
8066  {
8067         int err;
8068         struct super_block *sb = nd->mnt->mnt_sb;
8069  
8070 -       if (!capable(CAP_SYS_ADMIN))
8071 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_REMOUNT))
8072                 return -EPERM;
8073  
8074         if (!check_mnt(nd->mnt))
8075 @@ -951,7 +1028,7 @@ static int do_move_mount(struct nameidat
8076         struct nameidata old_nd, parent_nd;
8077         struct vfsmount *p;
8078         int err = 0;
8079 -       if (!capable(CAP_SYS_ADMIN))
8080 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
8081                 return -EPERM;
8082         if (!old_name || !*old_name)
8083                 return -EINVAL;
8084 @@ -1031,7 +1108,7 @@ static int do_new_mount(struct nameidata
8085                 return -EINVAL;
8086  
8087         /* we need capabilities... */
8088 -       if (!capable(CAP_SYS_ADMIN))
8089 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
8090                 return -EPERM;
8091  
8092         mnt = do_kern_mount(type, flags, name, data);
8093 @@ -1269,6 +1346,7 @@ long do_mount(char *dev_name, char *dir_
8094         struct nameidata nd;
8095         int retval = 0;
8096         int mnt_flags = 0;
8097 +       tag_t tag = 0;
8098  
8099         /* Discard magic */
8100         if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
8101 @@ -1284,7 +1362,19 @@ long do_mount(char *dev_name, char *dir_
8102         if (data_page)
8103                 ((char *)data_page)[PAGE_SIZE - 1] = 0;
8104  
8105 +#ifdef CONFIG_PROPAGATE
8106 +       retval = dx_parse_tag(data_page, &tag, 1);
8107 +       if (retval) {
8108 +               mnt_flags |= MNT_TAGID;
8109 +               /* bind and re-mounts get the tag flag */
8110 +               if (flags & (MS_BIND|MS_REMOUNT))
8111 +                       flags |= MS_TAGID;
8112 +       }
8113 +#endif
8114 +
8115         /* Separate the per-mountpoint flags */
8116 +       if (flags & MS_RDONLY)
8117 +               mnt_flags |= MNT_RDONLY;
8118         if (flags & MS_NOSUID)
8119                 mnt_flags |= MNT_NOSUID;
8120         if (flags & MS_NODEV)
8121 @@ -1296,6 +1386,8 @@ long do_mount(char *dev_name, char *dir_
8122         if (flags & MS_NODIRATIME)
8123                 mnt_flags |= MNT_NODIRATIME;
8124  
8125 +       if (vx_ccaps(VXC_SECURE_MOUNT))
8126 +               mnt_flags |= MNT_NODEV;
8127         flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
8128                    MS_NOATIME | MS_NODIRATIME);
8129  
8130 @@ -1310,9 +1402,9 @@ long do_mount(char *dev_name, char *dir_
8131  
8132         if (flags & MS_REMOUNT)
8133                 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
8134 -                                   data_page);
8135 +                                   data_page, tag);
8136         else if (flags & MS_BIND)
8137 -               retval = do_loopback(&nd, dev_name, flags & MS_REC);
8138 +               retval = do_loopback(&nd, dev_name, tag, flags, mnt_flags);
8139         else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
8140                 retval = do_change_type(&nd, flags);
8141         else if (flags & MS_MOVE)
8142 @@ -1410,7 +1502,7 @@ int copy_namespace(int flags, struct tas
8143         if (!(flags & CLONE_NEWNS))
8144                 return 0;
8145  
8146 -       if (!capable(CAP_SYS_ADMIN)) {
8147 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SECURE_MOUNT)) {
8148                 err = -EPERM;
8149                 goto out;
8150         }
8151 diff -NurpP --minimal linux-2.6.16.20/fs/nfs/dir.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/dir.c
8152 --- linux-2.6.16.20/fs/nfs/dir.c        2006-02-18 14:40:23 +0100
8153 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/dir.c   2006-04-26 19:07:00 +0200
8154 @@ -28,9 +28,11 @@
8155  #include <linux/sunrpc/clnt.h>
8156  #include <linux/nfs_fs.h>
8157  #include <linux/nfs_mount.h>
8158 +#include <linux/mount.h>
8159  #include <linux/pagemap.h>
8160  #include <linux/smp_lock.h>
8161  #include <linux/namei.h>
8162 +#include <linux/vserver/tag.h>
8163  
8164  #include "nfs4_fs.h"
8165  #include "delegation.h"
8166 @@ -869,6 +871,7 @@ static struct dentry *nfs_lookup(struct 
8167         inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
8168         if (!inode)
8169                 goto out_unlock;
8170 +       dx_propagate_tag(nd, inode);
8171  no_entry:
8172         res = d_add_unique(dentry, inode);
8173         if (res != NULL)
8174 @@ -902,7 +905,8 @@ static int is_atomic_open(struct inode *
8175         if (nd->flags & LOOKUP_DIRECTORY)
8176                 return 0;
8177         /* Are we trying to write to a read only partition? */
8178 -       if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
8179 +       if ((IS_RDONLY(dir) || MNT_IS_RDONLY(nd->mnt)) &&
8180 +               (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
8181                 return 0;
8182         return 1;
8183  }
8184 diff -NurpP --minimal linux-2.6.16.20/fs/nfs/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/inode.c
8185 --- linux-2.6.16.20/fs/nfs/inode.c      2006-02-18 14:40:23 +0100
8186 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/inode.c 2006-04-26 19:07:00 +0200
8187 @@ -35,6 +35,7 @@
8188  #include <linux/mount.h>
8189  #include <linux/nfs_idmap.h>
8190  #include <linux/vfs.h>
8191 +#include <linux/vserver/tag.h>
8192  
8193  #include <asm/system.h>
8194  #include <asm/uaccess.h>
8195 @@ -336,12 +337,16 @@ nfs_sb_init(struct super_block *sb, rpc_
8196         }
8197         server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
8198  
8199 +       if (server->flags & NFS_MOUNT_TAGGED)
8200 +               sb->s_flags |= MS_TAGGED;
8201 +
8202         sb->s_maxbytes = fsinfo.maxfilesize;
8203         if (sb->s_maxbytes > MAX_LFS_FILESIZE) 
8204                 sb->s_maxbytes = MAX_LFS_FILESIZE; 
8205  
8206         server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
8207         server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
8208 +       server->client->cl_tag = (server->flags & NFS_MOUNT_TAGGED) ? 1 : 0;
8209  
8210         /* We're airborne Set socket buffersize */
8211         rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
8212 @@ -413,6 +418,7 @@ nfs_create_client(struct nfs_server *ser
8213  
8214         clnt->cl_intr     = 1;
8215         clnt->cl_softrtry = 1;
8216 +       clnt->cl_tag      = 1;
8217  
8218         return clnt;
8219  
8220 @@ -593,6 +599,7 @@ static int nfs_show_options(struct seq_f
8221                 { NFS_MOUNT_NOAC, ",noac", "" },
8222                 { NFS_MOUNT_NONLM, ",nolock", ",lock" },
8223                 { NFS_MOUNT_NOACL, ",noacl", "" },
8224 +               { NFS_MOUNT_TAGGED, ",tag", "" },
8225                 { 0, NULL, NULL }
8226         };
8227         struct proc_nfs_info *nfs_infop;
8228 @@ -805,8 +812,10 @@ nfs_fhget(struct super_block *sb, struct
8229                         nfsi->change_attr = fattr->change_attr;
8230                 inode->i_size = nfs_size_to_loff_t(fattr->size);
8231                 inode->i_nlink = fattr->nlink;
8232 -               inode->i_uid = fattr->uid;
8233 -               inode->i_gid = fattr->gid;
8234 +               inode->i_uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid);
8235 +               inode->i_gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid);
8236 +               inode->i_tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0);
8237 +                                        /* maybe fattr->xid someday */
8238                 if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
8239                         /*
8240                          * report the blocks in 512byte units
8241 @@ -897,6 +906,8 @@ void nfs_setattr_update_inode(struct ino
8242                         inode->i_uid = attr->ia_uid;
8243                 if ((attr->ia_valid & ATTR_GID) != 0)
8244                         inode->i_gid = attr->ia_gid;
8245 +               if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
8246 +                       inode->i_tag = attr->ia_tag;
8247                 spin_lock(&inode->i_lock);
8248                 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
8249                 spin_unlock(&inode->i_lock);
8250 @@ -1294,6 +1305,9 @@ static int nfs_check_inode_attributes(st
8251         struct nfs_inode *nfsi = NFS_I(inode);
8252         loff_t cur_size, new_isize;
8253         int data_unstable;
8254 +       uid_t uid;
8255 +       gid_t gid;
8256 +       tag_t tag;
8257  
8258  
8259         if ((fattr->valid & NFS_ATTR_FATTR) == 0)
8260 @@ -1333,10 +1347,15 @@ static int nfs_check_inode_attributes(st
8261                         nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
8262         }
8263  
8264 +       uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid);
8265 +       gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid);
8266 +       tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0);
8267 +
8268         /* Have any file permissions changed? */
8269         if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
8270 -                       || inode->i_uid != fattr->uid
8271 -                       || inode->i_gid != fattr->gid)
8272 +                       || inode->i_uid != uid
8273 +                       || inode->i_gid != gid
8274 +                       || inode->i_tag != tag)
8275                 nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
8276  
8277         /* Has the link count changed? */
8278 @@ -1420,6 +1439,9 @@ static int nfs_update_inode(struct inode
8279         loff_t cur_isize, new_isize;
8280         unsigned int    invalid = 0;
8281         int data_stable;
8282 +       uid_t uid;
8283 +       gid_t gid;
8284 +       tag_t tag;
8285  
8286         dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
8287                         __FUNCTION__, inode->i_sb->s_id, inode->i_ino,
8288 @@ -1498,15 +1520,21 @@ static int nfs_update_inode(struct inode
8289         }
8290         memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
8291  
8292 +       uid = INOTAG_UID(DX_TAG(inode), fattr->uid, fattr->gid);
8293 +       gid = INOTAG_GID(DX_TAG(inode), fattr->uid, fattr->gid);
8294 +       tag = INOTAG_TAG(DX_TAG(inode), fattr->uid, fattr->gid, 0);
8295 +
8296         if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
8297 -           inode->i_uid != fattr->uid ||
8298 -           inode->i_gid != fattr->gid)
8299 +           inode->i_uid != uid ||
8300 +           inode->i_gid != gid ||
8301 +           inode->i_tag != tag)
8302                 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
8303  
8304         inode->i_mode = fattr->mode;
8305         inode->i_nlink = fattr->nlink;
8306 -       inode->i_uid = fattr->uid;
8307 -       inode->i_gid = fattr->gid;
8308 +       inode->i_uid = uid;
8309 +       inode->i_gid = gid;
8310 +       inode->i_tag = tag;
8311  
8312         if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
8313                 /*
8314 diff -NurpP --minimal linux-2.6.16.20/fs/nfs/nfs3xdr.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/nfs3xdr.c
8315 --- linux-2.6.16.20/fs/nfs/nfs3xdr.c    2006-02-18 14:40:23 +0100
8316 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/nfs3xdr.c       2006-04-26 19:07:00 +0200
8317 @@ -22,6 +22,7 @@
8318  #include <linux/nfs3.h>
8319  #include <linux/nfs_fs.h>
8320  #include <linux/nfsacl.h>
8321 +#include <linux/vserver/tag.h>
8322  
8323  #define NFSDBG_FACILITY                NFSDBG_XDR
8324  
8325 @@ -178,7 +179,7 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt
8326  }
8327  
8328  static inline u32 *
8329 -xdr_encode_sattr(u32 *p, struct iattr *attr)
8330 +xdr_encode_sattr(u32 *p, struct iattr *attr, int tag)
8331  {
8332         if (attr->ia_valid & ATTR_MODE) {
8333                 *p++ = xdr_one;
8334 @@ -186,15 +187,17 @@ xdr_encode_sattr(u32 *p, struct iattr *a
8335         } else {
8336                 *p++ = xdr_zero;
8337         }
8338 -       if (attr->ia_valid & ATTR_UID) {
8339 +       if (attr->ia_valid & ATTR_UID ||
8340 +               (tag && (attr->ia_valid & ATTR_TAG))) {
8341                 *p++ = xdr_one;
8342 -               *p++ = htonl(attr->ia_uid);
8343 +               *p++ = htonl(TAGINO_UID(tag, attr->ia_uid, attr->ia_tag));
8344         } else {
8345                 *p++ = xdr_zero;
8346         }
8347 -       if (attr->ia_valid & ATTR_GID) {
8348 +       if (attr->ia_valid & ATTR_GID ||
8349 +               (tag && (attr->ia_valid & ATTR_TAG))) {
8350                 *p++ = xdr_one;
8351 -               *p++ = htonl(attr->ia_gid);
8352 +               *p++ = htonl(TAGINO_GID(tag, attr->ia_gid, attr->ia_tag));
8353         } else {
8354                 *p++ = xdr_zero;
8355         }
8356 @@ -279,7 +282,8 @@ static int
8357  nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args)
8358  {
8359         p = xdr_encode_fhandle(p, args->fh);
8360 -       p = xdr_encode_sattr(p, args->sattr);
8361 +       p = xdr_encode_sattr(p, args->sattr,
8362 +               req->rq_task->tk_client->cl_tag);
8363         *p++ = htonl(args->guard);
8364         if (args->guard)
8365                 p = xdr_encode_time3(p, &args->guardtime);
8366 @@ -370,7 +374,8 @@ nfs3_xdr_createargs(struct rpc_rqst *req
8367                 *p++ = args->verifier[0];
8368                 *p++ = args->verifier[1];
8369         } else
8370 -               p = xdr_encode_sattr(p, args->sattr);
8371 +               p = xdr_encode_sattr(p, args->sattr,
8372 +                       req->rq_task->tk_client->cl_tag);
8373  
8374         req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
8375         return 0;
8376 @@ -384,7 +389,8 @@ nfs3_xdr_mkdirargs(struct rpc_rqst *req,
8377  {
8378         p = xdr_encode_fhandle(p, args->fh);
8379         p = xdr_encode_array(p, args->name, args->len);
8380 -       p = xdr_encode_sattr(p, args->sattr);
8381 +       p = xdr_encode_sattr(p, args->sattr,
8382 +               req->rq_task->tk_client->cl_tag);
8383         req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
8384         return 0;
8385  }
8386 @@ -397,7 +403,8 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *re
8387  {
8388         p = xdr_encode_fhandle(p, args->fromfh);
8389         p = xdr_encode_array(p, args->fromname, args->fromlen);
8390 -       p = xdr_encode_sattr(p, args->sattr);
8391 +       p = xdr_encode_sattr(p, args->sattr,
8392 +               req->rq_task->tk_client->cl_tag);
8393         p = xdr_encode_array(p, args->topath, args->tolen);
8394         req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
8395         return 0;
8396 @@ -412,7 +419,8 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req,
8397         p = xdr_encode_fhandle(p, args->fh);
8398         p = xdr_encode_array(p, args->name, args->len);
8399         *p++ = htonl(args->type);
8400 -       p = xdr_encode_sattr(p, args->sattr);
8401 +       p = xdr_encode_sattr(p, args->sattr,
8402 +               req->rq_task->tk_client->cl_tag);
8403         if (args->type == NF3CHR || args->type == NF3BLK) {
8404                 *p++ = htonl(MAJOR(args->rdev));
8405                 *p++ = htonl(MINOR(args->rdev));
8406 diff -NurpP --minimal linux-2.6.16.20/fs/nfs/nfsroot.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/nfsroot.c
8407 --- linux-2.6.16.20/fs/nfs/nfsroot.c    2006-02-18 14:40:23 +0100
8408 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfs/nfsroot.c       2006-04-26 19:07:00 +0200
8409 @@ -87,6 +87,7 @@
8410  #include <linux/root_dev.h>
8411  #include <net/ipconfig.h>
8412  #include <linux/parser.h>
8413 +#include <linux/vs_cvirt.h>
8414  
8415  /* Define this to allow debugging output */
8416  #undef NFSROOT_DEBUG
8417 @@ -119,12 +120,12 @@ static int mount_port __initdata = 0;             /
8418  enum {
8419         /* Options that take integer arguments */
8420         Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
8421 -       Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
8422 +       Opt_acregmax, Opt_acdirmin, Opt_acdirmax, Opt_tagid,
8423         /* Options that take no arguments */
8424         Opt_soft, Opt_hard, Opt_intr,
8425         Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, 
8426         Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
8427 -       Opt_acl, Opt_noacl,
8428 +       Opt_acl, Opt_noacl, Opt_tag, Opt_notag,
8429         /* Error token */
8430         Opt_err
8431  };
8432 @@ -161,6 +162,10 @@ static match_table_t __initdata tokens =
8433         {Opt_tcp, "tcp"},
8434         {Opt_acl, "acl"},
8435         {Opt_noacl, "noacl"},
8436 +       {Opt_tag, "tag"},
8437 +       {Opt_notag, "notag"},
8438 +       {Opt_tagid, "tagid=%u"},
8439 +       {Opt_tag, "tagxid"},
8440         {Opt_err, NULL}
8441         
8442  };
8443 @@ -275,6 +280,20 @@ static int __init root_nfs_parse(char *n
8444                         case Opt_noacl:
8445                                 nfs_data.flags |= NFS_MOUNT_NOACL;
8446                                 break;
8447 +#ifndef CONFIG_TAGGING_NONE
8448 +                       case Opt_tag:
8449 +                               nfs_data.flags |= NFS_MOUNT_TAGGED;
8450 +                               break;
8451 +                       case Opt_notag:
8452 +                               nfs_data.flags &= ~NFS_MOUNT_TAGGED;
8453 +                               break;
8454 +#endif
8455 +#ifdef CONFIG_PROPAGATE
8456 +                       case Opt_tagid:
8457 +                               /* use args[0] */
8458 +                               nfs_data.flags |= NFS_MOUNT_TAGGED;
8459 +                               break;
8460 +#endif
8461                         default:
8462                                 printk(KERN_WARNING "Root-NFS: unknown "
8463                                         "option: %s\n", p);
8464 @@ -312,7 +331,7 @@ static int __init root_nfs_name(char *na
8465         /* Override them by options set on kernel command-line */
8466         root_nfs_parse(name, buf);
8467  
8468 -       cp = system_utsname.nodename;
8469 +       cp = vx_new_uts(nodename);
8470         if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
8471                 printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
8472                 return -1;
8473 diff -NurpP --minimal linux-2.6.16.20/fs/nfsd/auth.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/auth.c
8474 --- linux-2.6.16.20/fs/nfsd/auth.c      2004-08-14 12:56:14 +0200
8475 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/auth.c 2006-04-26 19:07:00 +0200
8476 @@ -9,6 +9,7 @@
8477  #include <linux/sunrpc/svc.h>
8478  #include <linux/sunrpc/svcauth.h>
8479  #include <linux/nfsd/nfsd.h>
8480 +#include <linux/vserver/tag.h>
8481  
8482  #define        CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE))
8483  
8484 @@ -42,18 +43,21 @@ int nfsd_setuser(struct svc_rqst *rqstp,
8485         }
8486  
8487         if (cred->cr_uid != (uid_t) -1)
8488 -               current->fsuid = cred->cr_uid;
8489 +               current->fsuid = INOTAG_UID(DX_TAG_NFSD, cred->cr_uid, cred->cr_gid);
8490         else
8491                 current->fsuid = exp->ex_anon_uid;
8492         if (cred->cr_gid != (gid_t) -1)
8493 -               current->fsgid = cred->cr_gid;
8494 +               current->fsgid = INOTAG_GID(DX_TAG_NFSD, cred->cr_uid, cred->cr_gid);
8495         else
8496                 current->fsgid = exp->ex_anon_gid;
8497  
8498 +       /* this desperately needs a tag :) */
8499 +       current->xid = (xid_t)INOTAG_TAG(DX_TAG_NFSD, cred->cr_uid, cred->cr_gid, 0);
8500 +
8501         if (!cred->cr_group_info)
8502                 return -ENOMEM;
8503         ret = set_current_groups(cred->cr_group_info);
8504 -       if ((cred->cr_uid)) {
8505 +       if (INOTAG_UID(DX_TAG_NFSD, cred->cr_uid, cred->cr_gid)) {
8506                 cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
8507         } else {
8508                 cap_t(current->cap_effective) |= (CAP_NFSD_MASK &
8509 diff -NurpP --minimal linux-2.6.16.20/fs/nfsd/nfs3xdr.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfs3xdr.c
8510 --- linux-2.6.16.20/fs/nfsd/nfs3xdr.c   2006-04-09 13:49:54 +0200
8511 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfs3xdr.c      2006-04-26 19:07:00 +0200
8512 @@ -21,6 +21,7 @@
8513  #include <linux/sunrpc/svc.h>
8514  #include <linux/nfsd/nfsd.h>
8515  #include <linux/nfsd/xdr3.h>
8516 +#include <linux/vserver/tag.h>
8517  
8518  #define NFSDDBG_FACILITY               NFSDDBG_XDR
8519  
8520 @@ -111,6 +112,8 @@ static inline u32 *
8521  decode_sattr3(u32 *p, struct iattr *iap)
8522  {
8523         u32     tmp;
8524 +       uid_t   uid = 0;
8525 +       gid_t   gid = 0;
8526  
8527         iap->ia_valid = 0;
8528  
8529 @@ -120,12 +123,15 @@ decode_sattr3(u32 *p, struct iattr *iap)
8530         }
8531         if (*p++) {
8532                 iap->ia_valid |= ATTR_UID;
8533 -               iap->ia_uid = ntohl(*p++);
8534 +               uid = ntohl(*p++);
8535         }
8536         if (*p++) {
8537                 iap->ia_valid |= ATTR_GID;
8538 -               iap->ia_gid = ntohl(*p++);
8539 +               gid = ntohl(*p++);
8540         }
8541 +       iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid);
8542 +       iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid);
8543 +       iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0);
8544         if (*p++) {
8545                 u64     newsize;
8546  
8547 @@ -163,8 +169,10 @@ encode_fattr3(struct svc_rqst *rqstp, u3
8548         *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
8549         *p++ = htonl((u32) stat->mode);
8550         *p++ = htonl((u32) stat->nlink);
8551 -       *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
8552 -       *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
8553 +       *p++ = htonl((u32) nfsd_ruid(rqstp,
8554 +               TAGINO_UID(DX_TAG(dentry->d_inode), stat->uid, stat->tag)));
8555 +       *p++ = htonl((u32) nfsd_rgid(rqstp,
8556 +               TAGINO_GID(DX_TAG(dentry->d_inode), stat->gid, stat->tag)));
8557         if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
8558                 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
8559         } else {
8560 diff -NurpP --minimal linux-2.6.16.20/fs/nfsd/nfs4recover.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfs4recover.c
8561 --- linux-2.6.16.20/fs/nfsd/nfs4recover.c       2006-02-18 14:40:23 +0100
8562 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfs4recover.c  2006-04-26 19:07:00 +0200
8563 @@ -155,7 +155,7 @@ nfsd4_create_clid_dir(struct nfs4_client
8564                 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
8565                 goto out_put;
8566         }
8567 -       status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
8568 +       status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU, NULL);
8569  out_put:
8570         dput(dentry);
8571  out_unlock:
8572 @@ -259,7 +259,7 @@ nfsd4_remove_clid_file(struct dentry *di
8573                 return -EINVAL;
8574         }
8575         mutex_lock(&dir->d_inode->i_mutex);
8576 -       status = vfs_unlink(dir->d_inode, dentry);
8577 +       status = vfs_unlink(dir->d_inode, dentry, NULL);
8578         mutex_unlock(&dir->d_inode->i_mutex);
8579         return status;
8580  }
8581 @@ -274,7 +274,7 @@ nfsd4_clear_clid_dir(struct dentry *dir,
8582          * a kernel from the future.... */
8583         nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
8584         mutex_lock(&dir->d_inode->i_mutex);
8585 -       status = vfs_rmdir(dir->d_inode, dentry);
8586 +       status = vfs_rmdir(dir->d_inode, dentry, NULL);
8587         mutex_unlock(&dir->d_inode->i_mutex);
8588         return status;
8589  }
8590 diff -NurpP --minimal linux-2.6.16.20/fs/nfsd/nfs4xdr.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfs4xdr.c
8591 --- linux-2.6.16.20/fs/nfsd/nfs4xdr.c   2006-02-18 14:40:23 +0100
8592 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfs4xdr.c      2006-04-26 19:07:00 +0200
8593 @@ -57,6 +57,7 @@
8594  #include <linux/nfsd_idmap.h>
8595  #include <linux/nfs4.h>
8596  #include <linux/nfs4_acl.h>
8597 +#include <linux/vserver/tag.h>
8598  
8599  #define NFSDDBG_FACILITY               NFSDDBG_XDR
8600  
8601 @@ -1561,14 +1562,18 @@ out_acl:
8602                 WRITE32(stat.nlink);
8603         }
8604         if (bmval1 & FATTR4_WORD1_OWNER) {
8605 -               status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen);
8606 +               status = nfsd4_encode_user(rqstp,
8607 +                       TAGINO_UID(DX_TAG(dentry->d_inode),
8608 +                       stat.uid, stat.tag), &p, &buflen);
8609                 if (status == nfserr_resource)
8610                         goto out_resource;
8611                 if (status)
8612                         goto out;
8613         }
8614         if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
8615 -               status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen);
8616 +               status = nfsd4_encode_group(rqstp,
8617 +                       TAGINO_GID(DX_TAG(dentry->d_inode),
8618 +                       stat.gid, stat.tag), &p, &buflen);
8619                 if (status == nfserr_resource)
8620                         goto out_resource;
8621                 if (status)
8622 diff -NurpP --minimal linux-2.6.16.20/fs/nfsd/nfsxdr.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfsxdr.c
8623 --- linux-2.6.16.20/fs/nfsd/nfsxdr.c    2006-04-09 13:49:54 +0200
8624 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/nfsxdr.c       2006-04-26 19:07:00 +0200
8625 @@ -15,6 +15,7 @@
8626  #include <linux/nfsd/nfsd.h>
8627  #include <linux/nfsd/xdr.h>
8628  #include <linux/mm.h>
8629 +#include <linux/vserver/tag.h>
8630  
8631  #define NFSDDBG_FACILITY               NFSDDBG_XDR
8632  
8633 @@ -102,6 +103,8 @@ static inline u32 *
8634  decode_sattr(u32 *p, struct iattr *iap)
8635  {
8636         u32     tmp, tmp1;
8637 +       uid_t   uid = 0;
8638 +       gid_t   gid = 0;
8639  
8640         iap->ia_valid = 0;
8641  
8642 @@ -115,12 +118,15 @@ decode_sattr(u32 *p, struct iattr *iap)
8643         }
8644         if ((tmp = ntohl(*p++)) != (u32)-1) {
8645                 iap->ia_valid |= ATTR_UID;
8646 -               iap->ia_uid = tmp;
8647 +               uid = tmp;
8648         }
8649         if ((tmp = ntohl(*p++)) != (u32)-1) {
8650                 iap->ia_valid |= ATTR_GID;
8651 -               iap->ia_gid = tmp;
8652 +               gid = tmp;
8653         }
8654 +       iap->ia_uid = INOTAG_UID(DX_TAG_NFSD, uid, gid);
8655 +       iap->ia_gid = INOTAG_GID(DX_TAG_NFSD, uid, gid);
8656 +       iap->ia_tag = INOTAG_TAG(DX_TAG_NFSD, uid, gid, 0);
8657         if ((tmp = ntohl(*p++)) != (u32)-1) {
8658                 iap->ia_valid |= ATTR_SIZE;
8659                 iap->ia_size = tmp;
8660 @@ -164,8 +170,10 @@ encode_fattr(struct svc_rqst *rqstp, u32
8661         *p++ = htonl(nfs_ftypes[type >> 12]);
8662         *p++ = htonl((u32) stat->mode);
8663         *p++ = htonl((u32) stat->nlink);
8664 -       *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
8665 -       *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
8666 +       *p++ = htonl((u32) nfsd_ruid(rqstp,
8667 +               TAGINO_UID(DX_TAG(dentry->d_inode), stat->uid, stat->tag)));
8668 +       *p++ = htonl((u32) nfsd_rgid(rqstp,
8669 +               TAGINO_GID(DX_TAG(dentry->d_inode), stat->gid, stat->tag)));
8670  
8671         if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
8672                 *p++ = htonl(NFS_MAXPATHLEN);
8673 diff -NurpP --minimal linux-2.6.16.20/fs/nfsd/vfs.c linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/vfs.c
8674 --- linux-2.6.16.20/fs/nfsd/vfs.c       2006-02-18 14:40:23 +0100
8675 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/nfsd/vfs.c  2006-04-26 19:07:00 +0200
8676 @@ -1160,13 +1160,13 @@ nfsd_create(struct svc_rqst *rqstp, stru
8677                 err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
8678                 break;
8679         case S_IFDIR:
8680 -               err = vfs_mkdir(dirp, dchild, iap->ia_mode);
8681 +               err = vfs_mkdir(dirp, dchild, iap->ia_mode, NULL);
8682                 break;
8683         case S_IFCHR:
8684         case S_IFBLK:
8685         case S_IFIFO:
8686         case S_IFSOCK:
8687 -               err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
8688 +               err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev, NULL);
8689                 break;
8690         default:
8691                 printk("nfsd: bad file type %o in nfsd_create\n", type);
8692 @@ -1446,11 +1446,13 @@ nfsd_symlink(struct svc_rqst *rqstp, str
8693                 else {
8694                         strncpy(path_alloced, path, plen);
8695                         path_alloced[plen] = 0;
8696 -                       err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode);
8697 +                       err = vfs_symlink(dentry->d_inode, dnew,
8698 +                               path_alloced, mode, NULL);
8699                         kfree(path_alloced);
8700                 }
8701         } else
8702 -               err = vfs_symlink(dentry->d_inode, dnew, path, mode);
8703 +               err = vfs_symlink(dentry->d_inode, dnew,
8704 +                       path, mode, NULL);
8705  
8706         if (!err)
8707                 if (EX_ISSYNC(fhp->fh_export))
8708 @@ -1508,7 +1510,7 @@ nfsd_link(struct svc_rqst *rqstp, struct
8709         dold = tfhp->fh_dentry;
8710         dest = dold->d_inode;
8711  
8712 -       err = vfs_link(dold, dirp, dnew);
8713 +       err = vfs_link(dold, dirp, dnew, NULL);
8714         if (!err) {
8715                 if (EX_ISSYNC(ffhp->fh_export)) {
8716                         err = nfserrno(nfsd_sync_dir(ddir));
8717 @@ -1670,9 +1672,9 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
8718                         err = -EPERM;
8719                 } else
8720  #endif
8721 -               err = vfs_unlink(dirp, rdentry);
8722 +               err = vfs_unlink(dirp, rdentry, NULL);
8723         } else { /* It's RMDIR */
8724 -               err = vfs_rmdir(dirp, rdentry);
8725 +               err = vfs_rmdir(dirp, rdentry, NULL);
8726         }
8727  
8728         dput(rdentry);
8729 @@ -1781,7 +1783,8 @@ nfsd_permission(struct svc_export *exp, 
8730          */
8731         if (!(acc & MAY_LOCAL_ACCESS))
8732                 if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
8733 -                       if (EX_RDONLY(exp) || IS_RDONLY(inode))
8734 +                       if (EX_RDONLY(exp) || IS_RDONLY(inode)
8735 +                               || MNT_IS_RDONLY(exp->ex_mnt))
8736                                 return nfserr_rofs;
8737                         if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
8738                                 return nfserr_perm;
8739 diff -NurpP --minimal linux-2.6.16.20/fs/open.c linux-2.6.16.20-vs2.1.1-rc22/fs/open.c
8740 --- linux-2.6.16.20/fs/open.c   2006-05-11 21:25:36 +0200
8741 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/open.c      2006-04-26 19:07:00 +0200
8742 @@ -27,6 +27,9 @@
8743  #include <linux/pagemap.h>
8744  #include <linux/syscalls.h>
8745  #include <linux/rcupdate.h>
8746 +#include <linux/vs_limit.h>
8747 +#include <linux/vs_dlimit.h>
8748 +#include <linux/vserver/tag.h>
8749  
8750  #include <asm/unistd.h>
8751  
8752 @@ -45,6 +48,8 @@ int vfs_statfs(struct super_block *sb, s
8753                         if (retval == 0 && buf->f_frsize == 0)
8754                                 buf->f_frsize = buf->f_bsize;
8755                 }
8756 +               if (!vx_check(0, VX_ADMIN|VX_WATCH))
8757 +                       vx_vsi_statfs(sb, buf);
8758         }
8759         return retval;
8760  }
8761 @@ -248,7 +253,7 @@ static long do_sys_truncate(const char _
8762                 goto dput_and_out;
8763  
8764         error = -EROFS;
8765 -       if (IS_RDONLY(inode))
8766 +       if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt))
8767                 goto dput_and_out;
8768  
8769         error = -EPERM;
8770 @@ -378,7 +383,7 @@ asmlinkage long sys_utime(char __user * 
8771         inode = nd.dentry->d_inode;
8772  
8773         error = -EROFS;
8774 -       if (IS_RDONLY(inode))
8775 +       if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt))
8776                 goto dput_and_out;
8777  
8778         /* Don't worry, the checks are done in inode_change_ok() */
8779 @@ -435,7 +440,7 @@ long do_utimes(int dfd, char __user *fil
8780         inode = nd.dentry->d_inode;
8781  
8782         error = -EROFS;
8783 -       if (IS_RDONLY(inode))
8784 +       if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt))
8785                 goto dput_and_out;
8786  
8787         /* Don't worry, the checks are done in inode_change_ok() */
8788 @@ -522,7 +527,8 @@ asmlinkage long sys_faccessat(int dfd, c
8789         if (!res) {
8790                 res = vfs_permission(&nd, mode);
8791                 /* SuS v2 requires we report a read only fs too */
8792 -               if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
8793 +               if(!res && (mode & S_IWOTH)
8794 +                  && (IS_RDONLY(nd.dentry->d_inode) || MNT_IS_RDONLY(nd.mnt))
8795                    && !special_file(nd.dentry->d_inode->i_mode))
8796                         res = -EROFS;
8797                 path_release(&nd);
8798 @@ -633,7 +639,7 @@ asmlinkage long sys_fchmod(unsigned int 
8799         inode = dentry->d_inode;
8800  
8801         err = -EROFS;
8802 -       if (IS_RDONLY(inode))
8803 +       if (IS_RDONLY(inode) || MNT_IS_RDONLY(file->f_vfsmnt))
8804                 goto out_putf;
8805         err = -EPERM;
8806         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
8807 @@ -666,7 +672,7 @@ asmlinkage long sys_fchmodat(int dfd, co
8808         inode = nd.dentry->d_inode;
8809  
8810         error = -EROFS;
8811 -       if (IS_RDONLY(inode))
8812 +       if (IS_RDONLY(inode) || MNT_IS_RDONLY(nd.mnt))
8813                 goto dput_and_out;
8814  
8815         error = -EPERM;
8816 @@ -692,7 +698,8 @@ asmlinkage long sys_chmod(const char __u
8817         return sys_fchmodat(AT_FDCWD, filename, mode);
8818  }
8819  
8820 -static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
8821 +static int chown_common(struct dentry *dentry, struct vfsmount *mnt,
8822 +       uid_t user, gid_t group)
8823  {
8824         struct inode * inode;
8825         int error;
8826 @@ -704,7 +711,7 @@ static int chown_common(struct dentry * 
8827                 goto out;
8828         }
8829         error = -EROFS;
8830 -       if (IS_RDONLY(inode))
8831 +       if (IS_RDONLY(inode) || MNT_IS_RDONLY(mnt))
8832                 goto out;
8833         error = -EPERM;
8834         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
8835 @@ -712,11 +719,11 @@ static int chown_common(struct dentry * 
8836         newattrs.ia_valid =  ATTR_CTIME;
8837         if (user != (uid_t) -1) {
8838                 newattrs.ia_valid |= ATTR_UID;
8839 -               newattrs.ia_uid = user;
8840 +               newattrs.ia_uid = dx_map_uid(user);
8841         }
8842         if (group != (gid_t) -1) {
8843                 newattrs.ia_valid |= ATTR_GID;
8844 -               newattrs.ia_gid = group;
8845 +               newattrs.ia_gid = dx_map_gid(group);
8846         }
8847         if (!S_ISDIR(inode->i_mode))
8848                 newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
8849 @@ -734,7 +741,7 @@ asmlinkage long sys_chown(const char __u
8850  
8851         error = user_path_walk(filename, &nd);
8852         if (!error) {
8853 -               error = chown_common(nd.dentry, user, group);
8854 +               error = chown_common(nd.dentry, nd.mnt, user, group);
8855                 path_release(&nd);
8856         }
8857         return error;
8858 @@ -753,7 +760,7 @@ asmlinkage long sys_fchownat(int dfd, co
8859         follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
8860         error = __user_walk_fd(dfd, filename, follow, &nd);
8861         if (!error) {
8862 -               error = chown_common(nd.dentry, user, group);
8863 +               error = chown_common(nd.dentry, nd.mnt, user, group);
8864                 path_release(&nd);
8865         }
8866  out:
8867 @@ -767,7 +774,7 @@ asmlinkage long sys_lchown(const char __
8868  
8869         error = user_path_walk_link(filename, &nd);
8870         if (!error) {
8871 -               error = chown_common(nd.dentry, user, group);
8872 +               error = chown_common(nd.dentry, nd.mnt, user, group);
8873                 path_release(&nd);
8874         }
8875         return error;
8876 @@ -781,7 +788,7 @@ asmlinkage long sys_fchown(unsigned int 
8877  
8878         file = fget(fd);
8879         if (file) {
8880 -               error = chown_common(file->f_dentry, user, group);
8881 +               error = chown_common(file->f_dentry, file->f_vfsmnt, user, group);
8882                 fput(file);
8883         }
8884         return error;
8885 @@ -1005,6 +1012,7 @@ repeat:
8886         FD_SET(fd, fdt->open_fds);
8887         FD_CLR(fd, fdt->close_on_exec);
8888         fdt->next_fd = fd + 1;
8889 +       vx_openfd_inc(fd);
8890  #if 1
8891         /* Sanity check */
8892         if (fdt->fd[fd] != NULL) {
8893 @@ -1027,6 +1035,7 @@ static void __put_unused_fd(struct files
8894         __FD_CLR(fd, fdt->open_fds);
8895         if (fd < fdt->next_fd)
8896                 fdt->next_fd = fd;
8897 +       vx_openfd_dec(fd);
8898  }
8899  
8900  void fastcall put_unused_fd(unsigned int fd)
8901 diff -NurpP --minimal linux-2.6.16.20/fs/proc/array.c linux-2.6.16.20-vs2.1.1-rc22/fs/proc/array.c
8902 --- linux-2.6.16.20/fs/proc/array.c     2006-02-18 14:40:26 +0100
8903 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/proc/array.c        2006-04-27 20:29:01 +0200
8904 @@ -75,6 +75,9 @@
8905  #include <linux/times.h>
8906  #include <linux/cpuset.h>
8907  #include <linux/rcupdate.h>
8908 +#include <linux/vs_context.h>
8909 +#include <linux/vs_network.h>
8910 +#include <linux/vs_pid.h>
8911  
8912  #include <asm/uaccess.h>
8913  #include <asm/pgtable.h>
8914 @@ -135,7 +138,9 @@ static const char *task_state_array[] = 
8915         "T (stopped)",          /*  4 */
8916         "T (tracing stop)",     /*  8 */
8917         "Z (zombie)",           /* 16 */
8918 -       "X (dead)"              /* 32 */
8919 +       "X (dead)",             /* 32 */
8920 +       "N (noninteractive)",   /* 64 */
8921 +       "H (on hold)"           /* 128 */
8922  };
8923  
8924  static inline const char * get_task_state(struct task_struct *tsk)
8925 @@ -144,7 +149,8 @@ static inline const char * get_task_stat
8926                                             TASK_INTERRUPTIBLE |
8927                                             TASK_UNINTERRUPTIBLE |
8928                                             TASK_STOPPED |
8929 -                                           TASK_TRACED)) |
8930 +                                          TASK_TRACED |
8931 +                                          TASK_ONHOLD)) |
8932                         (tsk->exit_state & (EXIT_ZOMBIE |
8933                                             EXIT_DEAD));
8934         const char **p = &task_state_array[0];
8935 @@ -161,8 +167,13 @@ static inline char * task_state(struct t
8936         struct group_info *group_info;
8937         int g;
8938         struct fdtable *fdt = NULL;
8939 +       pid_t pid, ptgid, tppid, tgid;
8940  
8941         read_lock(&tasklist_lock);
8942 +       tgid = vx_map_tgid(p->tgid);
8943 +       pid = vx_map_pid(p->pid);
8944 +       ptgid = vx_map_pid(p->group_leader->real_parent->tgid);
8945 +       tppid = vx_map_pid(p->parent->pid);
8946         buffer += sprintf(buffer,
8947                 "State:\t%s\n"
8948                 "SleepAVG:\t%lu%%\n"
8949 @@ -174,9 +185,8 @@ static inline char * task_state(struct t
8950                 "Gid:\t%d\t%d\t%d\t%d\n",
8951                 get_task_state(p),
8952                 (p->sleep_avg/1024)*100/(1020000000/1024),
8953 -               p->tgid,
8954 -               p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0,
8955 -               pid_alive(p) && p->ptrace ? p->parent->pid : 0,
8956 +               tgid, pid, (pid > 1) ? ptgid : 0,
8957 +               pid_alive(p) && p->ptrace ? tppid : 0,
8958                 p->uid, p->euid, p->suid, p->fsuid,
8959                 p->gid, p->egid, p->sgid, p->fsgid);
8960         read_unlock(&tasklist_lock);
8961 @@ -285,17 +295,26 @@ static inline char * task_sig(struct tas
8962  
8963  static inline char *task_cap(struct task_struct *p, char *buffer)
8964  {
8965 -    return buffer + sprintf(buffer, "CapInh:\t%016x\n"
8966 -                           "CapPrm:\t%016x\n"
8967 -                           "CapEff:\t%016x\n",
8968 -                           cap_t(p->cap_inheritable),
8969 -                           cap_t(p->cap_permitted),
8970 -                           cap_t(p->cap_effective));
8971 +       struct vx_info *vxi = p->vx_info;
8972 +
8973 +       return buffer + sprintf(buffer,
8974 +               "CapInh:\t%016x\n"
8975 +               "CapPrm:\t%016x\n"
8976 +               "CapEff:\t%016x\n",
8977 +               (unsigned)vx_info_mbcap(vxi, p->cap_inheritable),
8978 +               (unsigned)vx_info_mbcap(vxi, p->cap_permitted),
8979 +               (unsigned)vx_info_mbcap(vxi, p->cap_effective));
8980  }
8981  
8982  int proc_pid_status(struct task_struct *task, char * buffer)
8983  {
8984         char * orig = buffer;
8985 +#ifdef CONFIG_VSERVER_LEGACY
8986 +       struct vx_info *vxi;
8987 +#endif
8988 +#ifdef CONFIG_VSERVER_LEGACYNET
8989 +       struct nx_info *nxi;
8990 +#endif
8991         struct mm_struct *mm = get_task_mm(task);
8992  
8993         buffer = task_name(task, buffer);
8994 @@ -308,6 +327,46 @@ int proc_pid_status(struct task_struct *
8995         buffer = task_sig(task, buffer);
8996         buffer = task_cap(task, buffer);
8997         buffer = cpuset_task_status_allowed(task, buffer);
8998 +
8999 +       if (task_vx_flags(task, VXF_INFO_HIDE, 0))
9000 +               goto skip;
9001 +#ifdef CONFIG_VSERVER_LEGACY
9002 +       buffer += sprintf (buffer,"s_context: %d\n", vx_task_xid(task));
9003 +       vxi = task_get_vx_info(task);
9004 +       if (vxi) {
9005 +               buffer += sprintf (buffer,"ctxflags: %08llx\n"
9006 +                       ,(unsigned long long)vxi->vx_flags);
9007 +               buffer += sprintf (buffer,"initpid: %d\n"
9008 +                       ,vxi->vx_initpid);
9009 +       } else {
9010 +               buffer += sprintf (buffer,"ctxflags: none\n");
9011 +               buffer += sprintf (buffer,"initpid: none\n");
9012 +       }
9013 +       put_vx_info(vxi);
9014 +#else
9015 +       buffer += sprintf (buffer,"VxID: %d\n", vx_task_xid(task));
9016 +#endif
9017 +#ifdef CONFIG_VSERVER_LEGACYNET
9018 +       nxi = task_get_nx_info(task);
9019 +       if (nxi) {
9020 +               int i;
9021 +
9022 +               buffer += sprintf (buffer,"ipv4root:");
9023 +               for (i=0; i<nxi->nbipv4; i++){
9024 +                       buffer += sprintf (buffer," %08x/%08x"
9025 +                               ,nxi->ipv4[i]
9026 +                               ,nxi->mask[i]);
9027 +               }
9028 +               *buffer++ = '\n';
9029 +               buffer += sprintf (buffer,"ipv4root_bcast: %08x\n"
9030 +                       ,nxi->v4_bcast);
9031 +       } else {
9032 +               buffer += sprintf (buffer,"ipv4root: 0\n");
9033 +               buffer += sprintf (buffer,"ipv4root_bcast: 0\n");
9034 +       }
9035 +       put_nx_info(nxi);
9036 +#endif
9037 +skip:
9038  #if defined(CONFIG_S390)
9039         buffer = task_show_regs(task, buffer);
9040  #endif
9041 @@ -322,7 +381,7 @@ static int do_task_stat(struct task_stru
9042         sigset_t sigign, sigcatch;
9043         char state;
9044         int res;
9045 -       pid_t ppid, pgid = -1, sid = -1;
9046 +       pid_t pid, ppid, pgid = -1, sid = -1;
9047         int num_threads = 0;
9048         struct mm_struct *mm;
9049         unsigned long long start_time;
9050 @@ -388,7 +447,11 @@ static int do_task_stat(struct task_stru
9051                 }
9052                 it_real_value = task->signal->real_timer.expires;
9053         }
9054 -       ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
9055 +       pid = vx_info_map_pid(task->vx_info, pid_alive(task) ? task->pid : 0);
9056 +       ppid = (!(pid > 1)) ? 0 : vx_info_map_tgid(task->vx_info,
9057 +               task->group_leader->real_parent->tgid);
9058 +       pgid = vx_info_map_pid(task->vx_info, pgid);
9059 +
9060         read_unlock(&tasklist_lock);
9061  
9062         if (!whole || num_threads<2)
9063 @@ -412,10 +475,21 @@ static int do_task_stat(struct task_stru
9064         /* convert nsec -> ticks */
9065         start_time = nsec_to_clock_t(start_time);
9066  
9067 +       /* fixup start time for virt uptime */
9068 +       if (vx_flags(VXF_VIRT_UPTIME, 0)) {
9069 +               unsigned long long bias =
9070 +                       current->vx_info->cvirt.bias_clock;
9071 +
9072 +               if (start_time > bias)
9073 +                       start_time -= bias;
9074 +               else
9075 +                       start_time = 0;
9076 +       }
9077 +
9078         res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
9079  %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
9080  %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
9081 -               task->pid,
9082 +               pid,
9083                 tcomm,
9084                 state,
9085                 ppid,
9086 diff -NurpP --minimal linux-2.6.16.20/fs/proc/base.c linux-2.6.16.20-vs2.1.1-rc22/fs/proc/base.c
9087 --- linux-2.6.16.20/fs/proc/base.c      2006-05-11 21:25:36 +0200
9088 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/proc/base.c 2006-04-26 19:07:00 +0200
9089 @@ -72,6 +72,8 @@
9090  #include <linux/cpuset.h>
9091  #include <linux/audit.h>
9092  #include <linux/poll.h>
9093 +#include <linux/vs_network.h>
9094 +#include <linux/vs_pid.h>
9095  #include "internal.h"
9096  
9097  /*
9098 @@ -121,6 +123,8 @@ enum pid_directory_inos {
9099         PROC_TGID_ATTR_EXEC,
9100         PROC_TGID_ATTR_FSCREATE,
9101  #endif
9102 +       PROC_TGID_VX_INFO,
9103 +       PROC_TGID_IP_INFO,
9104  #ifdef CONFIG_AUDITSYSCALL
9105         PROC_TGID_LOGINUID,
9106  #endif
9107 @@ -161,6 +165,8 @@ enum pid_directory_inos {
9108         PROC_TID_ATTR_EXEC,
9109         PROC_TID_ATTR_FSCREATE,
9110  #endif
9111 +       PROC_TID_VX_INFO,
9112 +       PROC_TID_IP_INFO,
9113  #ifdef CONFIG_AUDITSYSCALL
9114         PROC_TID_LOGINUID,
9115  #endif
9116 @@ -216,6 +222,8 @@ static struct pid_entry tgid_base_stuff[
9117  #ifdef CONFIG_CPUSETS
9118         E(PROC_TGID_CPUSET,    "cpuset",  S_IFREG|S_IRUGO),
9119  #endif
9120 +       E(PROC_TGID_VX_INFO,   "vinfo",   S_IFREG|S_IRUGO),
9121 +       E(PROC_TGID_IP_INFO,   "ninfo",   S_IFREG|S_IRUGO),
9122         E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
9123         E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
9124  #ifdef CONFIG_AUDITSYSCALL
9125 @@ -258,6 +266,8 @@ static struct pid_entry tid_base_stuff[]
9126  #ifdef CONFIG_CPUSETS
9127         E(PROC_TID_CPUSET,     "cpuset",  S_IFREG|S_IRUGO),
9128  #endif
9129 +       E(PROC_TID_VX_INFO,    "vinfo",   S_IFREG|S_IRUGO),
9130 +       E(PROC_TID_IP_INFO,    "ninfo",   S_IFREG|S_IRUGO),
9131         E(PROC_TID_OOM_SCORE,  "oom_score",S_IFREG|S_IRUGO),
9132         E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
9133  #ifdef CONFIG_AUDITSYSCALL
9134 @@ -541,6 +551,11 @@ static int proc_check_chroot(struct dent
9135         struct dentry *de, *base;
9136         struct vfsmount *our_vfsmnt, *mnt;
9137         int res = 0;
9138 +
9139 +       /* context admin override */
9140 +       if (capable(CAP_CONTEXT))
9141 +               goto override;
9142 +
9143         read_lock(&current->fs->lock);
9144         our_vfsmnt = mntget(current->fs->rootmnt);
9145         base = dget(current->fs->root);
9146 @@ -550,11 +565,11 @@ static int proc_check_chroot(struct dent
9147         de = root;
9148         mnt = vfsmnt;
9149  
9150 -       while (vfsmnt != our_vfsmnt) {
9151 -               if (vfsmnt == vfsmnt->mnt_parent)
9152 +       while (mnt != our_vfsmnt) {
9153 +               if (mnt == mnt->mnt_parent)
9154                         goto out;
9155 -               de = vfsmnt->mnt_mountpoint;
9156 -               vfsmnt = vfsmnt->mnt_parent;
9157 +               de = mnt->mnt_mountpoint;
9158 +               mnt = mnt->mnt_parent;
9159         }
9160  
9161         if (!is_subdir(de, base))
9162 @@ -564,8 +579,9 @@ static int proc_check_chroot(struct dent
9163  exit:
9164         dput(base);
9165         mntput(our_vfsmnt);
9166 +override:
9167         dput(root);
9168 -       mntput(mnt);
9169 +       mntput(vfsmnt);
9170         return res;
9171  out:
9172         spin_unlock(&vfsmount_lock);
9173 @@ -1225,7 +1241,7 @@ static int proc_pident_readdir(struct fi
9174         struct inode *inode = dentry->d_inode;
9175         struct pid_entry *p;
9176         ino_t ino;
9177 -       int ret;
9178 +       int ret, hide;
9179  
9180         ret = -ENOENT;
9181         if (!pid_alive(proc_task(inode)))
9182 @@ -1256,11 +1272,20 @@ static int proc_pident_readdir(struct fi
9183                         goto out;
9184                 }
9185                 p = ents + i;
9186 +               hide = vx_flags(VXF_INFO_HIDE, 0);
9187                 while (p->name) {
9188 +                       if (hide) {     /* keep readdir in step with the lookup-side hide check */
9189 +                               switch (p->type) {
9190 +                               case PROC_TGID_VX_INFO: case PROC_TID_VX_INFO:
9191 +                               case PROC_TGID_IP_INFO: case PROC_TID_IP_INFO:
9192 +                                       goto skip;      /* applies to tgid and tid tables alike */
9193 +                               }
9194 +                       }
9195                         if (filldir(dirent, p->name, p->len, filp->f_pos,
9196                                     fake_ino(pid, p->type), p->mode >> 12) < 0)
9197                                 goto out;
9198                         filp->f_pos++;
9199 +               skip:
9200                         p++;
9201                 }
9202         }
9203 @@ -1334,6 +1359,8 @@ static struct inode *proc_pid_make_inode
9204                 inode->i_uid = task->euid;
9205                 inode->i_gid = task->egid;
9206         }
9207 +       /* procfs is xid tagged */
9208 +       inode->i_tag = (tag_t)vx_task_xid(task);
9209         security_task_to_inode(task, inode);
9210  
9211  out:
9212 @@ -1359,6 +1386,11 @@ static int pid_revalidate(struct dentry 
9213  {
9214         struct inode *inode = dentry->d_inode;
9215         struct task_struct *task = proc_task(inode);
9216 +
9217 +       if (!vx_check(vx_task_xid(task), VX_IDENT))
9218 +               goto out_drop;
9219 +       /* discard wrong fakeinit */
9220 +
9221         if (pid_alive(task)) {
9222                 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) {
9223                         inode->i_uid = task->euid;
9224 @@ -1370,6 +1402,7 @@ static int pid_revalidate(struct dentry 
9225                 security_task_to_inode(task, inode);
9226                 return 1;
9227         }
9228 +out_drop:
9229         d_drop(dentry);
9230         return 0;
9231  }
9232 @@ -1609,6 +1642,9 @@ static struct file_operations proc_tgid_
9233  static struct inode_operations proc_tgid_attr_inode_operations;
9234  #endif
9235  
9236 +extern int proc_pid_vx_info(struct task_struct *, char *);
9237 +extern int proc_pid_nx_info(struct task_struct *, char *);
9238 +
9239  static int get_tid_list(int index, unsigned int *tids, struct inode *dir);
9240  
9241  /* SMP-safe */
9242 @@ -1796,15 +1832,33 @@ static struct dentry *proc_pident_lookup
9243                         inode->i_fop = &proc_loginuid_operations;
9244                         break;
9245  #endif
9246 +               case PROC_TID_VX_INFO:
9247 +               case PROC_TGID_VX_INFO:
9248 +                       if (task_vx_flags(task, VXF_INFO_HIDE, 0))
9249 +                               goto out_noent;
9250 +                       inode->i_fop = &proc_info_file_operations;
9251 +                       ei->op.proc_read = proc_pid_vx_info;
9252 +                       break;
9253 +               case PROC_TID_IP_INFO:
9254 +               case PROC_TGID_IP_INFO:
9255 +                       if (task_vx_flags(task, VXF_INFO_HIDE, 0))
9256 +                               goto out_noent;
9257 +                       inode->i_fop = &proc_info_file_operations;
9258 +                       ei->op.proc_read = proc_pid_nx_info;
9259 +                       break;
9260                 default:
9261                         printk("procfs: impossible type (%d)",p->type);
9262 -                       iput(inode);
9263 -                       return ERR_PTR(-EINVAL);
9264 +                       error = -EINVAL;
9265 +                       goto out_put;
9266         }
9267         dentry->d_op = &pid_dentry_operations;
9268         d_add(dentry, inode);
9269         return NULL;
9270  
9271 +out_noent:
9272 +       error=-ENOENT;
9273 +out_put:
9274 +       iput(inode);
9275  out:
9276         return ERR_PTR(error);
9277  }
9278 @@ -1888,14 +1942,14 @@ static int proc_self_readlink(struct den
9279                               int buflen)
9280  {
9281         char tmp[30];
9282 -       sprintf(tmp, "%d", current->tgid);
9283 +       sprintf(tmp, "%d", vx_map_tgid(current->tgid));
9284         return vfs_readlink(dentry,buffer,buflen,tmp);
9285  }
9286  
9287  static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
9288  {
9289         char tmp[30];
9290 -       sprintf(tmp, "%d", current->tgid);
9291 +       sprintf(tmp, "%d", vx_map_tgid(current->tgid));
9292         return ERR_PTR(vfs_follow_link(nd,tmp));
9293  }      
9294  
9295 @@ -1958,6 +2012,20 @@ void proc_pid_flush(struct dentry *proc_
9296         }
9297  }
9298  
9299 +#define VXF_FAKE_INIT  (VXF_INFO_INIT|VXF_STATE_INIT)
9300 +
9301 +static inline int proc_pid_visible(struct task_struct *task, int pid)
9302 +{
9303 +       /* pid 1 is shown unless the VXF_FAKE_INIT flag test succeeds */
9304 +       if (pid == 1 && !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT))
9305 +               return 1;
9306 +
9307 +       /* anything else must pass the VX_WATCH|VX_IDENT xid check */
9308 +       if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
9309 +               return 1;
9310 +       return 0;
9311 +}
9312 +
9313  /* SMP-safe */
9314  struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
9315  {
9316 @@ -1994,13 +2062,14 @@ struct dentry *proc_pid_lookup(struct in
9317         if (!task)
9318                 goto out;
9319  
9320 -       inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
9321 +       /* check for context visibility */
9322 +       if (!proc_pid_visible(task, tgid))
9323 +               goto out_drop_task;
9324  
9325 +       inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
9326 +       if (!inode)
9327 +               goto out_drop_task;
9328  
9329 -       if (!inode) {
9330 -               put_task_struct(task);
9331 -               goto out;
9332 -       }
9333         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
9334         inode->i_op = &proc_tgid_base_inode_operations;
9335         inode->i_fop = &proc_tgid_base_operations;
9336 @@ -2029,6 +2098,8 @@ struct dentry *proc_pid_lookup(struct in
9337                 goto out;
9338         }
9339         return NULL;
9340 +out_drop_task:
9341 +       put_task_struct(task);
9342  out:
9343         return ERR_PTR(-ENOENT);
9344  }
9345 @@ -2044,6 +2115,8 @@ static struct dentry *proc_task_lookup(s
9346         tid = name_to_int(dentry);
9347         if (tid == ~0U)
9348                 goto out;
9349 +       if (vx_current_initpid(tid))
9350 +               goto out;
9351  
9352         read_lock(&tasklist_lock);
9353         task = find_task_by_pid(tid);
9354 @@ -2055,11 +2128,14 @@ static struct dentry *proc_task_lookup(s
9355         if (leader->tgid != task->tgid)
9356                 goto out_drop_task;
9357  
9358 -       inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
9359 -
9360 +       /* check for context visibility */
9361 +       if (!proc_pid_visible(task, tid))
9362 +               goto out_drop_task;
9363  
9364 +       inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
9365         if (!inode)
9366                 goto out_drop_task;
9367 +
9368         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
9369         inode->i_op = &proc_tid_base_inode_operations;
9370         inode->i_fop = &proc_tid_base_operations;
9371 @@ -2099,7 +2175,7 @@ static int get_tgid_list(int index, unsi
9372         read_lock(&tasklist_lock);
9373         p = NULL;
9374         if (version) {
9375 -               p = find_task_by_pid(version);
9376 +               p = find_task_by_real_pid(version);
9377                 if (p && !thread_group_leader(p))
9378                         p = NULL;
9379         }
9380 @@ -2111,11 +2187,15 @@ static int get_tgid_list(int index, unsi
9381  
9382         for ( ; p != &init_task; p = next_task(p)) {
9383                 int tgid = p->pid;
9384 +
9385                 if (!pid_alive(p))
9386                         continue;
9387 +               /* check for context visibility */
9388 +               if (!proc_pid_visible(p, tgid))
9389 +                       continue;
9390                 if (--index >= 0)
9391                         continue;
9392 -               tgids[nr_tgids] = tgid;
9393 +               tgids[nr_tgids] = vx_map_tgid(tgid);
9394                 nr_tgids++;
9395                 if (nr_tgids >= PROC_MAXPIDS)
9396                         break;
9397 @@ -2145,10 +2225,13 @@ static int get_tid_list(int index, unsig
9398         if (pid_alive(task)) do {
9399                 int tid = task->pid;
9400  
9401 +               /* check for context visibility */
9402 +               if (!proc_pid_visible(task, tid))
9403 +                       continue;
9404                 if (--index >= 0)
9405                         continue;
9406                 if (tids != NULL)
9407 -                       tids[nr_tids] = tid;
9408 +                       tids[nr_tids] = vx_map_pid(tid);
9409                 nr_tids++;
9410                 if (nr_tids >= PROC_MAXPIDS)
9411                         break;
9412 @@ -2224,11 +2307,14 @@ static int proc_task_readdir(struct file
9413         unsigned int nr_tids, i;
9414         struct dentry *dentry = filp->f_dentry;
9415         struct inode *inode = dentry->d_inode;
9416 +       struct task_struct *task = proc_task(inode);
9417         int retval = -ENOENT;
9418         ino_t ino;
9419         unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
9420  
9421 -       if (!pid_alive(proc_task(inode)))
9422 +       if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
9423 +               goto out;
9424 +       if (!pid_alive(task))
9425                 goto out;
9426         retval = 0;
9427  
9428 diff -NurpP --minimal linux-2.6.16.20/fs/proc/generic.c linux-2.6.16.20-vs2.1.1-rc22/fs/proc/generic.c
9429 --- linux-2.6.16.20/fs/proc/generic.c   2006-02-18 14:40:26 +0100
9430 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/proc/generic.c      2006-04-26 19:07:00 +0200
9431 @@ -19,6 +19,7 @@
9432  #include <linux/idr.h>
9433  #include <linux/namei.h>
9434  #include <linux/bitops.h>
9435 +#include <linux/vserver/inode.h>
9436  #include <asm/uaccess.h>
9437  
9438  #include "internal.h"
9439 @@ -385,11 +386,15 @@ struct dentry *proc_lookup(struct inode 
9440                 for (de = de->subdir; de ; de = de->next) {
9441                         if (de->namelen != dentry->d_name.len)
9442                                 continue;
9443 +                       if (!vx_hide_check(0, de->vx_flags))
9444 +                               continue;
9445                         if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
9446                                 unsigned int ino = de->low_ino;
9447  
9448                                 error = -EINVAL;
9449                                 inode = proc_get_inode(dir->i_sb, ino, de);
9450 +                               if (inode)      /* generic proc entries belong to the host */
9451 +                                       inode->i_tag = 0;
9452                                 break;
9453                         }
9454                 }
9455 @@ -461,9 +466,12 @@ int proc_readdir(struct file * filp,
9456                         }
9457  
9458                         do {
9459 +                               if (!vx_hide_check(0, de->vx_flags))
9460 +                                       goto skip;
9461                                 if (filldir(dirent, de->name, de->namelen, filp->f_pos,
9462                                             de->low_ino, de->mode >> 12) < 0)
9463                                         goto out;
9464 +                       skip:
9465                                 filp->f_pos++;
9466                                 de = de->next;
9467                         } while (de);
9468 @@ -581,6 +589,7 @@ static struct proc_dir_entry *proc_creat
9469         ent->namelen = len;
9470         ent->mode = mode;
9471         ent->nlink = nlink;
9472 +       ent->vx_flags = IATTR_PROC_DEFAULT;
9473   out:
9474         return ent;
9475  }
9476 @@ -601,7 +610,8 @@ struct proc_dir_entry *proc_symlink(cons
9477                                 kfree(ent->data);
9478                                 kfree(ent);
9479                                 ent = NULL;
9480 -                       }
9481 +                       } else
9482 +                               ent->vx_flags = IATTR_PROC_SYMLINK;
9483                 } else {
9484                         kfree(ent);
9485                         ent = NULL;
9486 diff -NurpP --minimal linux-2.6.16.20/fs/proc/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/proc/inode.c
9487 --- linux-2.6.16.20/fs/proc/inode.c     2006-04-09 13:49:54 +0200
9488 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/proc/inode.c        2006-04-26 19:07:00 +0200
9489 @@ -170,6 +170,8 @@ struct inode *proc_get_inode(struct supe
9490                         inode->i_uid = de->uid;
9491                         inode->i_gid = de->gid;
9492                 }
9493 +               if (de->vx_flags)
9494 +                       PROC_I(inode)->vx_flags = de->vx_flags;
9495                 if (de->size)
9496                         inode->i_size = de->size;
9497                 if (de->nlink)
9498 diff -NurpP --minimal linux-2.6.16.20/fs/proc/proc_misc.c linux-2.6.16.20-vs2.1.1-rc22/fs/proc/proc_misc.c
9499 --- linux-2.6.16.20/fs/proc/proc_misc.c 2006-05-11 21:25:36 +0200
9500 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/proc/proc_misc.c    2006-05-03 16:06:42 +0200
9501 @@ -53,6 +53,8 @@
9502  #include <asm/div64.h>
9503  #include "internal.h"
9504  
9505 +#include <linux/vs_cvirt.h>
9506 +
9507  #define LOAD_INT(x) ((x) >> FSHIFT)
9508  #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
9509  /*
9510 @@ -82,17 +84,32 @@ static int proc_calc_metrics(char *page,
9511  static int loadavg_read_proc(char *page, char **start, off_t off,
9512                                  int count, int *eof, void *data)
9513  {
9514 +       unsigned int running, threads;
9515         int a, b, c;
9516         int len;
9517  
9518 -       a = avenrun[0] + (FIXED_1/200);
9519 -       b = avenrun[1] + (FIXED_1/200);
9520 -       c = avenrun[2] + (FIXED_1/200);
9521 -       len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
9522 +       if (vx_flags(VXF_VIRT_LOAD, 0)) {
9523 +               struct vx_info *vxi = current->vx_info;
9524 +
9525 +               a = vxi->cvirt.load[0] + (FIXED_1/200);
9526 +               b = vxi->cvirt.load[1] + (FIXED_1/200);
9527 +               c = vxi->cvirt.load[2] + (FIXED_1/200);
9528 +
9529 +               running = atomic_read(&vxi->cvirt.nr_running);
9530 +               threads = atomic_read(&vxi->cvirt.nr_threads);
9531 +       } else {
9532 +               a = avenrun[0] + (FIXED_1/200);
9533 +               b = avenrun[1] + (FIXED_1/200);
9534 +               c = avenrun[2] + (FIXED_1/200);
9535 +
9536 +               running = nr_running();
9537 +               threads = nr_threads;
9538 +       }
9539 +       len = sprintf(page,"%d.%02d %d.%02d %d.%02d %u/%u %d\n",
9540                 LOAD_INT(a), LOAD_FRAC(a),
9541                 LOAD_INT(b), LOAD_FRAC(b),
9542                 LOAD_INT(c), LOAD_FRAC(c),
9543 -               nr_running(), nr_threads, last_pid);
9544 +               running, threads, last_pid);
9545         return proc_calc_metrics(page, start, off, count, eof, len);
9546  }
9547  
9548 @@ -106,6 +123,9 @@ static int uptime_read_proc(char *page, 
9549  
9550         do_posix_clock_monotonic_gettime(&uptime);
9551         cputime_to_timespec(idletime, &idle);
9552 +       if (vx_flags(VXF_VIRT_UPTIME, 0))
9553 +               vx_vsi_uptime(&uptime, &idle);
9554 +
9555         len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
9556                         (unsigned long) uptime.tv_sec,
9557                         (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
9558 @@ -143,7 +163,7 @@ static int meminfo_read_proc(char *page,
9559                 * sysctl_overcommit_ratio / 100) + total_swap_pages;
9560  
9561         cached = get_page_cache_size() - total_swapcache_pages - i.bufferram;
9562 -       if (cached < 0)
9563 +       if (cached < 0 || vx_flags(VXF_VIRT_MEM, 0))
9564                 cached = 0;
9565  
9566         get_vmalloc_info(&vmi);
9567 @@ -238,8 +258,9 @@ static int version_read_proc(char *page,
9568  {
9569         int len;
9570  
9571 -       strcpy(page, linux_banner);
9572 -       len = strlen(page);
9573 +       len = sprintf(page, vx_linux_banner,
9574 +               vx_new_uts(release),
9575 +               vx_new_uts(version));
9576         return proc_calc_metrics(page, start, off, count, eof, len);
9577  }
9578  
9579 diff -NurpP --minimal linux-2.6.16.20/fs/proc/root.c linux-2.6.16.20-vs2.1.1-rc22/fs/proc/root.c
9580 --- linux-2.6.16.20/fs/proc/root.c      2006-04-09 13:49:54 +0200
9581 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/proc/root.c 2006-04-26 19:07:00 +0200
9582 @@ -25,6 +25,9 @@ struct proc_dir_entry *proc_net, *proc_n
9583  #ifdef CONFIG_SYSCTL
9584  struct proc_dir_entry *proc_sys_root;
9585  #endif
9586 +struct proc_dir_entry *proc_virtual;
9587 +
9588 +extern void proc_vx_init(void);
9589  
9590  static struct super_block *proc_get_sb(struct file_system_type *fs_type,
9591         int flags, const char *dev_name, void *data)
9592 @@ -78,6 +81,7 @@ void __init proc_root_init(void)
9593         proc_device_tree_init();
9594  #endif
9595         proc_bus = proc_mkdir("bus", NULL);
9596 +       proc_vx_init();
9597  }
9598  
9599  static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
9600 diff -NurpP --minimal linux-2.6.16.20/fs/quota.c linux-2.6.16.20-vs2.1.1-rc22/fs/quota.c
9601 --- linux-2.6.16.20/fs/quota.c  2006-02-18 14:40:26 +0100
9602 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/quota.c     2006-04-27 21:28:28 +0200
9603 @@ -17,47 +17,50 @@
9604  #include <linux/buffer_head.h>
9605  #include <linux/capability.h>
9606  #include <linux/quotaops.h>
9607 +#include <linux/major.h>
9608 +#include <linux/blkdev.h>
9609 +#include <linux/vserver/debug.h>
9610  
9611  /* Check validity of generic quotactl commands */
9612 -static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id)
9613 +static int generic_quotactl_valid(struct dqhash *hash, int type, int cmd, qid_t id)
9614  {
9615         if (type >= MAXQUOTAS)
9616                 return -EINVAL;
9617 -       if (!sb && cmd != Q_SYNC)
9618 +       if (!hash && cmd != Q_SYNC)
9619                 return -ENODEV;
9620         /* Is operation supported? */
9621 -       if (sb && !sb->s_qcop)
9622 +       if (hash && !hash->dqh_qcop)
9623                 return -ENOSYS;
9624  
9625         switch (cmd) {
9626                 case Q_GETFMT:
9627                         break;
9628                 case Q_QUOTAON:
9629 -                       if (!sb->s_qcop->quota_on)
9630 +                       if (!hash->dqh_qcop->quota_on)
9631                                 return -ENOSYS;
9632                         break;
9633                 case Q_QUOTAOFF:
9634 -                       if (!sb->s_qcop->quota_off)
9635 +                       if (!hash->dqh_qcop->quota_off)
9636                                 return -ENOSYS;
9637                         break;
9638                 case Q_SETINFO:
9639 -                       if (!sb->s_qcop->set_info)
9640 +                       if (!hash->dqh_qcop->set_info)
9641                                 return -ENOSYS;
9642                         break;
9643                 case Q_GETINFO:
9644 -                       if (!sb->s_qcop->get_info)
9645 +                       if (!hash->dqh_qcop->get_info)
9646                                 return -ENOSYS;
9647                         break;
9648                 case Q_SETQUOTA:
9649 -                       if (!sb->s_qcop->set_dqblk)
9650 +                       if (!hash->dqh_qcop->set_dqblk)
9651                                 return -ENOSYS;
9652                         break;
9653                 case Q_GETQUOTA:
9654 -                       if (!sb->s_qcop->get_dqblk)
9655 +                       if (!hash->dqh_qcop->get_dqblk)
9656                                 return -ENOSYS;
9657                         break;
9658                 case Q_SYNC:
9659 -                       if (sb && !sb->s_qcop->quota_sync)
9660 +                       if (hash && !hash->dqh_qcop->quota_sync)
9661                                 return -ENOSYS;
9662                         break;
9663                 default:
9664 @@ -73,7 +76,7 @@ static int generic_quotactl_valid(struct
9665                 case Q_SETQUOTA:
9666                 case Q_GETQUOTA:
9667                         /* This is just informative test so we are satisfied without a lock */
9668 -                       if (!sb_has_quota_enabled(sb, type))
9669 +                       if (!dqh_has_quota_enabled(hash, type))
9670                                 return -ESRCH;
9671         }
9672  
9673 @@ -81,47 +84,47 @@ static int generic_quotactl_valid(struct
9674         if (cmd == Q_GETQUOTA) {
9675                 if (((type == USRQUOTA && current->euid != id) ||
9676                      (type == GRPQUOTA && !in_egroup_p(id))) &&
9677 -                   !capable(CAP_SYS_ADMIN))
9678 +                   !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
9679                         return -EPERM;
9680         }
9681         else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO)
9682 -               if (!capable(CAP_SYS_ADMIN))
9683 +               if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
9684                         return -EPERM;
9685  
9686         return 0;
9687  }
9688  
9689  /* Check validity of XFS Quota Manager commands */
9690 -static int xqm_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id)
9691 +static int xqm_quotactl_valid(struct dqhash *hash, int type, int cmd, qid_t id)
9692  {
9693         if (type >= XQM_MAXQUOTAS)
9694                 return -EINVAL;
9695 -       if (!sb)
9696 +       if (!hash)
9697                 return -ENODEV;
9698 -       if (!sb->s_qcop)
9699 +       if (!hash->dqh_qcop)
9700                 return -ENOSYS;
9701  
9702         switch (cmd) {
9703                 case Q_XQUOTAON:
9704                 case Q_XQUOTAOFF:
9705                 case Q_XQUOTARM:
9706 -                       if (!sb->s_qcop->set_xstate)
9707 +                       if (!hash->dqh_qcop->set_xstate)
9708                                 return -ENOSYS;
9709                         break;
9710                 case Q_XGETQSTAT:
9711 -                       if (!sb->s_qcop->get_xstate)
9712 +                       if (!hash->dqh_qcop->get_xstate)
9713                                 return -ENOSYS;
9714                         break;
9715                 case Q_XSETQLIM:
9716 -                       if (!sb->s_qcop->set_xquota)
9717 +                       if (!hash->dqh_qcop->set_xquota)
9718                                 return -ENOSYS;
9719                         break;
9720                 case Q_XGETQUOTA:
9721 -                       if (!sb->s_qcop->get_xquota)
9722 +                       if (!hash->dqh_qcop->get_xquota)
9723                                 return -ENOSYS;
9724                         break;
9725                 case Q_XQUOTASYNC:
9726 -                       if (!sb->s_qcop->quota_sync)
9727 +                       if (!hash->dqh_qcop->quota_sync)
9728                                 return -ENOSYS;
9729                         break;
9730                 default:
9731 @@ -132,57 +135,68 @@ static int xqm_quotactl_valid(struct sup
9732         if (cmd == Q_XGETQUOTA) {
9733                 if (((type == XQM_USRQUOTA && current->euid != id) ||
9734                      (type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
9735 -                    !capable(CAP_SYS_ADMIN))
9736 +                    !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
9737                         return -EPERM;
9738         } else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) {
9739 -               if (!capable(CAP_SYS_ADMIN))
9740 +               if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
9741                         return -EPERM;
9742         }
9743  
9744         return 0;
9745  }
9746  
9747 -static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t id)
9748 +static int check_quotactl_valid(struct dqhash *hash, int type, int cmd, qid_t id)
9749  {
9750         int error;
9751  
9752         if (XQM_COMMAND(cmd))
9753 -               error = xqm_quotactl_valid(sb, type, cmd, id);
9754 +               error = xqm_quotactl_valid(hash, type, cmd, id);
9755         else
9756 -               error = generic_quotactl_valid(sb, type, cmd, id);
9757 +               error = generic_quotactl_valid(hash, type, cmd, id);
9758         if (!error)
9759 -               error = security_quotactl(cmd, type, id, sb);
9760 +               error = security_quotactl(cmd, type, id, hash);
9761         return error;
9762  }
9763  
9764 -static void quota_sync_sb(struct super_block *sb, int type)
9765 +static void quota_sync_sb(struct super_block *sb)
9766  {
9767 -       int cnt;
9768 -       struct inode *discard[MAXQUOTAS];
9769 -
9770 -       sb->s_qcop->quota_sync(sb, type);
9771         /* This is not very clever (and fast) but currently I don't know about
9772          * any other simple way of getting quota data to disk and we must get
9773          * them there for userspace to be visible... */
9774         if (sb->s_op->sync_fs)
9775                 sb->s_op->sync_fs(sb, 1);
9776         sync_blockdev(sb->s_bdev);
9777 +}
9778 +
9779 +static void quota_sync_dqh(struct dqhash *hash, int type)
9780 +{
9781 +       int cnt;
9782 +       struct inode *discard[MAXQUOTAS];
9783 +
9784 +       vxdprintk(VXD_CBIT(quota, 1),
9785 +               "quota_sync_dqh(%p,%d)", hash, type);
9786 +       hash->dqh_qcop->quota_sync(hash, type);
9787 +
9788 +       quota_sync_sb(hash->dqh_sb);
9789  
9790         /* Now when everything is written we can discard the pagecache so
9791          * that userspace sees the changes. We need i_mutex and so we could
9792          * not do it inside dqonoff_sem. Moreover we need to be carefull
9793          * about races with quotaoff() (that is the reason why we have own
9794          * reference to inode). */
9795 -       down(&sb_dqopt(sb)->dqonoff_sem);
9796 +       down(&dqh_dqopt(hash)->dqonoff_sem);
9797         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
9798                 discard[cnt] = NULL;
9799                 if (type != -1 && cnt != type)
9800                         continue;
9801 -               if (!sb_has_quota_enabled(sb, cnt))
9802 +               if (!dqh_has_quota_enabled(hash, cnt))
9803                         continue;
9804 -               discard[cnt] = igrab(sb_dqopt(sb)->files[cnt]);
9805 +               vxdprintk(VXD_CBIT(quota, 0),
9806 +                       "quota_sync_dqh(%p,%d) discard inode %p",
9807 +                       hash, type, dqh_dqopt(hash)->files[cnt]);
9808 +               discard[cnt] = igrab(dqh_dqopt(hash)->files[cnt]);
9809         }
9810 -       up(&sb_dqopt(sb)->dqonoff_sem);
9811 +       up(&dqh_dqopt(hash)->dqonoff_sem);
9812         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
9813                 if (discard[cnt]) {
9814                         mutex_lock(&discard[cnt]->i_mutex);
9815 @@ -193,67 +207,59 @@ static void quota_sync_sb(struct super_b
9816         }
9817  }
9818  
9819 -void sync_dquots(struct super_block *sb, int type)
9820 +void sync_dquots_dqh(struct dqhash *hash, int type)
9821  {
9822 -       int cnt, dirty;
9823 +       vxdprintk(VXD_CBIT(quota, 1),
9824 +               "sync_dquots_dqh(%p,%d)", hash, type);
9825  
9826 -       if (sb) {
9827 -               if (sb->s_qcop->quota_sync)
9828 -                       quota_sync_sb(sb, type);
9829 -               return;
9830 -       }
9831 +       if (hash->dqh_qcop->quota_sync)
9832 +               quota_sync_dqh(hash, type);
9833 +}
9834  
9835 -       spin_lock(&sb_lock);
9836 -restart:
9837 -       list_for_each_entry(sb, &super_blocks, s_list) {
9838 -               /* This test just improves performance so it needn't be reliable... */
9839 -               for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
9840 -                       if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
9841 -                           && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
9842 -                               dirty = 1;
9843 -               if (!dirty)
9844 -                       continue;
9845 -               sb->s_count++;
9846 -               spin_unlock(&sb_lock);
9847 -               down_read(&sb->s_umount);
9848 -               if (sb->s_root && sb->s_qcop->quota_sync)
9849 -                       quota_sync_sb(sb, type);
9850 -               up_read(&sb->s_umount);
9851 -               spin_lock(&sb_lock);
9852 -               if (__put_super_and_need_restart(sb))
9853 -                       goto restart;
9854 +void sync_dquots(struct dqhash *hash, int type)
9855 +
9856 +{
9857 +       vxdprintk(VXD_CBIT(quota, 1),
9858 +               "sync_dquots(%p,%d)", hash, type);
9859 +
9860 +       if (hash) {
9861 +               if (hash->dqh_qcop->quota_sync)
9862 +                       quota_sync_dqh(hash, type);
9863 +               return;
9864         }
9865 -       spin_unlock(&sb_lock);
9866  }
9867  
9868  /* Copy parameters and call proper function */
9869 -static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void __user *addr)
9870 +static int do_quotactl(struct dqhash *hash, int type, int cmd, qid_t id, void __user *addr)
9871  {
9872         int ret;
9873  
9874 +       vxdprintk(VXD_CBIT(quota, 3),
9875 +               "do_quotactl(%p,%d,cmd=%d,id=%d,%p)", hash, type, cmd, id, addr);
9876 +
9877         switch (cmd) {
9878                 case Q_QUOTAON: {
9879                         char *pathname;
9880  
9881                         if (IS_ERR(pathname = getname(addr)))
9882                                 return PTR_ERR(pathname);
9883 -                       ret = sb->s_qcop->quota_on(sb, type, id, pathname);
9884 +                       ret = hash->dqh_qcop->quota_on(hash, type, id, pathname);
9885                         putname(pathname);
9886                         return ret;
9887                 }
9888                 case Q_QUOTAOFF:
9889 -                       return sb->s_qcop->quota_off(sb, type);
9890 +                       return hash->dqh_qcop->quota_off(hash, type);
9891  
9892                 case Q_GETFMT: {
9893                         __u32 fmt;
9894  
9895 -                       down_read(&sb_dqopt(sb)->dqptr_sem);
9896 -                       if (!sb_has_quota_enabled(sb, type)) {
9897 -                               up_read(&sb_dqopt(sb)->dqptr_sem);
9898 +                       down_read(&dqh_dqopt(hash)->dqptr_sem);
9899 +                       if (!dqh_has_quota_enabled(hash, type)) {
9900 +                               up_read(&dqh_dqopt(hash)->dqptr_sem);
9901                                 return -ESRCH;
9902                         }
9903 -                       fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id;
9904 -                       up_read(&sb_dqopt(sb)->dqptr_sem);
9905 +                       fmt = dqh_dqopt(hash)->info[type].dqi_format->qf_fmt_id;
9906 +                       up_read(&dqh_dqopt(hash)->dqptr_sem);
9907                         if (copy_to_user(addr, &fmt, sizeof(fmt)))
9908                                 return -EFAULT;
9909                         return 0;
9910 @@ -261,7 +267,7 @@ static int do_quotactl(struct super_bloc
9911                 case Q_GETINFO: {
9912                         struct if_dqinfo info;
9913  
9914 -                       if ((ret = sb->s_qcop->get_info(sb, type, &info)))
9915 +                       if ((ret = hash->dqh_qcop->get_info(hash, type, &info)))
9916                                 return ret;
9917                         if (copy_to_user(addr, &info, sizeof(info)))
9918                                 return -EFAULT;
9919 @@ -272,12 +278,12 @@ static int do_quotactl(struct super_bloc
9920  
9921                         if (copy_from_user(&info, addr, sizeof(info)))
9922                                 return -EFAULT;
9923 -                       return sb->s_qcop->set_info(sb, type, &info);
9924 +                       return hash->dqh_qcop->set_info(hash, type, &info);
9925                 }
9926                 case Q_GETQUOTA: {
9927                         struct if_dqblk idq;
9928  
9929 -                       if ((ret = sb->s_qcop->get_dqblk(sb, type, id, &idq)))
9930 +                       if ((ret = hash->dqh_qcop->get_dqblk(hash, type, id, &idq)))
9931                                 return ret;
9932                         if (copy_to_user(addr, &idq, sizeof(idq)))
9933                                 return -EFAULT;
9934 @@ -288,10 +294,10 @@ static int do_quotactl(struct super_bloc
9935  
9936                         if (copy_from_user(&idq, addr, sizeof(idq)))
9937                                 return -EFAULT;
9938 -                       return sb->s_qcop->set_dqblk(sb, type, id, &idq);
9939 +                       return hash->dqh_qcop->set_dqblk(hash, type, id, &idq);
9940                 }
9941                 case Q_SYNC:
9942 -                       sync_dquots(sb, type);
9943 +                       sync_dquots_dqh(hash, type);
9944                         return 0;
9945  
9946                 case Q_XQUOTAON:
9947 @@ -301,12 +307,12 @@ static int do_quotactl(struct super_bloc
9948  
9949                         if (copy_from_user(&flags, addr, sizeof(flags)))
9950                                 return -EFAULT;
9951 -                       return sb->s_qcop->set_xstate(sb, flags, cmd);
9952 +                       return hash->dqh_qcop->set_xstate(hash, flags, cmd);
9953                 }
9954                 case Q_XGETQSTAT: {
9955                         struct fs_quota_stat fqs;
9956                 
9957 -                       if ((ret = sb->s_qcop->get_xstate(sb, &fqs)))
9958 +                       if ((ret = hash->dqh_qcop->get_xstate(hash, &fqs)))
9959                                 return ret;
9960                         if (copy_to_user(addr, &fqs, sizeof(fqs)))
9961                                 return -EFAULT;
9962 @@ -317,19 +323,19 @@ static int do_quotactl(struct super_bloc
9963  
9964                         if (copy_from_user(&fdq, addr, sizeof(fdq)))
9965                                 return -EFAULT;
9966 -                      return sb->s_qcop->set_xquota(sb, type, id, &fdq);
9967 +                      return hash->dqh_qcop->set_xquota(hash, type, id, &fdq);
9968                 }
9969                 case Q_XGETQUOTA: {
9970                         struct fs_disk_quota fdq;
9971  
9972 -                       if ((ret = sb->s_qcop->get_xquota(sb, type, id, &fdq)))
9973 +                       if ((ret = hash->dqh_qcop->get_xquota(hash, type, id, &fdq)))
9974                                 return ret;
9975                         if (copy_to_user(addr, &fdq, sizeof(fdq)))
9976                                 return -EFAULT;
9977                         return 0;
9978                 }
9979                 case Q_XQUOTASYNC:
9980 -                       return sb->s_qcop->quota_sync(sb, type);
9981 +                       return hash->dqh_qcop->quota_sync(hash, type);
9982                 /* We never reach here unless validity check is broken */
9983                 default:
9984                         BUG();
9985 @@ -337,6 +343,43 @@ static int do_quotactl(struct super_bloc
9986         return 0;
9987  }
9988  
9989 +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
9990 +
9991 +#include <linux/vroot.h>
9992 +#include <linux/kallsyms.h>
9993 +
9994 +static vroot_grb_func *vroot_get_real_bdev = NULL;
9995 +
9996 +static spinlock_t vroot_grb_lock = SPIN_LOCK_UNLOCKED;
9997 +/* Install func as the vroot hook called by sys_quotactl(); returns 0, or -EBUSY if a hook is already set. */
9998 +int register_vroot_grb(vroot_grb_func *func) {
9999 +       int ret = -EBUSY;       /* result if the slot is already taken */
10000 +
10001 +       spin_lock(&vroot_grb_lock);
10002 +       if (!vroot_get_real_bdev) {     /* slot is free: claim it */
10003 +               vroot_get_real_bdev = func;
10004 +               ret = 0;
10005 +       }
10006 +       spin_unlock(&vroot_grb_lock);
10007 +       return ret;
10008 +}
10009 +EXPORT_SYMBOL(register_vroot_grb);
10010 +/* Remove the vroot hook; only the func that is currently registered may clear it. Returns 0 or -EINVAL. */
10011 +int unregister_vroot_grb(vroot_grb_func *func) {
10012 +       int ret = -EINVAL;      /* result for a NULL, stale or foreign func */
10013 +
10014 +       spin_lock(&vroot_grb_lock);
10015 +       if (func && vroot_get_real_bdev == func) {      /* caller owns the slot */
10016 +               vroot_get_real_bdev = NULL;
10017 +               ret = 0;
10018 +       }
10019 +       spin_unlock(&vroot_grb_lock);
10020 +       return ret;
10021 +}
10022 +EXPORT_SYMBOL(unregister_vroot_grb);
10023 +
10024 +#endif
10025 +
10026  /*
10027   * This is the system call interface. This communicates with
10028   * the user-level programs. Currently this only supports diskquota
10029 @@ -347,6 +390,7 @@ asmlinkage long sys_quotactl(unsigned in
10030  {
10031         uint cmds, type;
10032         struct super_block *sb = NULL;
10033 +       struct dqhash *dqh = NULL;
10034         struct block_device *bdev;
10035         char *tmp;
10036         int ret;
10037 @@ -362,15 +406,33 @@ asmlinkage long sys_quotactl(unsigned in
10038                 putname(tmp);
10039                 if (IS_ERR(bdev))
10040                         return PTR_ERR(bdev);
10041 +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
10042 +               if (bdev && bdev->bd_inode &&
10043 +                       imajor(bdev->bd_inode) == VROOT_MAJOR) {
10044 +                       struct block_device *bdnew = (void *)-EINVAL;
10045 +
10046 +                       if (vroot_get_real_bdev)
10047 +                               bdnew = vroot_get_real_bdev(bdev);
10048 +                       else
10049 +                               vxdprintk(VXD_CBIT(misc, 0),
10050 +                                       "vroot_get_real_bdev not set");
10051 +
10052 +                       bdput(bdev);
10053 +                       if (IS_ERR(bdnew))
10054 +                               return PTR_ERR(bdnew);
10055 +                       bdev = bdnew;
10056 +               }
10057 +#endif
10058                 sb = get_super(bdev);
10059                 bdput(bdev);
10060                 if (!sb)
10061                         return -ENODEV;
10062         }
10063 -
10064 -       ret = check_quotactl_valid(sb, type, cmds, id);
10065 +       if (sb)
10066 +               dqh = sb->s_dqh;
10067 +       ret = check_quotactl_valid(dqh, type, cmds, id);
10068         if (ret >= 0)
10069 -               ret = do_quotactl(sb, type, cmds, id, addr);
10070 +               ret = do_quotactl(dqh, type, cmds, id, addr);
10071         if (sb)
10072                 drop_super(sb);
10073  
10074 diff -NurpP --minimal linux-2.6.16.20/fs/quota_v1.c linux-2.6.16.20-vs2.1.1-rc22/fs/quota_v1.c
10075 --- linux-2.6.16.20/fs/quota_v1.c       2005-03-02 12:38:45 +0100
10076 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/quota_v1.c  2006-04-26 19:07:00 +0200
10077 @@ -42,12 +42,13 @@ static int v1_read_dqblk(struct dquot *d
10078         int type = dquot->dq_type;
10079         struct v1_disk_dqblk dqblk;
10080  
10081 -       if (!sb_dqopt(dquot->dq_sb)->files[type])
10082 +       if (!dqh_dqopt(dquot->dq_dqh)->files[type])
10083                 return -EINVAL;
10084  
10085         /* Set structure to 0s in case read fails/is after end of file */
10086         memset(&dqblk, 0, sizeof(struct v1_disk_dqblk));
10087 -       dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id));
10088 +       dquot->dq_dqh->dqh_sb->s_op->quota_read(dquot->dq_dqh, type,
10089 +               (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id));
10090  
10091         v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk);
10092         if (dquot->dq_dqb.dqb_bhardlimit == 0 && dquot->dq_dqb.dqb_bsoftlimit == 0 &&
10093 @@ -66,16 +67,16 @@ static int v1_commit_dqblk(struct dquot 
10094  
10095         v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb);
10096         if (dquot->dq_id == 0) {
10097 -               dqblk.dqb_btime = sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace;
10098 -               dqblk.dqb_itime = sb_dqopt(dquot->dq_sb)->info[type].dqi_igrace;
10099 +               dqblk.dqb_btime = dqh_dqopt(dquot->dq_dqh)->info[type].dqi_bgrace;
10100 +               dqblk.dqb_itime = dqh_dqopt(dquot->dq_dqh)->info[type].dqi_igrace;
10101         }
10102         ret = 0;
10103 -       if (sb_dqopt(dquot->dq_sb)->files[type])
10104 -               ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, (char *)&dqblk,
10105 -                                       sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id));
10106 +       if (dqh_dqopt(dquot->dq_dqh)->files[type])
10107 +               ret = dquot->dq_dqh->dqh_sb->s_op->quota_write(dquot->dq_dqh, type,
10108 +                       (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id));
10109         if (ret != sizeof(struct v1_disk_dqblk)) {
10110                 printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
10111 -                       dquot->dq_sb->s_id);
10112 +                       dquot->dq_dqh->dqh_sb->s_id);
10113                 if (ret >= 0)
10114                         ret = -EIO;
10115                 goto out;
10116 @@ -100,9 +101,9 @@ struct v2_disk_dqheader {
10117         __le32 dqh_version;      /* File version */
10118  };
10119  
10120 -static int v1_check_quota_file(struct super_block *sb, int type)
10121 +static int v1_check_quota_file(struct dqhash *hash, int type)
10122  {
10123 -       struct inode *inode = sb_dqopt(sb)->files[type];
10124 +       struct inode *inode = dqh_dqopt(hash)->files[type];
10125         ulong blocks;
10126         size_t off; 
10127         struct v2_disk_dqheader dqhead;
10128 @@ -118,22 +119,26 @@ static int v1_check_quota_file(struct su
10129         if ((blocks % sizeof(struct v1_disk_dqblk) * BLOCK_SIZE + off) % sizeof(struct v1_disk_dqblk))
10130                 return 0;
10131         /* Doublecheck whether we didn't get file with new format - with old quotactl() this could happen */
10132 -       size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
10133 +       size = hash->dqh_sb->s_op->quota_read(hash, type,
10134 +               (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
10135         if (size != sizeof(struct v2_disk_dqheader))
10136                 return 1;       /* Probably not new format */
10137         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type])
10138                 return 1;       /* Definitely not new format */
10139 -       printk(KERN_INFO "VFS: %s: Refusing to turn on old quota format on given file. It probably contains newer quota format.\n", sb->s_id);
10140 +       printk(KERN_INFO "VFS: %s: Refusing to turn on old quota format on given file."
10141 +               " It probably contains newer quota format.\n", hash->dqh_sb->s_id);
10142          return 0;              /* Seems like a new format file -> refuse it */
10143  }
10144  
10145 -static int v1_read_file_info(struct super_block *sb, int type)
10146 +static int v1_read_file_info(struct dqhash *hash, int type)
10147  {
10148 -       struct quota_info *dqopt = sb_dqopt(sb);
10149 +       struct quota_info *dqopt = dqh_dqopt(hash);
10150         struct v1_disk_dqblk dqblk;
10151         int ret;
10152  
10153 -       if ((ret = sb->s_op->quota_read(sb, type, (char *)&dqblk, sizeof(struct v1_disk_dqblk), v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) {
10154 +       if ((ret = hash->dqh_sb->s_op->quota_read(hash, type,
10155 +               (char *)&dqblk, sizeof(struct v1_disk_dqblk),
10156 +               v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) {
10157                 if (ret >= 0)
10158                         ret = -EIO;
10159                 goto out;
10160 @@ -145,14 +150,14 @@ out:
10161         return ret;
10162  }
10163  
10164 -static int v1_write_file_info(struct super_block *sb, int type)
10165 +static int v1_write_file_info(struct dqhash *hash, int type)
10166  {
10167 -       struct quota_info *dqopt = sb_dqopt(sb);
10168 +       struct quota_info *dqopt = dqh_dqopt(hash);
10169         struct v1_disk_dqblk dqblk;
10170         int ret;
10171  
10172         dqopt->info[type].dqi_flags &= ~DQF_INFO_DIRTY;
10173 -       if ((ret = sb->s_op->quota_read(sb, type, (char *)&dqblk,
10174 +       if ((ret = hash->dqh_sb->s_op->quota_read(hash, type, (char *)&dqblk,
10175             sizeof(struct v1_disk_dqblk), v1_dqoff(0))) != sizeof(struct v1_disk_dqblk)) {
10176                 if (ret >= 0)
10177                         ret = -EIO;
10178 @@ -160,7 +165,7 @@ static int v1_write_file_info(struct sup
10179         }
10180         dqblk.dqb_itime = dqopt->info[type].dqi_igrace;
10181         dqblk.dqb_btime = dqopt->info[type].dqi_bgrace;
10182 -       ret = sb->s_op->quota_write(sb, type, (char *)&dqblk,
10183 +       ret = hash->dqh_sb->s_op->quota_write(hash, type, (char *)&dqblk,
10184               sizeof(struct v1_disk_dqblk), v1_dqoff(0));
10185         if (ret == sizeof(struct v1_disk_dqblk))
10186                 ret = 0;
10187 diff -NurpP --minimal linux-2.6.16.20/fs/quota_v2.c linux-2.6.16.20-vs2.1.1-rc22/fs/quota_v2.c
10188 --- linux-2.6.16.20/fs/quota_v2.c       2006-04-09 13:49:54 +0200
10189 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/quota_v2.c  2006-04-26 19:07:00 +0200
10190 @@ -26,14 +26,15 @@ typedef char *dqbuf_t;
10191  #define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
10192  
10193  /* Check whether given file is really vfsv0 quotafile */
10194 -static int v2_check_quota_file(struct super_block *sb, int type)
10195 +static int v2_check_quota_file(struct dqhash *hash, int type)
10196  {
10197         struct v2_disk_dqheader dqhead;
10198         ssize_t size;
10199         static const uint quota_magics[] = V2_INITQMAGICS;
10200         static const uint quota_versions[] = V2_INITQVERSIONS;
10201   
10202 -       size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
10203 +       size = hash->dqh_sb->s_op->quota_read(hash, type,
10204 +               (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
10205         if (size != sizeof(struct v2_disk_dqheader)) {
10206                 printk("quota_v2: failed read expected=%zd got=%zd\n",
10207                         sizeof(struct v2_disk_dqheader), size);
10208 @@ -46,17 +47,17 @@ static int v2_check_quota_file(struct su
10209  }
10210  
10211  /* Read information header from quota file */
10212 -static int v2_read_file_info(struct super_block *sb, int type)
10213 +static int v2_read_file_info(struct dqhash *hash, int type)
10214  {
10215         struct v2_disk_dqinfo dinfo;
10216 -       struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
10217 +       struct mem_dqinfo *info = dqh_dqopt(hash)->info+type;
10218         ssize_t size;
10219  
10220 -       size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
10221 -              sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
10222 +       size = hash->dqh_sb->s_op->quota_read(hash, type,
10223 +               (char *)&dinfo, sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
10224         if (size != sizeof(struct v2_disk_dqinfo)) {
10225                 printk(KERN_WARNING "Can't read info structure on device %s.\n",
10226 -                       sb->s_id);
10227 +                       hash->dqh_sb->s_id);
10228                 return -1;
10229         }
10230         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
10231 @@ -69,10 +70,10 @@ static int v2_read_file_info(struct supe
10232  }
10233  
10234  /* Write information header to quota file */
10235 -static int v2_write_file_info(struct super_block *sb, int type)
10236 +static int v2_write_file_info(struct dqhash *hash, int type)
10237  {
10238         struct v2_disk_dqinfo dinfo;
10239 -       struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
10240 +       struct mem_dqinfo *info = dqh_dqopt(hash)->info+type;
10241         ssize_t size;
10242  
10243         spin_lock(&dq_data_lock);
10244 @@ -84,11 +85,11 @@ static int v2_write_file_info(struct sup
10245         dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks);
10246         dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk);
10247         dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry);
10248 -       size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
10249 +       size = hash->dqh_sb->s_op->quota_write(hash, type, (char *)&dinfo,
10250                sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
10251         if (size != sizeof(struct v2_disk_dqinfo)) {
10252                 printk(KERN_WARNING "Can't write info structure on device %s.\n",
10253 -                       sb->s_id);
10254 +                       hash->dqh_sb->s_id);
10255                 return -1;
10256         }
10257         return 0;
10258 @@ -132,24 +133,24 @@ static inline void freedqbuf(dqbuf_t buf
10259         kfree(buf);
10260  }
10261  
10262 -static inline ssize_t read_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
10263 +static inline ssize_t read_blk(struct dqhash *hash, int type, uint blk, dqbuf_t buf)
10264  {
10265         memset(buf, 0, V2_DQBLKSIZE);
10266 -       return sb->s_op->quota_read(sb, type, (char *)buf,
10267 -              V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
10268 +       return hash->dqh_sb->s_op->quota_read(hash, type,       /* buf is pre-zeroed above; the op's result is returned unchanged */
10269 +               (char *)buf, V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);   /* presumably the last argument is the byte offset of block blk */
10270  }
10271  
10272 -static inline ssize_t write_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
10273 +static inline ssize_t write_blk(struct dqhash *hash, int type, uint blk, dqbuf_t buf)
10274  {
10275 -       return sb->s_op->quota_write(sb, type, (char *)buf,
10276 -              V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
10277 +       return hash->dqh_sb->s_op->quota_write(hash, type,      /* passes V2_DQBLKSIZE bytes of buf to the superblock's quota_write op */
10278 +               (char *)buf, V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);   /* presumably the last argument is the byte offset of block blk */
10279  }
10280  
10281  /* Remove empty block from list and return it */
10282 -static int get_free_dqblk(struct super_block *sb, int type)
10283 +static int get_free_dqblk(struct dqhash *hash, int type)
10284  {
10285         dqbuf_t buf = getdqbuf();
10286 -       struct mem_dqinfo *info = sb_dqinfo(sb, type);
10287 +       struct mem_dqinfo *info = dqh_dqinfo(hash, type);
10288         struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
10289         int ret, blk;
10290  
10291 @@ -157,18 +158,18 @@ static int get_free_dqblk(struct super_b
10292                 return -ENOMEM;
10293         if (info->u.v2_i.dqi_free_blk) {
10294                 blk = info->u.v2_i.dqi_free_blk;
10295 -               if ((ret = read_blk(sb, type, blk, buf)) < 0)
10296 +               if ((ret = read_blk(hash, type, blk, buf)) < 0)
10297                         goto out_buf;
10298                 info->u.v2_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
10299         }
10300         else {
10301                 memset(buf, 0, V2_DQBLKSIZE);
10302                 /* Assure block allocation... */
10303 -               if ((ret = write_blk(sb, type, info->u.v2_i.dqi_blocks, buf)) < 0)
10304 +               if ((ret = write_blk(hash, type, info->u.v2_i.dqi_blocks, buf)) < 0)
10305                         goto out_buf;
10306                 blk = info->u.v2_i.dqi_blocks++;
10307         }
10308 -       mark_info_dirty(sb, type);
10309 +       mark_info_dirty(hash, type);
10310         ret = blk;
10311  out_buf:
10312         freedqbuf(buf);
10313 @@ -176,9 +177,9 @@ out_buf:
10314  }
10315  
10316  /* Insert empty block to the list */
10317 -static int put_free_dqblk(struct super_block *sb, int type, dqbuf_t buf, uint blk)
10318 +static int put_free_dqblk(struct dqhash *hash, int type, dqbuf_t buf, uint blk)
10319  {
10320 -       struct mem_dqinfo *info = sb_dqinfo(sb, type);
10321 +       struct mem_dqinfo *info = dqh_dqinfo(hash, type);
10322         struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
10323         int err;
10324  
10325 @@ -186,18 +187,18 @@ static int put_free_dqblk(struct super_b
10326         dh->dqdh_prev_free = cpu_to_le32(0);
10327         dh->dqdh_entries = cpu_to_le16(0);
10328         info->u.v2_i.dqi_free_blk = blk;
10329 -       mark_info_dirty(sb, type);
10330 +       mark_info_dirty(hash, type);
10331         /* Some strange block. We had better leave it... */
10332 -       if ((err = write_blk(sb, type, blk, buf)) < 0)
10333 +       if ((err = write_blk(hash, type, blk, buf)) < 0)
10334                 return err;
10335         return 0;
10336  }
10337  
10338  /* Remove given block from the list of blocks with free entries */
10339 -static int remove_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
10340 +static int remove_free_dqentry(struct dqhash *hash, int type, dqbuf_t buf, uint blk)
10341  {
10342         dqbuf_t tmpbuf = getdqbuf();
10343 -       struct mem_dqinfo *info = sb_dqinfo(sb, type);
10344 +       struct mem_dqinfo *info = dqh_dqinfo(hash, type);
10345         struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
10346         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free);
10347         int err;
10348 @@ -205,27 +206,27 @@ static int remove_free_dqentry(struct su
10349         if (!tmpbuf)
10350                 return -ENOMEM;
10351         if (nextblk) {
10352 -               if ((err = read_blk(sb, type, nextblk, tmpbuf)) < 0)
10353 +               if ((err = read_blk(hash, type, nextblk, tmpbuf)) < 0)
10354                         goto out_buf;
10355                 ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free;
10356 -               if ((err = write_blk(sb, type, nextblk, tmpbuf)) < 0)
10357 +               if ((err = write_blk(hash, type, nextblk, tmpbuf)) < 0)
10358                         goto out_buf;
10359         }
10360         if (prevblk) {
10361 -               if ((err = read_blk(sb, type, prevblk, tmpbuf)) < 0)
10362 +               if ((err = read_blk(hash, type, prevblk, tmpbuf)) < 0)
10363                         goto out_buf;
10364                 ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free;
10365 -               if ((err = write_blk(sb, type, prevblk, tmpbuf)) < 0)
10366 +               if ((err = write_blk(hash, type, prevblk, tmpbuf)) < 0)
10367                         goto out_buf;
10368         }
10369         else {
10370                 info->u.v2_i.dqi_free_entry = nextblk;
10371 -               mark_info_dirty(sb, type);
10372 +               mark_info_dirty(hash, type);
10373         }
10374         freedqbuf(tmpbuf);
10375         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
10376         /* No matter whether write succeeds block is out of list */
10377 -       if (write_blk(sb, type, blk, buf) < 0)
10378 +       if (write_blk(hash, type, blk, buf) < 0)
10379                 printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
10380         return 0;
10381  out_buf:
10382 @@ -234,10 +235,10 @@ out_buf:
10383  }
10384  
10385  /* Insert given block to the beginning of list with free entries */
10386 -static int insert_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
10387 +static int insert_free_dqentry(struct dqhash *hash, int type, dqbuf_t buf, uint blk)
10388  {
10389         dqbuf_t tmpbuf = getdqbuf();
10390 -       struct mem_dqinfo *info = sb_dqinfo(sb, type);
10391 +       struct mem_dqinfo *info = dqh_dqinfo(hash, type);
10392         struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
10393         int err;
10394  
10395 @@ -245,18 +246,18 @@ static int insert_free_dqentry(struct su
10396                 return -ENOMEM;
10397         dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_entry);
10398         dh->dqdh_prev_free = cpu_to_le32(0);
10399 -       if ((err = write_blk(sb, type, blk, buf)) < 0)
10400 +       if ((err = write_blk(hash, type, blk, buf)) < 0)
10401                 goto out_buf;
10402         if (info->u.v2_i.dqi_free_entry) {
10403 -               if ((err = read_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
10404 +               if ((err = read_blk(hash, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
10405                         goto out_buf;
10406                 ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk);
10407 -               if ((err = write_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
10408 +               if ((err = write_blk(hash, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
10409                         goto out_buf;
10410         }
10411         freedqbuf(tmpbuf);
10412         info->u.v2_i.dqi_free_entry = blk;
10413 -       mark_info_dirty(sb, type);
10414 +       mark_info_dirty(hash, type);
10415         return 0;
10416  out_buf:
10417         freedqbuf(tmpbuf);
10418 @@ -266,8 +267,9 @@ out_buf:
10419  /* Find space for dquot */
10420  static uint find_free_dqentry(struct dquot *dquot, int *err)
10421  {
10422 -       struct super_block *sb = dquot->dq_sb;
10423 -       struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
10424 +       // struct super_block *sb = dquot->dq_sb;
10425 +       struct dqhash *dqh = dquot->dq_dqh;
10426 +       struct mem_dqinfo *info = dqh_dqopt(dqh)->info+dquot->dq_type;
10427         uint blk, i;
10428         struct v2_disk_dqdbheader *dh;
10429         struct v2_disk_dqblk *ddquot;
10430 @@ -283,11 +285,11 @@ static uint find_free_dqentry(struct dqu
10431         ddquot = GETENTRIES(buf);
10432         if (info->u.v2_i.dqi_free_entry) {
10433                 blk = info->u.v2_i.dqi_free_entry;
10434 -               if ((*err = read_blk(sb, dquot->dq_type, blk, buf)) < 0)
10435 +               if ((*err = read_blk(dqh, dquot->dq_type, blk, buf)) < 0)
10436                         goto out_buf;
10437         }
10438         else {
10439 -               blk = get_free_dqblk(sb, dquot->dq_type);
10440 +               blk = get_free_dqblk(dqh, dquot->dq_type);
10441                 if ((int)blk < 0) {
10442                         *err = blk;
10443                         freedqbuf(buf);
10444 @@ -296,10 +298,10 @@ static uint find_free_dqentry(struct dqu
10445                 memset(buf, 0, V2_DQBLKSIZE);
10446                 /* This is enough as block is already zeroed and entry list is empty... */
10447                 info->u.v2_i.dqi_free_entry = blk;
10448 -               mark_info_dirty(sb, dquot->dq_type);
10449 +               mark_info_dirty(dqh, dquot->dq_type);
10450         }
10451         if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)   /* Block will be full? */
10452 -               if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
10453 +               if ((*err = remove_free_dqentry(dqh, dquot->dq_type, buf, blk)) < 0) {
10454                         printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
10455                         goto out_buf;
10456                 }
10457 @@ -314,7 +316,7 @@ static uint find_free_dqentry(struct dqu
10458                 goto out_buf;
10459         }
10460  #endif
10461 -       if ((*err = write_blk(sb, dquot->dq_type, blk, buf)) < 0) {
10462 +       if ((*err = write_blk(dqh, dquot->dq_type, blk, buf)) < 0) {
10463                 printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
10464                 goto out_buf;
10465         }
10466 @@ -329,7 +331,7 @@ out_buf:
10467  /* Insert reference to structure into the trie */
10468  static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth)
10469  {
10470 -       struct super_block *sb = dquot->dq_sb;
10471 +       struct dqhash *dqh = dquot->dq_dqh;
10472         dqbuf_t buf;
10473         int ret = 0, newson = 0, newact = 0;
10474         __le32 *ref;
10475 @@ -338,7 +340,7 @@ static int do_insert_tree(struct dquot *
10476         if (!(buf = getdqbuf()))
10477                 return -ENOMEM;
10478         if (!*treeblk) {
10479 -               ret = get_free_dqblk(sb, dquot->dq_type);
10480 +               ret = get_free_dqblk(dqh, dquot->dq_type);
10481                 if (ret < 0)
10482                         goto out_buf;
10483                 *treeblk = ret;
10484 @@ -346,7 +348,7 @@ static int do_insert_tree(struct dquot *
10485                 newact = 1;
10486         }
10487         else {
10488 -               if ((ret = read_blk(sb, dquot->dq_type, *treeblk, buf)) < 0) {
10489 +               if ((ret = read_blk(dqh, dquot->dq_type, *treeblk, buf)) < 0) {
10490                         printk(KERN_ERR "VFS: Can't read tree quota block %u.\n", *treeblk);
10491                         goto out_buf;
10492                 }
10493 @@ -369,10 +371,10 @@ static int do_insert_tree(struct dquot *
10494                 ret = do_insert_tree(dquot, &newblk, depth+1);
10495         if (newson && ret >= 0) {
10496                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
10497 -               ret = write_blk(sb, dquot->dq_type, *treeblk, buf);
10498 +               ret = write_blk(dqh, dquot->dq_type, *treeblk, buf);
10499         }
10500         else if (newact && ret < 0)
10501 -               put_free_dqblk(sb, dquot->dq_type, buf, *treeblk);
10502 +               put_free_dqblk(dqh, dquot->dq_type, buf, *treeblk);
10503  out_buf:
10504         freedqbuf(buf);
10505         return ret;
10506 @@ -409,10 +411,11 @@ static int v2_write_dquot(struct dquot *
10507         if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
10508                 ddquot.dqb_itime = cpu_to_le64(1);
10509         spin_unlock(&dq_data_lock);
10510 -       ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
10511 +       ret = dquot->dq_dqh->dqh_sb->s_op->quota_write(dquot->dq_dqh, type,
10512               (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
10513         if (ret != sizeof(struct v2_disk_dqblk)) {
10514 -               printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
10515 +               printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
10516 +                       dquot->dq_dqh->dqh_sb->s_id);
10517                 if (ret >= 0)
10518                         ret = -ENOSPC;
10519         }
10520 @@ -426,7 +429,8 @@ static int v2_write_dquot(struct dquot *
10521  /* Free dquot entry in data block */
10522  static int free_dqentry(struct dquot *dquot, uint blk)
10523  {
10524 -       struct super_block *sb = dquot->dq_sb;
10525 +       // struct super_block *sb = dquot->dq_sb;
10526 +       struct dqhash *dqh = dquot->dq_dqh;
10527         int type = dquot->dq_type;
10528         struct v2_disk_dqdbheader *dh;
10529         dqbuf_t buf = getdqbuf();
10530 @@ -440,15 +444,15 @@ static int free_dqentry(struct dquot *dq
10531                   (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS));
10532                 goto out_buf;
10533         }
10534 -       if ((ret = read_blk(sb, type, blk, buf)) < 0) {
10535 +       if ((ret = read_blk(dqh, type, blk, buf)) < 0) {
10536                 printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
10537                 goto out_buf;
10538         }
10539         dh = (struct v2_disk_dqdbheader *)buf;
10540         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1);
10541         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
10542 -               if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
10543 -                   (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
10544 +               if ((ret = remove_free_dqentry(dqh, type, buf, blk)) < 0 ||
10545 +                   (ret = put_free_dqblk(dqh, type, buf, blk)) < 0) {
10546                         printk(KERN_ERR "VFS: Can't move quota data block (%u) "
10547                           "to free list.\n", blk);
10548                         goto out_buf;
10549 @@ -459,13 +463,13 @@ static int free_dqentry(struct dquot *dq
10550                   sizeof(struct v2_disk_dqblk));
10551                 if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
10552                         /* Insert will write block itself */
10553 -                       if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
10554 +                       if ((ret = insert_free_dqentry(dqh, type, buf, blk)) < 0) {
10555                                 printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
10556                                 goto out_buf;
10557                         }
10558                 }
10559                 else
10560 -                       if ((ret = write_blk(sb, type, blk, buf)) < 0) {
10561 +                       if ((ret = write_blk(dqh, type, blk, buf)) < 0) {
10562                                 printk(KERN_ERR "VFS: Can't write quota data "
10563                                   "block %u\n", blk);
10564                                 goto out_buf;
10565 @@ -480,7 +484,7 @@ out_buf:
10566  /* Remove reference to dquot from tree */
10567  static int remove_tree(struct dquot *dquot, uint *blk, int depth)
10568  {
10569 -       struct super_block *sb = dquot->dq_sb;
10570 +       struct dqhash *dqh = dquot->dq_dqh;
10571         int type = dquot->dq_type;
10572         dqbuf_t buf = getdqbuf();
10573         int ret = 0;
10574 @@ -489,7 +493,7 @@ static int remove_tree(struct dquot *dqu
10575         
10576         if (!buf)
10577                 return -ENOMEM;
10578 -       if ((ret = read_blk(sb, type, *blk, buf)) < 0) {
10579 +       if ((ret = read_blk(dqh, type, *blk, buf)) < 0) {
10580                 printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
10581                 goto out_buf;
10582         }
10583 @@ -506,11 +510,11 @@ static int remove_tree(struct dquot *dqu
10584                 for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++);  /* Block got empty? */
10585                 /* Don't put the root block into the free block list */
10586                 if (i == V2_DQBLKSIZE && *blk != V2_DQTREEOFF) {
10587 -                       put_free_dqblk(sb, type, buf, *blk);
10588 +                       put_free_dqblk(dqh, type, buf, *blk);
10589                         *blk = 0;
10590                 }
10591                 else
10592 -                       if ((ret = write_blk(sb, type, *blk, buf)) < 0)
10593 +                       if ((ret = write_blk(dqh, type, *blk, buf)) < 0)
10594                                 printk(KERN_ERR "VFS: Can't write quota tree "
10595                                   "block %u.\n", *blk);
10596         }
10597 @@ -539,7 +543,7 @@ static loff_t find_block_dqentry(struct 
10598  
10599         if (!buf)
10600                 return -ENOMEM;
10601 -       if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
10602 +       if ((ret = read_blk(dquot->dq_dqh, dquot->dq_type, blk, buf)) < 0) {
10603                 printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
10604                 goto out_buf;
10605         }
10606 @@ -578,7 +582,7 @@ static loff_t find_tree_dqentry(struct d
10607  
10608         if (!buf)
10609                 return -ENOMEM;
10610 -       if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
10611 +       if ((ret = read_blk(dquot->dq_dqh, dquot->dq_type, blk, buf)) < 0) {
10612                 printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
10613                 goto out_buf;
10614         }
10615 @@ -610,7 +614,7 @@ static int v2_read_dquot(struct dquot *d
10616  
10617  #ifdef __QUOTA_V2_PARANOIA
10618         /* Invalidated quota? */
10619 -       if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
10620 +       if (!dquot->dq_dqh || !dqh_dqopt(dquot->dq_dqh)->files[type]) {
10621                 printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
10622                 return -EIO;
10623         }
10624 @@ -627,7 +631,7 @@ static int v2_read_dquot(struct dquot *d
10625         }
10626         else {
10627                 dquot->dq_off = offset;
10628 -               if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
10629 +               if ((ret = dquot->dq_dqh->dqh_sb->s_op->quota_read(dquot->dq_dqh, type,
10630                     (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
10631                     != sizeof(struct v2_disk_dqblk)) {
10632                         if (ret >= 0)
10633 diff -NurpP --minimal linux-2.6.16.20/fs/read_write.c linux-2.6.16.20-vs2.1.1-rc22/fs/read_write.c
10634 --- linux-2.6.16.20/fs/read_write.c     2006-04-09 13:49:54 +0200
10635 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/read_write.c        2006-04-26 19:07:00 +0200
10636 @@ -636,12 +636,77 @@ sys_writev(unsigned long fd, const struc
10637         return ret;
10638  }
10639  
10640 +ssize_t vfs_sendfile(struct file *out_file, struct file *in_file, loff_t *ppos,
10641 +                    size_t count, loff_t max)
10642 +{
10643 +       struct inode * in_inode, * out_inode;
10644 +       loff_t pos;
10645 +       ssize_t ret;
10646 +       /* Feed up to count bytes of in_file to out_file through ->sendfile(). */
10647 +       /* verify in_file: it needs an inode and a ->sendfile() method */
10648 +       in_inode = in_file->f_dentry->d_inode;
10649 +       if (!in_inode)
10650 +               return -EINVAL;
10651 +       if (!in_file->f_op || !in_file->f_op->sendfile)
10652 +               return -EINVAL;
10653 +       /* NULL ppos means in_file->f_pos; an explicit one needs FMODE_PREAD */
10654 +       if (!ppos)
10655 +               ppos = &in_file->f_pos;
10656 +       else
10657 +               if (!(in_file->f_mode & FMODE_PREAD))
10658 +                       return -ESPIPE;
10659 +       /* a non-negative rw_verify_area() result replaces count */
10660 +       ret = rw_verify_area(READ, in_file, ppos, count);
10661 +       if (ret < 0)
10662 +               return ret;
10663 +       count = ret;
10664 +
10665 +       /* verify out_file: it needs an inode and a ->sendpage() method */
10666 +       out_inode = out_file->f_dentry->d_inode;
10667 +       if (!out_inode)
10668 +               return -EINVAL;
10669 +       if (!out_file->f_op || !out_file->f_op->sendpage)
10670 +               return -EINVAL;
10671 +       /* the write side is verified at out_file's own f_pos */
10672 +       ret = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
10673 +       if (ret < 0)
10674 +               return ret;
10675 +       count = ret;
10676 +       /* security check covers out_file only; none is done for in_file here */
10677 +       ret = security_file_permission (out_file, MAY_WRITE);
10678 +       if (ret)
10679 +               return ret;
10680 +       /* max == 0: limit by the smaller s_maxbytes of the two superblocks */
10681 +       if (!max)
10682 +               max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
10683 +       /* shrink count so that pos + count stays within max */
10684 +       pos = *ppos;
10685 +       if (unlikely(pos < 0))
10686 +               return -EINVAL;
10687 +       if (unlikely(pos + count > max)) {
10688 +               if (pos >= max)
10689 +                       return -EOVERFLOW;
10690 +               count = max - pos;
10691 +       }
10692 +
10693 +       ret = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
10694 +       /* charge a positive result to current's rchar and wchar counters */
10695 +       if (ret > 0) {
10696 +               current->rchar += ret;
10697 +               current->wchar += ret;
10698 +       }
10699 +       /* position ended up beyond max: report -EOVERFLOW instead of ret */
10700 +       if (*ppos > max)
10701 +               return -EOVERFLOW;
10702 +       return ret;
10703 +}
10704 +
10705 +EXPORT_SYMBOL(vfs_sendfile);
10706 +
10707  static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
10708                            size_t count, loff_t max)
10709  {
10710         struct file * in_file, * out_file;
10711 -       struct inode * in_inode, * out_inode;
10712 -       loff_t pos;
10713         ssize_t retval;
10714         int fput_needed_in, fput_needed_out;
10715  
10716 @@ -654,22 +719,6 @@ static ssize_t do_sendfile(int out_fd, i
10717                 goto out;
10718         if (!(in_file->f_mode & FMODE_READ))
10719                 goto fput_in;
10720 -       retval = -EINVAL;
10721 -       in_inode = in_file->f_dentry->d_inode;
10722 -       if (!in_inode)
10723 -               goto fput_in;
10724 -       if (!in_file->f_op || !in_file->f_op->sendfile)
10725 -               goto fput_in;
10726 -       retval = -ESPIPE;
10727 -       if (!ppos)
10728 -               ppos = &in_file->f_pos;
10729 -       else
10730 -               if (!(in_file->f_mode & FMODE_PREAD))
10731 -                       goto fput_in;
10732 -       retval = rw_verify_area(READ, in_file, ppos, count);
10733 -       if (retval < 0)
10734 -               goto fput_in;
10735 -       count = retval;
10736  
10737         retval = security_file_permission (in_file, MAY_READ);
10738         if (retval)
10739 @@ -684,45 +733,12 @@ static ssize_t do_sendfile(int out_fd, i
10740                 goto fput_in;
10741         if (!(out_file->f_mode & FMODE_WRITE))
10742                 goto fput_out;
10743 -       retval = -EINVAL;
10744 -       if (!out_file->f_op || !out_file->f_op->sendpage)
10745 -               goto fput_out;
10746 -       out_inode = out_file->f_dentry->d_inode;
10747 -       retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
10748 -       if (retval < 0)
10749 -               goto fput_out;
10750 -       count = retval;
10751 -
10752 -       retval = security_file_permission (out_file, MAY_WRITE);
10753 -       if (retval)
10754 -               goto fput_out;
10755 -
10756 -       if (!max)
10757 -               max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
10758 -
10759 -       pos = *ppos;
10760 -       retval = -EINVAL;
10761 -       if (unlikely(pos < 0))
10762 -               goto fput_out;
10763 -       if (unlikely(pos + count > max)) {
10764 -               retval = -EOVERFLOW;
10765 -               if (pos >= max)
10766 -                       goto fput_out;
10767 -               count = max - pos;
10768 -       }
10769  
10770 -       retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
10771 +       retval = vfs_sendfile(out_file, in_file, ppos, count, max);
10772  
10773 -       if (retval > 0) {
10774 -               current->rchar += retval;
10775 -               current->wchar += retval;
10776 -       }
10777         current->syscr++;
10778         current->syscw++;
10779  
10780 -       if (*ppos > max)
10781 -               retval = -EOVERFLOW;
10782 -
10783  fput_out:
10784         fput_light(out_file, fput_needed_out);
10785  fput_in:
10786 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/bitmap.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/bitmap.c
10787 --- linux-2.6.16.20/fs/reiserfs/bitmap.c        2005-08-29 22:25:33 +0200
10788 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/bitmap.c   2006-04-26 19:07:00 +0200
10789 @@ -13,6 +13,7 @@
10790  #include <linux/reiserfs_fs_sb.h>
10791  #include <linux/reiserfs_fs_i.h>
10792  #include <linux/quotaops.h>
10793 +#include <linux/vs_dlimit.h>
10794  
10795  #define PREALLOCATION_SIZE 9
10796  
10797 @@ -411,8 +412,10 @@ static void _reiserfs_free_block(struct 
10798         set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
10799  
10800         journal_mark_dirty(th, s, sbh);
10801 -       if (for_unformatted)
10802 +       if (for_unformatted) {
10803 +               DLIMIT_FREE_BLOCK(inode, 1);
10804                 DQUOT_FREE_BLOCK_NODIRTY(inode, 1);
10805 +       }
10806  }
10807  
10808  void reiserfs_free_block(struct reiserfs_transaction_handle *th,
10809 @@ -1021,6 +1024,7 @@ static inline int blocknrs_and_prealloc_
10810         int passno = 0;
10811         int nr_allocated = 0;
10812         int bigalloc = 0;
10813 +       int blocks;
10814  
10815         determine_prealloc_size(hint);
10816         if (!hint->formatted_node) {
10817 @@ -1030,19 +1034,30 @@ static inline int blocknrs_and_prealloc_
10818                                "reiserquota: allocating %d blocks id=%u",
10819                                amount_needed, hint->inode->i_uid);
10820  #endif
10821 -               quota_ret =
10822 -                   DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed);
10823 -               if (quota_ret)  /* Quota exceeded? */
10824 +               quota_ret = DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode,
10825 +                       amount_needed);
10826 +               if (quota_ret)
10827                         return QUOTA_EXCEEDED;
10828 +               if (DLIMIT_ALLOC_BLOCK(hint->inode, amount_needed)) {
10829 +                       DQUOT_FREE_BLOCK_NODIRTY(hint->inode,
10830 +                               amount_needed);
10831 +                       return NO_DISK_SPACE;
10832 +               }
10833 +
10834                 if (hint->preallocate && hint->prealloc_size) {
10835  #ifdef REISERQUOTA_DEBUG
10836                         reiserfs_debug(s, REISERFS_DEBUG_CODE,
10837                                        "reiserquota: allocating (prealloc) %d blocks id=%u",
10838                                        hint->prealloc_size, hint->inode->i_uid);
10839  #endif
10840 -                       quota_ret =
10841 -                           DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode,
10842 -                                                        hint->prealloc_size);
10843 +                       quota_ret = DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode,
10844 +                               hint->prealloc_size);
10845 +                       if (!quota_ret &&
10846 +                               DLIMIT_ALLOC_BLOCK(hint->inode, hint->prealloc_size)) {
10847 +                               DQUOT_FREE_BLOCK_NODIRTY(hint->inode,
10848 +                                       hint->prealloc_size);
10849 +                               quota_ret = 1;
10850 +                       }
10851                         if (quota_ret)
10852                                 hint->preallocate = hint->prealloc_size = 0;
10853                 }
10854 @@ -1093,7 +1108,10 @@ static inline int blocknrs_and_prealloc_
10855                                                nr_allocated,
10856                                                hint->inode->i_uid);
10857  #endif
10858 -                               DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated);      /* Free not allocated blocks */
10859 +                               /* Free not allocated blocks */
10860 +                               blocks = amount_needed + hint->prealloc_size - nr_allocated;
10861 +                               DLIMIT_FREE_BLOCK(hint->inode, blocks);
10862 +                               DQUOT_FREE_BLOCK_NODIRTY(hint->inode, blocks);
10863                         }
10864                         while (nr_allocated--)
10865                                 reiserfs_free_block(hint->th, hint->inode,
10866 @@ -1125,10 +1143,10 @@ static inline int blocknrs_and_prealloc_
10867                                REISERFS_I(hint->inode)->i_prealloc_count,
10868                                hint->inode->i_uid);
10869  #endif
10870 -               DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed +
10871 -                                        hint->prealloc_size - nr_allocated -
10872 -                                        REISERFS_I(hint->inode)->
10873 -                                        i_prealloc_count);
10874 +               blocks = amount_needed + hint->prealloc_size - nr_allocated -
10875 +                       REISERFS_I(hint->inode)->i_prealloc_count;
10876 +               DLIMIT_FREE_BLOCK(hint->inode, blocks);
10877 +               DQUOT_FREE_BLOCK_NODIRTY(hint->inode, blocks);
10878         }
10879  
10880         return CARRY_ON;
10881 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/file.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/file.c
10882 --- linux-2.6.16.20/fs/reiserfs/file.c  2006-04-09 13:49:54 +0200
10883 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/file.c     2006-04-26 19:07:00 +0200
10884 @@ -1574,6 +1574,7 @@ struct file_operations reiserfs_file_ope
10885         .release = reiserfs_file_release,
10886         .fsync = reiserfs_sync_file,
10887         .sendfile = generic_file_sendfile,
10888 +       .sendpage = generic_file_sendpage,
10889         .aio_read = generic_file_aio_read,
10890         .aio_write = reiserfs_aio_write,
10891  };
10892 @@ -1586,4 +1587,5 @@ struct inode_operations reiserfs_file_in
10893         .listxattr = reiserfs_listxattr,
10894         .removexattr = reiserfs_removexattr,
10895         .permission = reiserfs_permission,
10896 +       .sync_flags = reiserfs_sync_flags,
10897  };
10898 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/inode.c
10899 --- linux-2.6.16.20/fs/reiserfs/inode.c 2006-04-09 13:49:55 +0200
10900 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/inode.c    2006-04-26 19:07:00 +0200
10901 @@ -17,6 +17,8 @@
10902  #include <linux/mpage.h>
10903  #include <linux/writeback.h>
10904  #include <linux/quotaops.h>
10905 +#include <linux/vs_dlimit.h>
10906 +#include <linux/vserver/tag.h>
10907  
10908  extern int reiserfs_default_io_size;   /* default io size devuned in super.c */
10909  
10910 @@ -57,6 +59,7 @@ void reiserfs_delete_inode(struct inode 
10911                  * stat data deletion */
10912                 if (!err) 
10913                         DQUOT_FREE_INODE(inode);
10914 +               DLIMIT_FREE_INODE(inode);
10915  
10916                 if (journal_end(&th, inode->i_sb, jbegin_count)) {
10917                         mutex_unlock(&inode->i_mutex);
10918 @@ -1126,6 +1129,8 @@ static void init_inode(struct inode *ino
10919         struct buffer_head *bh;
10920         struct item_head *ih;
10921         __u32 rdev;
10922 +       uid_t uid;
10923 +       gid_t gid;
10924         //int version = ITEM_VERSION_1;
10925  
10926         bh = PATH_PLAST_BUFFER(path);
10927 @@ -1149,12 +1154,13 @@ static void init_inode(struct inode *ino
10928                     (struct stat_data_v1 *)B_I_PITEM(bh, ih);
10929                 unsigned long blocks;
10930  
10931 +               uid = sd_v1_uid(sd);
10932 +               gid = sd_v1_gid(sd);
10933 +
10934                 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
10935                 set_inode_sd_version(inode, STAT_DATA_V1);
10936                 inode->i_mode = sd_v1_mode(sd);
10937                 inode->i_nlink = sd_v1_nlink(sd);
10938 -               inode->i_uid = sd_v1_uid(sd);
10939 -               inode->i_gid = sd_v1_gid(sd);
10940                 inode->i_size = sd_v1_size(sd);
10941                 inode->i_atime.tv_sec = sd_v1_atime(sd);
10942                 inode->i_mtime.tv_sec = sd_v1_mtime(sd);
10943 @@ -1196,11 +1202,12 @@ static void init_inode(struct inode *ino
10944                 // (directories and symlinks)
10945                 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
10946  
10947 +               uid    = sd_v2_uid(sd);
10948 +               gid    = sd_v2_gid(sd);
10949 +
10950                 inode->i_mode = sd_v2_mode(sd);
10951                 inode->i_nlink = sd_v2_nlink(sd);
10952 -               inode->i_uid = sd_v2_uid(sd);
10953                 inode->i_size = sd_v2_size(sd);
10954 -               inode->i_gid = sd_v2_gid(sd);
10955                 inode->i_mtime.tv_sec = sd_v2_mtime(sd);
10956                 inode->i_atime.tv_sec = sd_v2_atime(sd);
10957                 inode->i_ctime.tv_sec = sd_v2_ctime(sd);
10958 @@ -1230,6 +1237,10 @@ static void init_inode(struct inode *ino
10959                 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
10960         }
10961  
10962 +       inode->i_uid = INOTAG_UID(DX_TAG(inode), uid, gid);
10963 +       inode->i_gid = INOTAG_GID(DX_TAG(inode), uid, gid);
10964 +       inode->i_tag = INOTAG_TAG(DX_TAG(inode), uid, gid, 0);
10965 +
10966         pathrelse(path);
10967         if (S_ISREG(inode->i_mode)) {
10968                 inode->i_op = &reiserfs_file_inode_operations;
10969 @@ -1252,13 +1263,15 @@ static void init_inode(struct inode *ino
10970  static void inode2sd(void *sd, struct inode *inode, loff_t size)
10971  {
10972         struct stat_data *sd_v2 = (struct stat_data *)sd;
10973 +       uid_t uid = TAGINO_UID(DX_TAG(inode), inode->i_uid, inode->i_tag);
10974 +       gid_t gid = TAGINO_GID(DX_TAG(inode), inode->i_gid, inode->i_tag);
10975         __u16 flags;
10976  
10977 +       set_sd_v2_uid(sd_v2, uid);
10978 +       set_sd_v2_gid(sd_v2, gid);
10979         set_sd_v2_mode(sd_v2, inode->i_mode);
10980         set_sd_v2_nlink(sd_v2, inode->i_nlink);
10981 -       set_sd_v2_uid(sd_v2, inode->i_uid);
10982         set_sd_v2_size(sd_v2, size);
10983 -       set_sd_v2_gid(sd_v2, inode->i_gid);
10984         set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
10985         set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
10986         set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
10987 @@ -1789,6 +1802,10 @@ int reiserfs_new_inode(struct reiserfs_t
10988  
10989         BUG_ON(!th->t_trans_id);
10990  
10991 +       if (DLIMIT_ALLOC_INODE(inode)) {
10992 +               err = -ENOSPC;
10993 +               goto out_bad_dlimit;
10994 +       }
10995         if (DQUOT_ALLOC_INODE(inode)) {
10996                 err = -EDQUOT;
10997                 goto out_end_trans;
10998 @@ -1974,6 +1991,9 @@ int reiserfs_new_inode(struct reiserfs_t
10999         DQUOT_FREE_INODE(inode);
11000  
11001        out_end_trans:
11002 +       DLIMIT_FREE_INODE(inode);
11003 +
11004 +      out_bad_dlimit:
11005         journal_end(th, th->t_super, th->t_blocks_allocated);
11006         /* Drop can be outside and it needs more credits so it's better to have it outside */
11007         DQUOT_DROP(inode);
11008 @@ -2701,6 +2721,14 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs,
11009                         inode->i_flags |= S_IMMUTABLE;
11010                 else
11011                         inode->i_flags &= ~S_IMMUTABLE;
11012 +               if (sd_attrs & REISERFS_IUNLINK_FL)
11013 +                       inode->i_flags |= S_IUNLINK;
11014 +               else
11015 +                       inode->i_flags &= ~S_IUNLINK;
11016 +               if (sd_attrs & REISERFS_BARRIER_FL)
11017 +                       inode->i_flags |= S_BARRIER;
11018 +               else
11019 +                       inode->i_flags &= ~S_BARRIER;
11020                 if (sd_attrs & REISERFS_APPEND_FL)
11021                         inode->i_flags |= S_APPEND;
11022                 else
11023 @@ -2723,6 +2751,14 @@ void i_attrs_to_sd_attrs(struct inode *i
11024                         *sd_attrs |= REISERFS_IMMUTABLE_FL;
11025                 else
11026                         *sd_attrs &= ~REISERFS_IMMUTABLE_FL;
11027 +               if (inode->i_flags & S_IUNLINK)
11028 +                       *sd_attrs |= REISERFS_IUNLINK_FL;
11029 +               else
11030 +                       *sd_attrs &= ~REISERFS_IUNLINK_FL;
11031 +               if (inode->i_flags & S_BARRIER)
11032 +                       *sd_attrs |= REISERFS_BARRIER_FL;
11033 +               else
11034 +                       *sd_attrs &= ~REISERFS_BARRIER_FL;
11035                 if (inode->i_flags & S_SYNC)
11036                         *sd_attrs |= REISERFS_SYNC_FL;
11037                 else
11038 @@ -2900,6 +2936,22 @@ static ssize_t reiserfs_direct_IO(int rw
11039                                   reiserfs_get_blocks_direct_io, NULL);
11040  }
11041  
11042 +int reiserfs_sync_flags(struct inode *inode)
11043 +{
11044 +       u16 oldflags, newflags;
11045 +       /* Refresh i_attrs bits from inode->i_flags via i_attrs_to_sd_attrs(). */
11046 +       oldflags = REISERFS_I(inode)->i_attrs;
11047 +       newflags = oldflags;
11048 +       i_attrs_to_sd_attrs(inode, &newflags);
11049 +       /* store, bump ctime and dirty the inode only if some bit changed */
11050 +       if (oldflags ^ newflags) {
11051 +               REISERFS_I(inode)->i_attrs = newflags;
11052 +               inode->i_ctime = CURRENT_TIME_SEC;
11053 +               mark_inode_dirty(inode);
11054 +       }
11055 +       return 0;       /* no failure path */
11056 +}
11057 +
11058  int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
11059  {
11060         struct inode *inode = dentry->d_inode;
11061 @@ -2944,9 +2996,11 @@ int reiserfs_setattr(struct dentry *dent
11062         }
11063  
11064         error = inode_change_ok(inode, attr);
11065 +
11066         if (!error) {
11067                 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
11068 -                   (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
11069 +                   (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid) ||
11070 +                   (ia_valid & ATTR_TAG && attr->ia_tag != inode->i_tag)) {
11071                         error = reiserfs_chown_xattrs(inode, attr);
11072  
11073                         if (!error) {
11074 @@ -2976,6 +3030,9 @@ int reiserfs_setattr(struct dentry *dent
11075                                         inode->i_uid = attr->ia_uid;
11076                                 if (attr->ia_valid & ATTR_GID)
11077                                         inode->i_gid = attr->ia_gid;
11078 +                               if ((attr->ia_valid & ATTR_TAG) &&
11079 +                                       IS_TAGGED(inode))
11080 +                                       inode->i_tag = attr->ia_tag;
11081                                 mark_inode_dirty(inode);
11082                                 error =
11083                                     journal_end(&th, inode->i_sb, jbegin_count);
11084 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/ioctl.c
11085 --- linux-2.6.16.20/fs/reiserfs/ioctl.c 2006-04-09 13:49:55 +0200
11086 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/ioctl.c    2006-04-26 19:07:00 +0200
11087 @@ -4,6 +4,7 @@
11088  
11089  #include <linux/capability.h>
11090  #include <linux/fs.h>
11091 +#include <linux/mount.h>
11092  #include <linux/reiserfs_fs.h>
11093  #include <linux/time.h>
11094  #include <asm/uaccess.h>
11095 @@ -23,7 +24,7 @@ static int reiserfs_unpack(struct inode 
11096  int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
11097                    unsigned long arg)
11098  {
11099 -       unsigned int flags;
11100 +       unsigned int flags, oldflags;
11101  
11102         switch (cmd) {
11103         case REISERFS_IOC_UNPACK:
11104 @@ -42,12 +43,14 @@ int reiserfs_ioctl(struct inode *inode, 
11105  
11106                 flags = REISERFS_I(inode)->i_attrs;
11107                 i_attrs_to_sd_attrs(inode, (__u16 *) & flags);
11108 +               flags &= REISERFS_FL_USER_VISIBLE;
11109                 return put_user(flags, (int __user *)arg);
11110         case REISERFS_IOC_SETFLAGS:{
11111                         if (!reiserfs_attrs(inode->i_sb))
11112                                 return -ENOTTY;
11113  
11114 -                       if (IS_RDONLY(inode))
11115 +                       if (IS_RDONLY(inode) ||
11116 +                               (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
11117                                 return -EROFS;
11118  
11119                         if ((current->fsuid != inode->i_uid)
11120 @@ -57,10 +60,12 @@ int reiserfs_ioctl(struct inode *inode, 
11121                         if (get_user(flags, (int __user *)arg))
11122                                 return -EFAULT;
11123  
11124 -                       if (((flags ^ REISERFS_I(inode)->
11125 -                             i_attrs) & (REISERFS_IMMUTABLE_FL |
11126 -                                         REISERFS_APPEND_FL))
11127 -                           && !capable(CAP_LINUX_IMMUTABLE))
11128 +                       oldflags = REISERFS_I(inode) -> i_attrs;
11129 +                       if (((oldflags & REISERFS_IMMUTABLE_FL) ||
11130 +                               ((flags ^ oldflags) &
11131 +                               (REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL |
11132 +                                REISERFS_APPEND_FL))) &&
11133 +                               !capable(CAP_LINUX_IMMUTABLE))
11134                                 return -EPERM;
11135  
11136                         if ((flags & REISERFS_NOTAIL_FL) &&
11137 @@ -71,6 +76,9 @@ int reiserfs_ioctl(struct inode *inode, 
11138                                 if (result)
11139                                         return result;
11140                         }
11141 +
11142 +                       flags = flags & REISERFS_FL_USER_MODIFIABLE;
11143 +                       flags |= oldflags & ~REISERFS_FL_USER_MODIFIABLE;
11144                         sd_attrs_to_i_attrs(flags, inode);
11145                         REISERFS_I(inode)->i_attrs = flags;
11146                         inode->i_ctime = CURRENT_TIME_SEC;
11147 @@ -82,7 +90,8 @@ int reiserfs_ioctl(struct inode *inode, 
11148         case REISERFS_IOC_SETVERSION:
11149                 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
11150                         return -EPERM;
11151 -               if (IS_RDONLY(inode))
11152 +               if (IS_RDONLY(inode) ||
11153 +                       (filp && MNT_IS_RDONLY(filp->f_vfsmnt)))
11154                         return -EROFS;
11155                 if (get_user(inode->i_generation, (int __user *)arg))
11156                         return -EFAULT;
11157 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/namei.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/namei.c
11158 --- linux-2.6.16.20/fs/reiserfs/namei.c 2006-04-09 13:49:55 +0200
11159 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/namei.c    2006-04-26 19:07:00 +0200
11160 @@ -19,6 +19,7 @@
11161  #include <linux/reiserfs_xattr.h>
11162  #include <linux/smp_lock.h>
11163  #include <linux/quotaops.h>
11164 +#include <linux/vs_tag.h>
11165  
11166  #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { i->i_nlink++; if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; }
11167  #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) i->i_nlink--;
11168 @@ -365,6 +366,7 @@ static struct dentry *reiserfs_lookup(st
11169                         reiserfs_write_unlock(dir->i_sb);
11170                         return ERR_PTR(-EACCES);
11171                 }
11172 +               dx_propagate_tag(nd, inode);
11173  
11174                 /* Propogate the priv_object flag so we know we're in the priv tree */
11175                 if (is_reiserfs_priv_object(dir))
11176 @@ -600,6 +602,7 @@ static int new_inode_init(struct inode *
11177         } else {
11178                 inode->i_gid = current->fsgid;
11179         }
11180 +       inode->i_tag = dx_current_fstag(inode->i_sb);
11181         DQUOT_INIT(inode);
11182         return 0;
11183  }
11184 @@ -1546,6 +1549,7 @@ struct inode_operations reiserfs_dir_ino
11185         .listxattr = reiserfs_listxattr,
11186         .removexattr = reiserfs_removexattr,
11187         .permission = reiserfs_permission,
11188 +       .sync_flags = reiserfs_sync_flags,
11189  };
11190  
11191  /*
11192 @@ -1562,6 +1566,7 @@ struct inode_operations reiserfs_symlink
11193         .listxattr = reiserfs_listxattr,
11194         .removexattr = reiserfs_removexattr,
11195         .permission = reiserfs_permission,
11196 +       .sync_flags = reiserfs_sync_flags,
11197  
11198  };
11199  
11200 @@ -1575,5 +1580,6 @@ struct inode_operations reiserfs_special
11201         .listxattr = reiserfs_listxattr,
11202         .removexattr = reiserfs_removexattr,
11203         .permission = reiserfs_permission,
11204 +       .sync_flags = reiserfs_sync_flags,
11205  
11206  };
11207 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/stree.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/stree.c
11208 --- linux-2.6.16.20/fs/reiserfs/stree.c 2005-08-29 22:25:33 +0200
11209 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/stree.c    2006-04-26 19:07:00 +0200
11210 @@ -57,6 +57,7 @@
11211  #include <linux/smp_lock.h>
11212  #include <linux/buffer_head.h>
11213  #include <linux/quotaops.h>
11214 +#include <linux/vs_dlimit.h>
11215  
11216  /* Does the buffer contain a disk block which is in the tree. */
11217  inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh)
11218 @@ -1365,6 +1366,7 @@ int reiserfs_delete_item(struct reiserfs
11219                        "reiserquota delete_item(): freeing %u, id=%u type=%c",
11220                        quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih));
11221  #endif
11222 +       DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes);
11223         DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
11224  
11225         /* Return deleted body length */
11226 @@ -1453,6 +1455,7 @@ void reiserfs_delete_solid_item(struct r
11227  #endif
11228                                 DQUOT_FREE_SPACE_NODIRTY(inode,
11229                                                          quota_cut_bytes);
11230 +                               DLIMIT_FREE_SPACE(inode, quota_cut_bytes);
11231                         }
11232                         break;
11233                 }
11234 @@ -1808,6 +1811,7 @@ int reiserfs_cut_from_item(struct reiser
11235                        "reiserquota cut_from_item(): freeing %u id=%u type=%c",
11236                        quota_cut_bytes, p_s_inode->i_uid, '?');
11237  #endif
11238 +       DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes);
11239         DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
11240         return n_ret_value;
11241  }
11242 @@ -2048,6 +2052,11 @@ int reiserfs_paste_into_item(struct reis
11243                 pathrelse(p_s_search_path);
11244                 return -EDQUOT;
11245         }
11246 +       if (DLIMIT_ALLOC_SPACE(inode, n_pasted_size)) {
11247 +               DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
11248 +               pathrelse(p_s_search_path);
11249 +               return -ENOSPC;
11250 +       }
11251         init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path,
11252                        n_pasted_size);
11253  #ifdef DISPLACE_NEW_PACKING_LOCALITIES
11254 @@ -2100,6 +2109,7 @@ int reiserfs_paste_into_item(struct reis
11255                        n_pasted_size, inode->i_uid,
11256                        key2type(&(p_s_key->on_disk_key)));
11257  #endif
11258 +       DLIMIT_FREE_SPACE(inode, n_pasted_size);
11259         DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
11260         return retval;
11261  }
11262 @@ -2137,6 +2147,11 @@ int reiserfs_insert_item(struct reiserfs
11263                         pathrelse(p_s_path);
11264                         return -EDQUOT;
11265                 }
11266 +               if (DLIMIT_ALLOC_SPACE(inode, quota_bytes)) {
11267 +                       DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes);
11268 +                       pathrelse(p_s_path);
11269 +                       return -ENOSPC;
11270 +               }
11271         }
11272         init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path,
11273                        IH_SIZE + ih_item_len(p_s_ih));
11274 @@ -2184,7 +2199,9 @@ int reiserfs_insert_item(struct reiserfs
11275                        "reiserquota insert_item(): freeing %u id=%u type=%c",
11276                        quota_bytes, inode->i_uid, head2type(p_s_ih));
11277  #endif
11278 -       if (inode)
11279 +       if (inode) {
11280 +               DLIMIT_FREE_SPACE(inode, quota_bytes);
11281                 DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes);
11282 +       }
11283         return retval;
11284  }
11285 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/super.c
11286 --- linux-2.6.16.20/fs/reiserfs/super.c 2006-02-18 14:40:26 +0100
11287 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/super.c    2006-04-26 19:07:00 +0200
11288 @@ -138,7 +138,7 @@ static int remove_save_link_only(struct 
11289  }
11290  
11291  #ifdef CONFIG_QUOTA
11292 -static int reiserfs_quota_on_mount(struct super_block *, int);
11293 +static int reiserfs_quota_on_mount(struct dqhash *, int);
11294  #endif
11295  
11296  /* look for uncompleted unlinks and truncates and complete them */
11297 @@ -178,7 +178,7 @@ static int finish_unfinished(struct supe
11298         /* Turn on quotas so that they are updated correctly */
11299         for (i = 0; i < MAXQUOTAS; i++) {
11300                 if (REISERFS_SB(s)->s_qf_names[i]) {
11301 -                       int ret = reiserfs_quota_on_mount(s, i);
11302 +                       int ret = reiserfs_quota_on_mount(s->s_dqh, i);
11303                         if (ret < 0)
11304                                 reiserfs_warning(s,
11305                                                  "reiserfs: cannot turn on journalled quota: error %d",
11306 @@ -292,8 +292,8 @@ static int finish_unfinished(struct supe
11307  #ifdef CONFIG_QUOTA
11308         /* Turn quotas off */
11309         for (i = 0; i < MAXQUOTAS; i++) {
11310 -               if (sb_dqopt(s)->files[i])
11311 -                       vfs_quota_off_mount(s, i);
11312 +               if (dqh_dqopt(s->s_dqh)->files[i])
11313 +                       vfs_quota_off_mount(s->s_dqh, i);
11314         }
11315         if (ms_active_set)
11316                 /* Restore the flag back */
11317 @@ -578,9 +578,9 @@ static void reiserfs_clear_inode(struct 
11318  }
11319  
11320  #ifdef CONFIG_QUOTA
11321 -static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
11322 +static ssize_t reiserfs_quota_write(struct dqhash *, int, const char *,
11323                                     size_t, loff_t);
11324 -static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t,
11325 +static ssize_t reiserfs_quota_read(struct dqhash *, int, char *, size_t,
11326                                    loff_t);
11327  #endif
11328  
11329 @@ -613,8 +613,8 @@ static int reiserfs_write_dquot(struct d
11330  static int reiserfs_acquire_dquot(struct dquot *);
11331  static int reiserfs_release_dquot(struct dquot *);
11332  static int reiserfs_mark_dquot_dirty(struct dquot *);
11333 -static int reiserfs_write_info(struct super_block *, int);
11334 -static int reiserfs_quota_on(struct super_block *, int, int, char *);
11335 +static int reiserfs_write_info(struct dqhash *, int);
11336 +static int reiserfs_quota_on(struct dqhash *, int, int, char *);
11337  
11338  static struct dquot_operations reiserfs_quota_operations = {
11339         .initialize = reiserfs_dquot_initialize,
11340 @@ -882,6 +882,14 @@ static int reiserfs_parse_options(struct
11341                 {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT},
11342                 {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT},
11343  #endif
11344 +#ifndef CONFIG_TAGGING_NONE
11345 +               {"tagxid",.setmask = 1 << REISERFS_TAGGED},
11346 +               {"tag",.setmask = 1 << REISERFS_TAGGED},
11347 +               {"notag",.clrmask = 1 << REISERFS_TAGGED},
11348 +#endif
11349 +#ifdef CONFIG_PROPAGATE
11350 +               {"tag",.arg_required = 'T',.values = NULL},
11351 +#endif
11352  #ifdef CONFIG_REISERFS_FS_POSIX_ACL
11353                 {"acl",.setmask = 1 << REISERFS_POSIXACL},
11354                 {"noacl",.clrmask = 1 << REISERFS_POSIXACL},
11355 @@ -989,7 +997,7 @@ static int reiserfs_parse_options(struct
11356                 if (c == 'u' || c == 'g') {
11357                         int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
11358  
11359 -                       if (sb_any_quota_enabled(s)) {
11360 +                       if (dqh_any_quota_enabled(s->s_dqh)) {
11361                                 reiserfs_warning(s,
11362                                                  "reiserfs_parse_options: cannot change journalled quota options when quota turned on.");
11363                                 return 0;
11364 @@ -1052,7 +1060,7 @@ static int reiserfs_parse_options(struct
11365         }
11366         /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
11367         if (!(*mount_options & (1 << REISERFS_QUOTA))
11368 -           && sb_any_quota_enabled(s)) {
11369 +           && dqh_any_quota_enabled(s->s_dqh)) {
11370                 reiserfs_warning(s,
11371                                  "reiserfs_parse_options: quota options must be present when quota is turned on.");
11372                 return 0;
11373 @@ -1154,6 +1162,12 @@ static int reiserfs_remount(struct super
11374                 return -EINVAL;
11375         }
11376  
11377 +       if ((mount_options & (1 << REISERFS_TAGGED)) &&
11378 +               !(s->s_flags & MS_TAGGED)) {
11379 +               reiserfs_warning(s, "reiserfs: tagging not permitted on remount.");
11380 +               return -EINVAL;
11381 +       }
11382 +
11383         handle_attrs(s);
11384  
11385         /* Add options that are safe here */
11386 @@ -1456,7 +1470,7 @@ static int read_super_block(struct super
11387         s->s_export_op = &reiserfs_export_ops;
11388  #ifdef CONFIG_QUOTA
11389         s->s_qcop = &reiserfs_qctl_operations;
11390 -       s->dq_op = &reiserfs_quota_operations;
11391 +       s->s_qop = &reiserfs_quota_operations;
11392  #endif
11393  
11394         /* new format is limited by the 32 bit wide i_blocks field, want to
11395 @@ -1729,6 +1743,10 @@ static int reiserfs_fill_super(struct su
11396                 goto error;
11397         }
11398  
11399 +       /* map mount options tag/tagxid to MS_TAGGED */
11400 +       if (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TAGGED))
11401 +               s->s_flags |= MS_TAGGED;
11402 +
11403         rs = SB_DISK_SUPER_BLOCK(s);
11404         /* Let's do basic sanity check to verify that underlying device is not
11405            smaller than the filesystem. If the check fails then abort and scream,
11406 @@ -2003,16 +2021,16 @@ static int reiserfs_write_dquot(struct d
11407         struct reiserfs_transaction_handle th;
11408         int ret, err;
11409  
11410 -       reiserfs_write_lock(dquot->dq_sb);
11411 +       reiserfs_write_lock(dquot->dq_dqh->dqh_sb);
11412         ret =
11413 -           journal_begin(&th, dquot->dq_sb,
11414 -                         REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
11415 +           journal_begin(&th, dquot->dq_dqh->dqh_sb,
11416 +               REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_dqh->dqh_sb));
11417         if (ret)
11418                 goto out;
11419         ret = dquot_commit(dquot);
11420         err =
11421 -           journal_end(&th, dquot->dq_sb,
11422 -                       REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
11423 +           journal_end(&th, dquot->dq_dqh->dqh_sb,
11424 +               REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_dqh->dqh_sb));
11425         if (!ret && err)
11426                 ret = err;
11427        out:
11428 @@ -2025,20 +2043,20 @@ static int reiserfs_acquire_dquot(struct
11429         struct reiserfs_transaction_handle th;
11430         int ret, err;
11431  
11432 -       reiserfs_write_lock(dquot->dq_sb);
11433 +       reiserfs_write_lock(dquot->dq_dqh->dqh_sb);
11434         ret =
11435 -           journal_begin(&th, dquot->dq_sb,
11436 -                         REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
11437 +           journal_begin(&th, dquot->dq_dqh->dqh_sb,
11438 +               REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_dqh->dqh_sb));
11439         if (ret)
11440                 goto out;
11441         ret = dquot_acquire(dquot);
11442         err =
11443 -           journal_end(&th, dquot->dq_sb,
11444 -                       REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
11445 +           journal_end(&th, dquot->dq_dqh->dqh_sb,
11446 +               REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_dqh->dqh_sb));
11447         if (!ret && err)
11448                 ret = err;
11449        out:
11450 -       reiserfs_write_unlock(dquot->dq_sb);
11451 +       reiserfs_write_unlock(dquot->dq_dqh->dqh_sb);
11452         return ret;
11453  }
11454  
11455 @@ -2047,37 +2065,38 @@ static int reiserfs_release_dquot(struct
11456         struct reiserfs_transaction_handle th;
11457         int ret, err;
11458  
11459 -       reiserfs_write_lock(dquot->dq_sb);
11460 +       reiserfs_write_lock(dquot->dq_dqh->dqh_sb);
11461         ret =
11462 -           journal_begin(&th, dquot->dq_sb,
11463 -                         REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
11464 +           journal_begin(&th, dquot->dq_dqh->dqh_sb,
11465 +               REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_dqh->dqh_sb));
11466         if (ret)
11467                 goto out;
11468         ret = dquot_release(dquot);
11469         err =
11470 -           journal_end(&th, dquot->dq_sb,
11471 -                       REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
11472 +           journal_end(&th, dquot->dq_dqh->dqh_sb,
11473 +               REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_dqh->dqh_sb));
11474         if (!ret && err)
11475                 ret = err;
11476        out:
11477 -       reiserfs_write_unlock(dquot->dq_sb);
11478 +       reiserfs_write_unlock(dquot->dq_dqh->dqh_sb);
11479         return ret;
11480  }
11481  
11482  static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
11483  {
11484         /* Are we journalling quotas? */
11485 -       if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
11486 -           REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
11487 +       if (REISERFS_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[USRQUOTA] ||
11488 +           REISERFS_SB(dquot->dq_dqh->dqh_sb)->s_qf_names[GRPQUOTA]) {
11489                 dquot_mark_dquot_dirty(dquot);
11490                 return reiserfs_write_dquot(dquot);
11491         } else
11492                 return dquot_mark_dquot_dirty(dquot);
11493  }
11494  
11495 -static int reiserfs_write_info(struct super_block *sb, int type)
11496 +static int reiserfs_write_info(struct dqhash *hash, int type)
11497  {
11498         struct reiserfs_transaction_handle th;
11499 +       struct super_block *sb = hash->dqh_sb;
11500         int ret, err;
11501  
11502         /* Data block + inode block */
11503 @@ -2085,7 +2104,7 @@ static int reiserfs_write_info(struct su
11504         ret = journal_begin(&th, sb, 2);
11505         if (ret)
11506                 goto out;
11507 -       ret = dquot_commit_info(sb, type);
11508 +       ret = dquot_commit_info(hash, type);
11509         err = journal_end(&th, sb, 2);
11510         if (!ret && err)
11511                 ret = err;
11512 @@ -2097,18 +2116,21 @@ static int reiserfs_write_info(struct su
11513  /*
11514   * Turn on quotas during mount time - we need to find the quota file and such...
11515   */
11516 -static int reiserfs_quota_on_mount(struct super_block *sb, int type)
11517 +static int reiserfs_quota_on_mount(struct dqhash *hash, int type)
11518  {
11519 -       return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
11520 +       struct super_block *sb = hash->dqh_sb;
11521 +
11522 +       return vfs_quota_on_mount(hash, REISERFS_SB(sb)->s_qf_names[type],
11523                                   REISERFS_SB(sb)->s_jquota_fmt, type);
11524  }
11525  
11526  /*
11527   * Standard function to be called on quota_on
11528   */
11529 -static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
11530 +static int reiserfs_quota_on(struct dqhash *hash, int type, int format_id,
11531                              char *path)
11532  {
11533 +       struct super_block *sb = hash->dqh_sb;
11534         int err;
11535         struct nameidata nd;
11536  
11537 @@ -2133,7 +2155,7 @@ static int reiserfs_quota_on(struct supe
11538         if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] &&
11539             !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) {
11540                 path_release(&nd);
11541 -               return vfs_quota_on(sb, type, format_id, path);
11542 +               return vfs_quota_on(hash, type, format_id, path);
11543         }
11544         /* Quotafile not of fs root? */
11545         if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
11546 @@ -2141,17 +2163,18 @@ static int reiserfs_quota_on(struct supe
11547                                  "reiserfs: Quota file not on filesystem root. "
11548                                  "Journalled quota will not work.");
11549         path_release(&nd);
11550 -       return vfs_quota_on(sb, type, format_id, path);
11551 +       return vfs_quota_on(hash, type, format_id, path);
11552  }
11553  
11554  /* Read data from quotafile - avoid pagecache and such because we cannot afford
11555   * acquiring the locks... As quota files are never truncated and quota code
11556   * itself serializes the operations (and noone else should touch the files)
11557   * we don't have to be afraid of races */
11558 -static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
11559 +static ssize_t reiserfs_quota_read(struct dqhash *hash, int type, char *data,
11560                                    size_t len, loff_t off)
11561  {
11562 -       struct inode *inode = sb_dqopt(sb)->files[type];
11563 +       struct inode *inode = dqh_dqopt(hash)->files[type];
11564 +       struct super_block *sb = hash->dqh_sb;
11565         unsigned long blk = off >> sb->s_blocksize_bits;
11566         int err = 0, offset = off & (sb->s_blocksize - 1), tocopy;
11567         size_t toread;
11568 @@ -2193,10 +2216,11 @@ static ssize_t reiserfs_quota_read(struc
11569  
11570  /* Write to quotafile (we know the transaction is already started and has
11571   * enough credits) */
11572 -static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
11573 +static ssize_t reiserfs_quota_write(struct dqhash *hash, int type,
11574                                     const char *data, size_t len, loff_t off)
11575  {
11576 -       struct inode *inode = sb_dqopt(sb)->files[type];
11577 +       struct inode *inode = dqh_dqopt(hash)->files[type];
11578 +       struct super_block *sb = hash->dqh_sb;
11579         unsigned long blk = off >> sb->s_blocksize_bits;
11580         int err = 0, offset = off & (sb->s_blocksize - 1), tocopy;
11581         int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL;
11582 diff -NurpP --minimal linux-2.6.16.20/fs/reiserfs/xattr.c linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/xattr.c
11583 --- linux-2.6.16.20/fs/reiserfs/xattr.c 2006-02-18 14:40:26 +0100
11584 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/reiserfs/xattr.c    2006-04-26 19:07:00 +0200
11585 @@ -35,6 +35,7 @@
11586  #include <linux/namei.h>
11587  #include <linux/errno.h>
11588  #include <linux/fs.h>
11589 +#include <linux/mount.h>
11590  #include <linux/file.h>
11591  #include <linux/pagemap.h>
11592  #include <linux/xattr.h>
11593 @@ -824,7 +825,7 @@ int reiserfs_delete_xattrs(struct inode 
11594         if (dir->d_inode->i_nlink <= 2) {
11595                 root = get_xa_root(inode->i_sb);
11596                 reiserfs_write_lock_xattrs(inode->i_sb);
11597 -               err = vfs_rmdir(root->d_inode, dir);
11598 +               err = vfs_rmdir(root->d_inode, dir, NULL);
11599                 reiserfs_write_unlock_xattrs(inode->i_sb);
11600                 dput(root);
11601         } else {
11602 diff -NurpP --minimal linux-2.6.16.20/fs/stat.c linux-2.6.16.20-vs2.1.1-rc22/fs/stat.c
11603 --- linux-2.6.16.20/fs/stat.c   2006-02-18 14:40:26 +0100
11604 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/stat.c      2006-04-26 19:07:00 +0200
11605 @@ -27,6 +27,7 @@ void generic_fillattr(struct inode *inod
11606         stat->nlink = inode->i_nlink;
11607         stat->uid = inode->i_uid;
11608         stat->gid = inode->i_gid;
11609 +       stat->tag = inode->i_tag;
11610         stat->rdev = inode->i_rdev;
11611         stat->atime = inode->i_atime;
11612         stat->mtime = inode->i_mtime;
11613 diff -NurpP --minimal linux-2.6.16.20/fs/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/super.c
11614 --- linux-2.6.16.20/fs/super.c  2006-04-09 13:49:55 +0200
11615 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/super.c     2006-04-27 21:28:48 +0200
11616 @@ -37,6 +37,8 @@
11617  #include <linux/writeback.h>           /* for the emergency remount stuff */
11618  #include <linux/idr.h>
11619  #include <linux/kobject.h>
11620 +#include <linux/devpts_fs.h>
11621 +#include <linux/proc_fs.h>
11622  #include <asm/uaccess.h>
11623  
11624  
11625 @@ -77,15 +79,14 @@ static struct super_block *alloc_super(v
11626                 s->s_count = S_BIAS;
11627                 atomic_set(&s->s_active, 1);
11628                 sema_init(&s->s_vfs_rename_sem,1);
11629 -               sema_init(&s->s_dquot.dqio_sem, 1);
11630 -               sema_init(&s->s_dquot.dqonoff_sem, 1);
11631 -               init_rwsem(&s->s_dquot.dqptr_sem);
11632                 init_waitqueue_head(&s->s_wait_unfrozen);
11633                 s->s_maxbytes = MAX_NON_LFS;
11634 -               s->dq_op = sb_dquot_ops;
11635 +               s->s_qop = sb_dquot_ops;
11636                 s->s_qcop = sb_quotactl_ops;
11637                 s->s_op = &default_op;
11638                 s->s_time_gran = 1000000000;
11639 +               /* quick hack to make dqhash id unique, sufficient for now */
11640 +               s->s_dqh = new_dqhash(s, (unsigned long)s);
11641         }
11642  out:
11643         return s;
11644 @@ -100,6 +101,7 @@ out:
11645  static inline void destroy_super(struct super_block *s)
11646  {
11647         security_sb_free(s);
11648 +       dqhput(s->s_dqh);
11649         kfree(s);
11650  }
11651  
11652 @@ -171,7 +173,7 @@ void deactivate_super(struct super_block
11653         if (atomic_dec_and_lock(&s->s_active, &sb_lock)) {
11654                 s->s_count -= S_BIAS-1;
11655                 spin_unlock(&sb_lock);
11656 -               DQUOT_OFF(s);
11657 +               DQUOT_OFF(s->s_dqh);
11658                 down_write(&s->s_umount);
11659                 fs->kill_sb(s);
11660                 put_filesystem(fs);
11661 @@ -803,7 +805,7 @@ struct vfsmount *
11662  do_kern_mount(const char *fstype, int flags, const char *name, void *data)
11663  {
11664         struct file_system_type *type = get_fs_type(fstype);
11665 -       struct super_block *sb = ERR_PTR(-ENOMEM);
11666 +       struct super_block *sb;
11667         struct vfsmount *mnt;
11668         int error;
11669         char *secdata = NULL;
11670 @@ -811,6 +813,12 @@ do_kern_mount(const char *fstype, int fl
11671         if (!type)
11672                 return ERR_PTR(-ENODEV);
11673  
11674 +       sb = ERR_PTR(-EPERM);
11675 +       if ((type->fs_flags & FS_BINARY_MOUNTDATA) &&
11676 +               !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT))
11677 +               goto out;
11678 +
11679 +       sb = ERR_PTR(-ENOMEM);
11680         mnt = alloc_vfsmnt(name);
11681         if (!mnt)
11682                 goto out;
11683 @@ -832,6 +840,13 @@ do_kern_mount(const char *fstype, int fl
11684         sb = type->get_sb(type, flags, name, data);
11685         if (IS_ERR(sb))
11686                 goto out_free_secdata;
11687 +
11688 +       error = -EPERM;
11689 +       if (!capable(CAP_SYS_ADMIN) && !sb->s_bdev &&
11690 +               (sb->s_magic != PROC_SUPER_MAGIC) &&
11691 +               (sb->s_magic != DEVPTS_SUPER_MAGIC))
11692 +               goto out_sb;
11693 +
11694         error = security_sb_kern_mount(sb, secdata);
11695         if (error)
11696                 goto out_sb;
11697 diff -NurpP --minimal linux-2.6.16.20/fs/sysfs/mount.c linux-2.6.16.20-vs2.1.1-rc22/fs/sysfs/mount.c
11698 --- linux-2.6.16.20/fs/sysfs/mount.c    2005-08-29 22:25:33 +0200
11699 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/sysfs/mount.c       2006-04-26 19:07:00 +0200
11700 @@ -11,8 +11,6 @@
11701  
11702  #include "sysfs.h"
11703  
11704 -/* Random magic number */
11705 -#define SYSFS_MAGIC 0x62656572
11706  
11707  struct vfsmount *sysfs_mount;
11708  struct super_block * sysfs_sb = NULL;
11709 @@ -38,7 +36,7 @@ static int sysfs_fill_super(struct super
11710  
11711         sb->s_blocksize = PAGE_CACHE_SIZE;
11712         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
11713 -       sb->s_magic = SYSFS_MAGIC;
11714 +       sb->s_magic = SYSFS_SUPER_MAGIC;
11715         sb->s_op = &sysfs_ops;
11716         sb->s_time_gran = 1;
11717         sysfs_sb = sb;
11718 diff -NurpP --minimal linux-2.6.16.20/fs/udf/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/udf/super.c
11719 --- linux-2.6.16.20/fs/udf/super.c      2006-04-09 13:49:55 +0200
11720 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/udf/super.c 2006-04-26 19:07:00 +0200
11721 @@ -1575,7 +1575,7 @@ static int udf_fill_super(struct super_b
11722  
11723         /* Fill in the rest of the superblock */
11724         sb->s_op = &udf_sb_ops;
11725 -       sb->dq_op = NULL;
11726 +       sb->s_qop = NULL;
11727         sb->s_dirt = 0;
11728         sb->s_magic = UDF_SUPER_MAGIC;
11729         sb->s_time_gran = 1000;
11730 diff -NurpP --minimal linux-2.6.16.20/fs/ufs/super.c linux-2.6.16.20-vs2.1.1-rc22/fs/ufs/super.c
11731 --- linux-2.6.16.20/fs/ufs/super.c      2006-04-09 13:49:55 +0200
11732 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/ufs/super.c 2006-04-26 19:07:00 +0200
11733 @@ -873,7 +873,7 @@ magic_found:
11734          * Read ufs_super_block into internal data structures
11735          */
11736         sb->s_op = &ufs_super_ops;
11737 -       sb->dq_op = NULL; /***/
11738 +       sb->s_qop = NULL; /***/
11739         sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic);
11740  
11741         uspi->s_sblkno = fs32_to_cpu(sb, usb1->fs_sblkno);
11742 @@ -1198,8 +1198,8 @@ static void destroy_inodecache(void)
11743  }
11744  
11745  #ifdef CONFIG_QUOTA
11746 -static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t);
11747 -static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t);
11748 +static ssize_t ufs_quota_read(struct dqhash *, int, char *,size_t, loff_t);
11749 +static ssize_t ufs_quota_write(struct dqhash *, int, const char *, size_t, loff_t);
11750  #endif
11751  
11752  static struct super_operations ufs_super_ops = {
11753 @@ -1224,10 +1224,11 @@ static struct super_operations ufs_super
11754   * acquiring the locks... As quota files are never truncated and quota code
11755   * itself serializes the operations (and noone else should touch the files)
11756   * we don't have to be afraid of races */
11757 -static ssize_t ufs_quota_read(struct super_block *sb, int type, char *data,
11758 +static ssize_t ufs_quota_read(struct dqhash *hash, int type, char *data,
11759                                size_t len, loff_t off)
11760  {
11761 -       struct inode *inode = sb_dqopt(sb)->files[type];
11762 +       struct inode *inode = dqh_dqopt(hash)->files[type];
11763 +       struct super_block *sb = hash->dqh_sb;
11764         sector_t blk = off >> sb->s_blocksize_bits;
11765         int err = 0;
11766         int offset = off & (sb->s_blocksize - 1);
11767 @@ -1263,10 +1264,11 @@ static ssize_t ufs_quota_read(struct sup
11768  }
11769  
11770  /* Write to quotafile */
11771 -static ssize_t ufs_quota_write(struct super_block *sb, int type,
11772 +static ssize_t ufs_quota_write(struct dqhash *hash, int type,
11773                                 const char *data, size_t len, loff_t off)
11774  {
11775 -       struct inode *inode = sb_dqopt(sb)->files[type];
11776 +       struct inode *inode = dqh_dqopt(hash)->files[type];
11777 +       struct super_block *sb = hash->dqh_sb;
11778         sector_t blk = off >> sb->s_blocksize_bits;
11779         int err = 0;
11780         int offset = off & (sb->s_blocksize - 1);
11781 diff -NurpP --minimal linux-2.6.16.20/fs/xattr.c linux-2.6.16.20-vs2.1.1-rc22/fs/xattr.c
11782 --- linux-2.6.16.20/fs/xattr.c  2006-02-18 14:40:27 +0100
11783 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xattr.c     2006-04-26 19:07:00 +0200
11784 @@ -17,6 +17,7 @@
11785  #include <linux/syscalls.h>
11786  #include <linux/module.h>
11787  #include <linux/fsnotify.h>
11788 +#include <linux/mount.h>
11789  #include <asm/uaccess.h>
11790  
11791  
11792 @@ -167,7 +168,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
11793   */
11794  static long
11795  setxattr(struct dentry *d, char __user *name, void __user *value,
11796 -        size_t size, int flags)
11797 +        size_t size, int flags, struct vfsmount *mnt)
11798  {
11799         int error;
11800         void *kvalue = NULL;
11801 @@ -194,6 +195,9 @@ setxattr(struct dentry *d, char __user *
11802                 }
11803         }
11804  
11805 +       if (MNT_IS_RDONLY(mnt)) {       /* ro mount: free kvalue (may be NULL) before bailing out */
11806 +               kfree(kvalue); return -EROFS;
11807 +       }
11808         error = vfs_setxattr(d, kname, kvalue, size, flags);
11809         kfree(kvalue);
11810         return error;
11811 @@ -209,7 +213,7 @@ sys_setxattr(char __user *path, char __u
11812         error = user_path_walk(path, &nd);
11813         if (error)
11814                 return error;
11815 -       error = setxattr(nd.dentry, name, value, size, flags);
11816 +       error = setxattr(nd.dentry, name, value, size, flags, nd.mnt);
11817         path_release(&nd);
11818         return error;
11819  }
11820 @@ -224,7 +228,7 @@ sys_lsetxattr(char __user *path, char __
11821         error = user_path_walk_link(path, &nd);
11822         if (error)
11823                 return error;
11824 -       error = setxattr(nd.dentry, name, value, size, flags);
11825 +       error = setxattr(nd.dentry, name, value, size, flags, nd.mnt);
11826         path_release(&nd);
11827         return error;
11828  }
11829 @@ -239,7 +243,7 @@ sys_fsetxattr(int fd, char __user *name,
11830         f = fget(fd);
11831         if (!f)
11832                 return error;
11833 -       error = setxattr(f->f_dentry, name, value, size, flags);
11834 +       error = setxattr(f->f_dentry, name, value, size, flags, f->f_vfsmnt);
11835         fput(f);
11836         return error;
11837  }
11838 @@ -412,7 +416,7 @@ sys_flistxattr(int fd, char __user *list
11839   * Extended attribute REMOVE operations
11840   */
11841  static long
11842 -removexattr(struct dentry *d, char __user *name)
11843 +removexattr(struct dentry *d, char __user *name, struct vfsmount *mnt)
11844  {
11845         int error;
11846         char kname[XATTR_NAME_MAX + 1];
11847 @@ -423,6 +427,9 @@ removexattr(struct dentry *d, char __use
11848         if (error < 0)
11849                 return error;
11850  
11851 +       if (MNT_IS_RDONLY(mnt))
11852 +               return -EROFS;
11853 +
11854         return vfs_removexattr(d, kname);
11855  }
11856  
11857 @@ -435,7 +442,7 @@ sys_removexattr(char __user *path, char 
11858         error = user_path_walk(path, &nd);
11859         if (error)
11860                 return error;
11861 -       error = removexattr(nd.dentry, name);
11862 +       error = removexattr(nd.dentry, name, nd.mnt);
11863         path_release(&nd);
11864         return error;
11865  }
11866 @@ -449,7 +456,7 @@ sys_lremovexattr(char __user *path, char
11867         error = user_path_walk_link(path, &nd);
11868         if (error)
11869                 return error;
11870 -       error = removexattr(nd.dentry, name);
11871 +       error = removexattr(nd.dentry, name, nd.mnt);
11872         path_release(&nd);
11873         return error;
11874  }
11875 @@ -463,7 +470,7 @@ sys_fremovexattr(int fd, char __user *na
11876         f = fget(fd);
11877         if (!f)
11878                 return error;
11879 -       error = removexattr(f->f_dentry, name);
11880 +       error = removexattr(f->f_dentry, name, f->f_vfsmnt);
11881         fput(f);
11882         return error;
11883  }
11884 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_file.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_file.c
11885 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_file.c 2006-04-09 13:49:55 +0200
11886 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_file.c    2006-04-26 19:07:00 +0200
11887 @@ -536,6 +536,7 @@ struct file_operations linvfs_file_opera
11888         .aio_read       = linvfs_aio_read,
11889         .aio_write      = linvfs_aio_write,
11890         .sendfile       = linvfs_sendfile,
11891 +       .sendpage       = generic_file_sendpage,
11892         .unlocked_ioctl = linvfs_ioctl,
11893  #ifdef CONFIG_COMPAT
11894         .compat_ioctl   = linvfs_compat_ioctl,
11895 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_ioctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_ioctl.c
11896 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_ioctl.c        2006-02-18 14:40:27 +0100
11897 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_ioctl.c   2006-04-26 19:07:00 +0200
11898 @@ -1100,6 +1100,8 @@ xfs_ioc_fsgeometry(
11899  #define LINUX_XFLAG_APPEND     0x00000020 /* writes to file may only append */
11900  #define LINUX_XFLAG_NODUMP     0x00000040 /* do not dump file */
11901  #define LINUX_XFLAG_NOATIME    0x00000080 /* do not update atime */
11902 +#define LINUX_XFLAG_BARRIER    0x04000000 /* chroot() barrier */
11903 +#define LINUX_XFLAG_IUNLINK    0x08000000 /* immutable unlink */
11904  
11905  STATIC unsigned int
11906  xfs_merge_ioc_xflags(
11907 @@ -1140,6 +1142,10 @@ xfs_di2lxflags(
11908  
11909         if (di_flags & XFS_DIFLAG_IMMUTABLE)
11910                 flags |= LINUX_XFLAG_IMMUTABLE;
11911 +       if (di_flags & XFS_DIFLAG_IUNLINK)
11912 +               flags |= LINUX_XFLAG_IUNLINK;
11913 +       if (di_flags & XFS_DIFLAG_BARRIER)
11914 +               flags |= LINUX_XFLAG_BARRIER;
11915         if (di_flags & XFS_DIFLAG_APPEND)
11916                 flags |= LINUX_XFLAG_APPEND;
11917         if (di_flags & XFS_DIFLAG_SYNC)
11918 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_iops.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_iops.c
11919 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_iops.c 2006-05-11 21:25:36 +0200
11920 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_iops.c    2006-04-26 19:07:00 +0200
11921 @@ -55,6 +55,7 @@
11922  #include <linux/xattr.h>
11923  #include <linux/namei.h>
11924  #include <linux/security.h>
11925 +#include <linux/vserver/tag.h>
11926  
11927  /*
11928   * Get a XFS inode from a given vnode.
11929 @@ -410,6 +411,7 @@ linvfs_lookup(
11930                 d_add(dentry, NULL);
11931                 return NULL;
11932         }
11933 +       dx_propagate_tag(nd, LINVFS_GET_IP(cvp));
11934  
11935         return d_splice_alias(LINVFS_GET_IP(cvp), dentry);
11936  }
11937 @@ -646,6 +648,41 @@ linvfs_getattr(
11938  }
11939  
11940  STATIC int
11941 +linvfs_sync_flags(struct inode *inode)
11942 +{
11943 +       unsigned int oldflags, newflags;
11944 +       vattr_t         vattr;
11945 +       int             flags = 0;
11946 +       int             error;
11947 +       vnode_t         *vp = LINVFS_GET_VP(inode);
11948 +
11949 +       memset(&vattr, 0, sizeof(vattr_t));
11950 +       /* Sync IMMUTABLE/IUNLINK/BARRIER from the VFS inode into the XFS xflags. */
11951 +       vattr.va_mask = XFS_AT_XFLAGS;
11952 +       VOP_GETATTR(vp, &vattr, 0, NULL, error);
11953 +       if (error)
11954 +               return -error;  /* VOP errnos are positive */
11955 +       oldflags = vattr.va_xflags;
11956 +       newflags = oldflags & ~(XFS_XFLAG_IMMUTABLE |
11957 +               XFS_XFLAG_IUNLINK | XFS_XFLAG_BARRIER);
11958 +       /* Rebuild those three bits from the in-core inode; others are kept. */
11959 +       if (IS_IMMUTABLE(inode))
11960 +               newflags |= XFS_XFLAG_IMMUTABLE;
11961 +       if (IS_IUNLINK(inode))
11962 +               newflags |= XFS_XFLAG_IUNLINK;
11963 +       if (IS_BARRIER(inode))
11964 +               newflags |= XFS_XFLAG_BARRIER;
11965 +       /* Only issue a setattr when a bit actually changed. */
11966 +       if (oldflags ^ newflags) {
11967 +               vattr.va_xflags = newflags;
11968 +               vattr.va_mask |= XFS_AT_XFLAGS;
11969 +               VOP_SETATTR(vp, &vattr, flags, NULL, error);
11970 +       }
11971 +       vn_revalidate(vp);
11972 +       return -error;  /* 0, or a negative errno for the VFS caller */
11973 +}
11974 +
11975 +STATIC int
11976  linvfs_setattr(
11977         struct dentry   *dentry,
11978         struct iattr    *attr)
11979 @@ -657,6 +694,10 @@ linvfs_setattr(
11980         int             flags = 0;
11981         int             error;
11982  
11983 +       error = inode_change_ok(inode, attr);
11984 +       if (error)
11985 +               return error;
11986 +
11987         memset(&vattr, 0, sizeof(vattr_t));
11988         if (ia_valid & ATTR_UID) {
11989                 vattr.va_mask |= XFS_AT_UID;
11990 @@ -666,6 +707,10 @@ linvfs_setattr(
11991                 vattr.va_mask |= XFS_AT_GID;
11992                 vattr.va_gid = attr->ia_gid;
11993         }
11994 +       if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode)) {
11995 +               vattr.va_mask |= XFS_AT_TAG;
11996 +               vattr.va_tag = attr->ia_tag;
11997 +       }
11998         if (ia_valid & ATTR_SIZE) {
11999                 vattr.va_mask |= XFS_AT_SIZE;
12000                 vattr.va_size = attr->ia_size;
12001 @@ -824,6 +869,7 @@ struct inode_operations linvfs_file_inod
12002         .getxattr               = linvfs_getxattr,
12003         .listxattr              = linvfs_listxattr,
12004         .removexattr            = linvfs_removexattr,
12005 +       .sync_flags             = linvfs_sync_flags,
12006  };
12007  
12008  struct inode_operations linvfs_dir_inode_operations = {
12009 @@ -843,6 +889,7 @@ struct inode_operations linvfs_dir_inode
12010         .getxattr               = linvfs_getxattr,
12011         .listxattr              = linvfs_listxattr,
12012         .removexattr            = linvfs_removexattr,
12013 +       .sync_flags             = linvfs_sync_flags,
12014  };
12015  
12016  struct inode_operations linvfs_symlink_inode_operations = {
12017 @@ -856,4 +903,5 @@ struct inode_operations linvfs_symlink_i
12018         .getxattr               = linvfs_getxattr,
12019         .listxattr              = linvfs_listxattr,
12020         .removexattr            = linvfs_removexattr,
12021 +       .sync_flags             = linvfs_sync_flags,
12022  };
12023 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_linux.h linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_linux.h
12024 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_linux.h        2006-02-18 14:40:27 +0100
12025 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_linux.h   2006-04-26 19:07:00 +0200
12026 @@ -133,6 +133,7 @@ BUFFER_FNS(PrivateStart, unwritten);
12027  #define current_pid()          (current->pid)
12028  #define current_fsuid(cred)    (current->fsuid)
12029  #define current_fsgid(cred)    (current->fsgid)
12030 +#define current_fstag(cred,vp) (dx_current_fstag(LINVFS_GET_IP(vp)->i_sb))
12031  
12032  #define NBPP           PAGE_SIZE
12033  #define DPPSHFT                (PAGE_SHIFT - 9)
12034 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_super.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_super.c
12035 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_super.c        2006-04-09 13:49:55 +0200
12036 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_super.c   2006-04-26 19:07:00 +0200
12037 @@ -161,6 +161,7 @@ xfs_revalidate_inode(
12038         inode->i_nlink  = ip->i_d.di_nlink;
12039         inode->i_uid    = ip->i_d.di_uid;
12040         inode->i_gid    = ip->i_d.di_gid;
12041 +       inode->i_tag    = ip->i_d.di_tag;
12042  
12043         switch (inode->i_mode & S_IFMT) {
12044         case S_IFBLK:
12045 @@ -189,6 +190,14 @@ xfs_revalidate_inode(
12046                 inode->i_flags |= S_IMMUTABLE;
12047         else
12048                 inode->i_flags &= ~S_IMMUTABLE;
12049 +       if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK)
12050 +               inode->i_flags |= S_IUNLINK;
12051 +       else
12052 +               inode->i_flags &= ~S_IUNLINK;
12053 +       if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER)
12054 +               inode->i_flags |= S_BARRIER;
12055 +       else
12056 +               inode->i_flags &= ~S_BARRIER;
12057         if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
12058                 inode->i_flags |= S_APPEND;
12059         else
12060 @@ -729,6 +738,12 @@ linvfs_remount(
12061         int                     error;
12062  
12063         VFS_PARSEARGS(vfsp, options, args, 1, error);
12064 +       if ((args->flags2 & XFSMNT2_TAGGED) &&
12065 +               !(sb->s_flags & MS_TAGGED)) {
12066 +               printk("XFS: %s: tagging not permitted on remount.\n",
12067 +                       sb->s_id);
12068 +               error = EINVAL;
12069 +       }
12070         if (!error)
12071                 VFS_MNTUPDATE(vfsp, flags, args, error);
12072         kmem_free(args, sizeof(*args));
12073 @@ -756,9 +771,10 @@ linvfs_show_options(
12074  
12075  STATIC int
12076  linvfs_quotasync(
12077 -       struct super_block      *sb,
12078 +       struct dqhash           *hash,
12079         int                     type)
12080  {
12081 +       struct super_block      *sb = hash->dqh_sb;
12082         struct vfs              *vfsp = LINVFS_GET_VFS(sb);
12083         int                     error;
12084  
12085 @@ -768,10 +784,10 @@ linvfs_quotasync(
12086  
12087  STATIC int
12088  linvfs_getxstate(
12089 -       struct super_block      *sb,
12090 +       struct dqhash           *hash,
12091         struct fs_quota_stat    *fqs)
12092  {
12093 -       struct vfs              *vfsp = LINVFS_GET_VFS(sb);
12094 +       struct vfs              *vfsp = LINVFS_GET_VFS(hash->dqh_sb);
12095         int                     error;
12096  
12097         VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error);
12098 @@ -780,11 +796,11 @@ linvfs_getxstate(
12099  
12100  STATIC int
12101  linvfs_setxstate(
12102 -       struct super_block      *sb,
12103 +       struct dqhash           *hash,
12104         unsigned int            flags,
12105         int                     op)
12106  {
12107 -       struct vfs              *vfsp = LINVFS_GET_VFS(sb);
12108 +       struct vfs              *vfsp = LINVFS_GET_VFS(hash->dqh_sb);
12109         int                     error;
12110  
12111         VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error);
12112 @@ -793,12 +809,12 @@ linvfs_setxstate(
12113  
12114  STATIC int
12115  linvfs_getxquota(
12116 -       struct super_block      *sb,
12117 +       struct dqhash           *hash,
12118         int                     type,
12119         qid_t                   id,
12120         struct fs_disk_quota    *fdq)
12121  {
12122 -       struct vfs              *vfsp = LINVFS_GET_VFS(sb);
12123 +       struct vfs              *vfsp = LINVFS_GET_VFS(hash->dqh_sb);
12124         int                     error, getmode;
12125  
12126         getmode = (type == USRQUOTA) ? Q_XGETQUOTA :
12127 @@ -809,12 +825,12 @@ linvfs_getxquota(
12128  
12129  STATIC int
12130  linvfs_setxquota(
12131 -       struct super_block      *sb,
12132 +       struct dqhash           *hash,
12133         int                     type,
12134         qid_t                   id,
12135         struct fs_disk_quota    *fdq)
12136  {
12137 -       struct vfs              *vfsp = LINVFS_GET_VFS(sb);
12138 +       struct vfs              *vfsp = LINVFS_GET_VFS(hash->dqh_sb);
12139         int                     error, setmode;
12140  
12141         setmode = (type == USRQUOTA) ? Q_XSETQLIM :
12142 @@ -852,6 +868,9 @@ linvfs_fill_super(
12143         sb->s_export_op = &linvfs_export_ops;
12144  #endif
12145         sb->s_qcop = &linvfs_qops;
12146 +#ifdef CONFIG_QUOTA
12147 +       sb->s_dqh->dqh_qcop = &linvfs_qops;
12148 +#endif
12149         sb->s_op = &linvfs_sops;
12150  
12151         VFS_MOUNT(vfsp, args, NULL, error);
12152 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_sysctl.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_sysctl.c
12153 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_sysctl.c       2006-01-03 17:29:59 +0100
12154 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_sysctl.c  2006-04-26 19:07:00 +0200
12155 @@ -58,74 +58,74 @@ xfs_stats_clear_proc_handler(
12156  STATIC ctl_table xfs_table[] = {
12157         {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val,
12158         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12159 -       &sysctl_intvec, NULL,
12160 +       NULL, &sysctl_intvec, NULL,
12161         &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max},
12162  
12163         {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val,
12164         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12165 -       &sysctl_intvec, NULL,
12166 +       NULL, &sysctl_intvec, NULL,
12167         &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max},
12168  
12169         {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val,
12170         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12171 -       &sysctl_intvec, NULL,
12172 +       NULL, &sysctl_intvec, NULL,
12173         &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max},
12174  
12175         {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val,
12176         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12177 -       &sysctl_intvec, NULL,
12178 +       NULL, &sysctl_intvec, NULL,
12179         &xfs_params.panic_mask.min, &xfs_params.panic_mask.max},
12180  
12181         {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val,
12182         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12183 -       &sysctl_intvec, NULL,
12184 +       NULL, &sysctl_intvec, NULL,
12185         &xfs_params.error_level.min, &xfs_params.error_level.max},
12186  
12187         {XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val,
12188         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12189 -       &sysctl_intvec, NULL,
12190 +       NULL, &sysctl_intvec, NULL,
12191         &xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max},
12192  
12193         {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val,
12194         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12195 -       &sysctl_intvec, NULL,
12196 +       NULL, &sysctl_intvec, NULL,
12197         &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max},
12198  
12199         {XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val,
12200         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12201 -       &sysctl_intvec, NULL,
12202 +       NULL, &sysctl_intvec, NULL,
12203         &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max},
12204  
12205         {XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val,
12206         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12207 -       &sysctl_intvec, NULL,
12208 +       NULL, &sysctl_intvec, NULL,
12209         &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max},
12210  
12211         {XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val,
12212         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12213 -       &sysctl_intvec, NULL,
12214 +       NULL, &sysctl_intvec, NULL,
12215         &xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max},
12216  
12217         {XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val,
12218         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12219 -       &sysctl_intvec, NULL,
12220 +       NULL, &sysctl_intvec, NULL,
12221         &xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max},
12222  
12223         {XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val,
12224         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12225 -       &sysctl_intvec, NULL,
12226 +       NULL, &sysctl_intvec, NULL,
12227         &xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max},
12228  
12229         {XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val,
12230         sizeof(int), 0644, NULL, &proc_dointvec_minmax,
12231 -       &sysctl_intvec, NULL,
12232 +       NULL, &sysctl_intvec, NULL,
12233         &xfs_params.rotorstep.min, &xfs_params.rotorstep.max},
12234  
12235         /* please keep this the last entry */
12236  #ifdef CONFIG_PROC_FS
12237         {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val,
12238         sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler,
12239 -       &sysctl_intvec, NULL,
12240 +       NULL, &sysctl_intvec, NULL,
12241         &xfs_params.stats_clear.min, &xfs_params.stats_clear.max},
12242  #endif /* CONFIG_PROC_FS */
12243  
12244 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_vnode.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_vnode.c
12245 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_vnode.c        2006-02-18 14:40:27 +0100
12246 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_vnode.c   2006-04-26 19:07:00 +0200
12247 @@ -103,6 +103,7 @@ vn_revalidate_core(
12248         inode->i_nlink      = vap->va_nlink;
12249         inode->i_uid        = vap->va_uid;
12250         inode->i_gid        = vap->va_gid;
12251 +       inode->i_tag        = vap->va_tag;
12252         inode->i_blocks     = vap->va_nblocks;
12253         inode->i_mtime      = vap->va_mtime;
12254         inode->i_ctime      = vap->va_ctime;
12255 @@ -111,6 +112,14 @@ vn_revalidate_core(
12256                 inode->i_flags |= S_IMMUTABLE;
12257         else
12258                 inode->i_flags &= ~S_IMMUTABLE;
12259 +       if (vap->va_xflags & XFS_XFLAG_IUNLINK)
12260 +               inode->i_flags |= S_IUNLINK;
12261 +       else
12262 +               inode->i_flags &= ~S_IUNLINK;
12263 +       if (vap->va_xflags & XFS_XFLAG_BARRIER)
12264 +               inode->i_flags |= S_BARRIER;
12265 +       else
12266 +               inode->i_flags &= ~S_BARRIER;
12267         if (vap->va_xflags & XFS_XFLAG_APPEND)
12268                 inode->i_flags |= S_APPEND;
12269         else
12270 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/linux-2.6/xfs_vnode.h linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_vnode.h
12271 --- linux-2.6.16.20/fs/xfs/linux-2.6/xfs_vnode.h        2006-02-18 14:40:27 +0100
12272 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/linux-2.6/xfs_vnode.h   2006-04-26 19:07:00 +0200
12273 @@ -386,6 +386,7 @@ typedef struct vattr {
12274         xfs_nlink_t     va_nlink;       /* number of references to file */
12275         uid_t           va_uid;         /* owner user id */
12276         gid_t           va_gid;         /* owner group id */
12277 +       tag_t           va_tag;         /* context tag */
12278         xfs_ino_t       va_nodeid;      /* file id */
12279         xfs_off_t       va_size;        /* file size in bytes */
12280         u_long          va_blocksize;   /* blocksize preferred for i/o */
12281 @@ -434,13 +435,15 @@ typedef struct vattr {
12282  #define XFS_AT_PROJID          0x04000000
12283  #define XFS_AT_SIZE_NOPERM     0x08000000
12284  #define XFS_AT_GENCOUNT                0x10000000
12285 +#define XFS_AT_TAG             0x20000000
12286  
12287  #define XFS_AT_ALL     (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
12288                 XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
12289                 XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
12290                 XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
12291                 XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
12292 -               XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)
12293 +               XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT|\
12294 +               XFS_AT_TAG)
12295  
12296  #define XFS_AT_STAT    (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
12297                 XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
12298 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/quota/xfs_qm_syscalls.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/quota/xfs_qm_syscalls.c
12299 --- linux-2.6.16.20/fs/xfs/quota/xfs_qm_syscalls.c      2006-04-09 13:49:55 +0200
12300 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/quota/xfs_qm_syscalls.c 2006-04-27 21:30:23 +0200
12301 @@ -215,7 +215,7 @@ xfs_qm_scall_quotaoff(
12302         xfs_qoff_logitem_t      *qoffstart;
12303         int                     nculprits;
12304  
12305 -       if (!force && !capable(CAP_SYS_ADMIN))
12306 +       if (!force && !vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
12307                 return XFS_ERROR(EPERM);
12308         /*
12309          * No file system can have quotas enabled on disk but not in core.
12310 @@ -384,7 +384,7 @@ xfs_qm_scall_trunc_qfiles(
12311         int             error;
12312         xfs_inode_t     *qip;
12313  
12314 -       if (!capable(CAP_SYS_ADMIN))
12315 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
12316                 return XFS_ERROR(EPERM);
12317         error = 0;
12318         if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) {
12319 @@ -429,7 +429,7 @@ xfs_qm_scall_quotaon(
12320         uint            accflags;
12321         __int64_t       sbflags;
12322  
12323 -       if (!capable(CAP_SYS_ADMIN))
12324 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
12325                 return XFS_ERROR(EPERM);
12326  
12327         flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
12328 @@ -600,7 +600,7 @@ xfs_qm_scall_setqlim(
12329         int                     error;
12330         xfs_qcnt_t              hard, soft;
12331  
12332 -       if (!capable(CAP_SYS_ADMIN))
12333 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
12334                 return XFS_ERROR(EPERM);
12335  
12336         if ((newlim->d_fieldmask &
12337 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_clnt.h linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_clnt.h
12338 --- linux-2.6.16.20/fs/xfs/xfs_clnt.h   2006-02-18 14:40:27 +0100
12339 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_clnt.h      2006-04-26 19:07:00 +0200
12340 @@ -100,5 +100,7 @@ struct xfs_mount_args {
12341   */
12342  #define XFSMNT2_COMPAT_IOSIZE  0x00000001      /* don't report large preferred
12343                                                  * I/O size in stat(2) */
12344 +#define XFSMNT2_TAGGED         0x80000000      /* context tagging */
12345 +
12346  
12347  #endif /* __XFS_CLNT_H__ */
12348 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_dinode.h linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_dinode.h
12349 --- linux-2.6.16.20/fs/xfs/xfs_dinode.h 2006-04-09 13:49:55 +0200
12350 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_dinode.h    2006-04-26 19:07:00 +0200
12351 @@ -53,7 +53,8 @@ typedef struct xfs_dinode_core
12352         __uint32_t      di_gid;         /* owner's group id */
12353         __uint32_t      di_nlink;       /* number of links to file */
12354         __uint16_t      di_projid;      /* owner's project id */
12355 -       __uint8_t       di_pad[8];      /* unused, zeroed space */
12356 +       __uint16_t      di_tag;         /* context tagging */
12357 +       __uint8_t       di_pad[6];      /* unused, zeroed space */
12358         __uint16_t      di_flushiter;   /* incremented on flush */
12359         xfs_timestamp_t di_atime;       /* time last accessed */
12360         xfs_timestamp_t di_mtime;       /* time last modified */
12361 @@ -257,6 +258,9 @@ typedef enum xfs_dinode_fmt
12362  #define XFS_DIFLAG_NOSYMLINKS_BIT   10 /* disallow symlink creation */
12363  #define XFS_DIFLAG_EXTSIZE_BIT      11 /* inode extent size allocator hint */
12364  #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */
12365 +#define XFS_DIFLAG_BARRIER_BIT 13      /* chroot() barrier */
12366 +#define XFS_DIFLAG_IUNLINK_BIT 14      /* immutable unlink */
12367 +
12368  #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
12369  #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
12370  #define XFS_DIFLAG_NEWRTBM       (1 << XFS_DIFLAG_NEWRTBM_BIT)
12371 @@ -270,12 +274,14 @@ typedef enum xfs_dinode_fmt
12372  #define XFS_DIFLAG_NOSYMLINKS    (1 << XFS_DIFLAG_NOSYMLINKS_BIT)
12373  #define XFS_DIFLAG_EXTSIZE       (1 << XFS_DIFLAG_EXTSIZE_BIT)
12374  #define XFS_DIFLAG_EXTSZINHERIT  (1 << XFS_DIFLAG_EXTSZINHERIT_BIT)
12375 +#define XFS_DIFLAG_BARRIER      (1 << XFS_DIFLAG_BARRIER_BIT)
12376 +#define XFS_DIFLAG_IUNLINK      (1 << XFS_DIFLAG_IUNLINK_BIT)
12377  
12378  #define XFS_DIFLAG_ANY \
12379         (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
12380          XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
12381          XFS_DIFLAG_NOATIME | XFS_DIFLAG_NODUMP | XFS_DIFLAG_RTINHERIT | \
12382          XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
12383 -        XFS_DIFLAG_EXTSZINHERIT)
12384 +        XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_BARRIER | XFS_DIFLAG_IUNLINK)
12385  
12386  #endif /* __XFS_DINODE_H__ */
12387 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_fs.h linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_fs.h
12388 --- linux-2.6.16.20/fs/xfs/xfs_fs.h     2006-04-09 13:49:55 +0200
12389 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_fs.h        2006-04-26 19:07:00 +0200
12390 @@ -67,6 +67,8 @@ struct fsxattr {
12391  #define XFS_XFLAG_NOSYMLINKS   0x00000400      /* disallow symlink creation */
12392  #define XFS_XFLAG_EXTSIZE      0x00000800      /* extent size allocator hint */
12393  #define XFS_XFLAG_EXTSZINHERIT 0x00001000      /* inherit inode extent size */
12394 +#define XFS_XFLAG_BARRIER      0x00004000      /* chroot() barrier */
12395 +#define XFS_XFLAG_IUNLINK      0x00008000      /* immutable unlink */
12396  #define XFS_XFLAG_HASATTR      0x80000000      /* no DIFLAG for this   */
12397  
12398  /*
12399 @@ -295,7 +297,8 @@ typedef struct xfs_bstat {
12400         __s32           bs_extents;     /* number of extents            */
12401         __u32           bs_gen;         /* generation count             */
12402         __u16           bs_projid;      /* project id                   */
12403 -       unsigned char   bs_pad[14];     /* pad space, unused            */
12404 +       __u16           bs_tag;         /* context tagging              */
12405 +       unsigned char   bs_pad[12];     /* pad space, unused            */
12406         __u32           bs_dmevmask;    /* DMIG event mask              */
12407         __u16           bs_dmstate;     /* DMIG state info              */
12408         __u16           bs_aextents;    /* attribute number of extents  */
12409 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_inode.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_inode.c
12410 --- linux-2.6.16.20/fs/xfs/xfs_inode.c  2006-02-18 14:40:27 +0100
12411 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_inode.c     2006-04-26 19:07:00 +0200
12412 @@ -52,6 +52,7 @@
12413  #include "xfs_mac.h"
12414  #include "xfs_acl.h"
12415  
12416 +#include <linux/vs_tag.h>
12417  
12418  kmem_zone_t *xfs_ifork_zone;
12419  kmem_zone_t *xfs_inode_zone;
12420 @@ -734,20 +735,35 @@ xfs_xlate_dinode_core(
12421         xfs_dinode_core_t       *buf_core = (xfs_dinode_core_t *)buf;
12422         xfs_dinode_core_t       *mem_core = (xfs_dinode_core_t *)dip;
12423         xfs_arch_t              arch = ARCH_CONVERT;
12424 +       uint32_t                uid = 0, gid = 0;
12425 +       uint16_t                tag = 0;
12426  
12427         ASSERT(dir);
12428  
12429 +       if (dir < 0) {
12430 +               tag = mem_core->di_tag;
12431 +               /* FIXME: supposed to use superblock flag */
12432 +               uid = TAGINO_UID(1, mem_core->di_uid, tag);
12433 +               gid = TAGINO_GID(1, mem_core->di_gid, tag);
12434 +               tag = TAGINO_TAG(1, tag);
12435 +       }
12436 +
12437         INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch);
12438         INT_XLATE(buf_core->di_mode, mem_core->di_mode, dir, arch);
12439         INT_XLATE(buf_core->di_version, mem_core->di_version, dir, arch);
12440         INT_XLATE(buf_core->di_format, mem_core->di_format, dir, arch);
12441         INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch);
12442 -       INT_XLATE(buf_core->di_uid, mem_core->di_uid, dir, arch);
12443 -       INT_XLATE(buf_core->di_gid, mem_core->di_gid, dir, arch);
12444 +       INT_XLATE(buf_core->di_uid, uid, dir, arch);
12445 +       INT_XLATE(buf_core->di_gid, gid, dir, arch);
12446 +       INT_XLATE(buf_core->di_tag, tag, dir, arch);
12447         INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch);
12448         INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch);
12449  
12450         if (dir > 0) {
12451 +               /* FIXME: supposed to use superblock flag */
12452 +               mem_core->di_uid = INOTAG_UID(1, uid, gid);
12453 +               mem_core->di_gid = INOTAG_GID(1, uid, gid);
12454 +               mem_core->di_tag = INOTAG_TAG(1, uid, gid, tag);
12455                 memcpy(mem_core->di_pad, buf_core->di_pad,
12456                         sizeof(buf_core->di_pad));
12457         } else {
12458 @@ -796,6 +812,10 @@ _xfs_dic2xflags(
12459                         flags |= XFS_XFLAG_PREALLOC;
12460                 if (di_flags & XFS_DIFLAG_IMMUTABLE)
12461                         flags |= XFS_XFLAG_IMMUTABLE;
12462 +               if (di_flags & XFS_DIFLAG_IUNLINK)
12463 +                       flags |= XFS_XFLAG_IUNLINK;
12464 +               if (di_flags & XFS_DIFLAG_BARRIER)
12465 +                       flags |= XFS_XFLAG_BARRIER;
12466                 if (di_flags & XFS_DIFLAG_APPEND)
12467                         flags |= XFS_XFLAG_APPEND;
12468                 if (di_flags & XFS_DIFLAG_SYNC)
12469 @@ -1125,6 +1145,7 @@ xfs_ialloc(
12470         ASSERT(ip->i_d.di_nlink == nlink);
12471         ip->i_d.di_uid = current_fsuid(cr);
12472         ip->i_d.di_gid = current_fsgid(cr);
12473 +       ip->i_d.di_tag = current_fstag(cr, vp);
12474         ip->i_d.di_projid = prid;
12475         memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
12476  
12477 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_itable.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_itable.c
12478 --- linux-2.6.16.20/fs/xfs/xfs_itable.c 2006-02-18 14:40:27 +0100
12479 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_itable.c    2006-04-26 19:07:00 +0200
12480 @@ -85,6 +85,7 @@ xfs_bulkstat_one_iget(
12481         buf->bs_mode = dic->di_mode;
12482         buf->bs_uid = dic->di_uid;
12483         buf->bs_gid = dic->di_gid;
12484 +       buf->bs_tag = dic->di_tag;
12485         buf->bs_size = dic->di_size;
12486         vn_atime_to_bstime(vp, &buf->bs_atime);
12487         buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
12488 @@ -159,6 +160,7 @@ xfs_bulkstat_one_dinode(
12489         buf->bs_mode = INT_GET(dic->di_mode, ARCH_CONVERT);
12490         buf->bs_uid = INT_GET(dic->di_uid, ARCH_CONVERT);
12491         buf->bs_gid = INT_GET(dic->di_gid, ARCH_CONVERT);
12492 +       buf->bs_tag = INT_GET(dic->di_tag, ARCH_CONVERT);
12493         buf->bs_size = INT_GET(dic->di_size, ARCH_CONVERT);
12494         buf->bs_atime.tv_sec = INT_GET(dic->di_atime.t_sec, ARCH_CONVERT);
12495         buf->bs_atime.tv_nsec = INT_GET(dic->di_atime.t_nsec, ARCH_CONVERT);
12496 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_mount.h linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_mount.h
12497 --- linux-2.6.16.20/fs/xfs/xfs_mount.h  2006-02-18 14:40:27 +0100
12498 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_mount.h     2006-04-26 19:07:00 +0200
12499 @@ -412,6 +412,7 @@ typedef struct xfs_mount {
12500  #define XFS_MOUNT_COMPAT_IOSIZE        (1ULL << 22)    /* don't report large preferred
12501                                                  * I/O size in stat() */
12502  
12503 +#define XFS_MOUNT_TAGGED       (1ULL << 31)    /* context tagging */
12504  
12505  /*
12506   * Default minimum read and write sizes.
12507 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_vfsops.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_vfsops.c
12508 --- linux-2.6.16.20/fs/xfs/xfs_vfsops.c 2006-02-18 14:40:27 +0100
12509 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_vfsops.c    2006-04-26 19:07:00 +0200
12510 @@ -296,6 +296,8 @@ xfs_start_flags(
12511  
12512         if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
12513                 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
12514 +       if (ap->flags2 & XFSMNT2_TAGGED)
12515 +               mp->m_flags |= XFS_MOUNT_TAGGED;
12516  
12517         /*
12518          * no recovery flag requires a read-only mount
12519 @@ -390,6 +392,8 @@ xfs_finish_flags(
12520                         return XFS_ERROR(EINVAL);
12521         }
12522  
12523 +       if (ap->flags2 & XFSMNT2_TAGGED)
12524 +               vfs->vfs_super->s_flags |= MS_TAGGED;
12525         return 0;
12526  }
12527  
12528 @@ -1653,6 +1657,9 @@ xfs_vget(
12529                                          * in stat(). */
12530  #define MNTOPT_ATTR2   "attr2"         /* do use attr2 attribute format */
12531  #define MNTOPT_NOATTR2 "noattr2"       /* do not use attr2 attribute format */
12532 +#define MNTOPT_TAGXID  "tagxid"        /* context tagging for inodes */
12533 +#define MNTOPT_TAGGED  "tag"           /* context tagging for inodes */
12534 +#define MNTOPT_NOTAGTAG        "notag"         /* do not use context tagging */
12535  
12536  STATIC unsigned long
12537  suffix_strtoul(const char *cp, char **endp, unsigned int base)
12538 @@ -1829,6 +1836,19 @@ xfs_parseargs(
12539                         args->flags |= XFSMNT_ATTR2;
12540                 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
12541                         args->flags &= ~XFSMNT_ATTR2;
12542 +#ifndef CONFIG_TAGGING_NONE
12543 +               } else if (!strcmp(this_char, MNTOPT_TAGGED)) {
12544 +                       args->flags2 |= XFSMNT2_TAGGED;
12545 +               } else if (!strcmp(this_char, MNTOPT_NOTAGTAG)) {
12546 +                       args->flags2 &= ~XFSMNT2_TAGGED;
12547 +               } else if (!strcmp(this_char, MNTOPT_TAGXID)) {
12548 +                       args->flags2 |= XFSMNT2_TAGGED;
12549 +#endif
12550 +#ifdef CONFIG_PROPAGATE
12551 +               } else if (!strcmp(this_char, MNTOPT_TAGGED)) {
12552 +                       /* use value */
12553 +                       args->flags2 |= XFSMNT2_TAGGED;
12554 +#endif
12555                 } else if (!strcmp(this_char, "osyncisdsync")) {
12556                         /* no-op, this is now the default */
12557  printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
12558 diff -NurpP --minimal linux-2.6.16.20/fs/xfs/xfs_vnodeops.c linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_vnodeops.c
12559 --- linux-2.6.16.20/fs/xfs/xfs_vnodeops.c       2006-02-18 14:40:27 +0100
12560 +++ linux-2.6.16.20-vs2.1.1-rc22/fs/xfs/xfs_vnodeops.c  2006-04-26 19:07:00 +0200
12561 @@ -154,6 +154,7 @@ xfs_getattr(
12562         vap->va_mode = ip->i_d.di_mode;
12563         vap->va_uid = ip->i_d.di_uid;
12564         vap->va_gid = ip->i_d.di_gid;
12565 +       vap->va_tag = ip->i_d.di_tag;
12566         vap->va_projid = ip->i_d.di_projid;
12567  
12568         /*
12569 @@ -254,6 +255,7 @@ xfs_setattr(
12570         uint                    commit_flags=0;
12571         uid_t                   uid=0, iuid=0;
12572         gid_t                   gid=0, igid=0;
12573 +       tag_t                   tag=0, itag=0;
12574         int                     timeflags = 0;
12575         vnode_t                 *vp;
12576         xfs_prid_t              projid=0, iprojid=0;
12577 @@ -310,6 +312,7 @@ xfs_setattr(
12578             (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
12579                 uint    qflags = 0;
12580  
12581 +               /* FIXME: handle tagging? */
12582                 if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) {
12583                         uid = vap->va_uid;
12584                         qflags |= XFS_QMOPT_UQUOTA;
12585 @@ -390,6 +393,8 @@ xfs_setattr(
12586         if (mask &
12587             (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
12588              XFS_AT_GID|XFS_AT_PROJID)) {
12589 +               /* FIXME: handle tagging? */
12590 +
12591                 /*
12592                  * CAP_FOWNER overrides the following restrictions:
12593                  *
12594 @@ -438,7 +443,7 @@ xfs_setattr(
12595          * and can change the group id only to a group of which he
12596          * or she is a member.
12597          */
12598 -       if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
12599 +       if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_TAG|XFS_AT_PROJID)) {
12600                 /*
12601                  * These IDs could have changed since we last looked at them.
12602                  * But, we're assured that if the ownership did change
12603 @@ -446,10 +451,12 @@ xfs_setattr(
12604                  * would have changed also.
12605                  */
12606                 iuid = ip->i_d.di_uid;
12607 -               iprojid = ip->i_d.di_projid;
12608                 igid = ip->i_d.di_gid;
12609 -               gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
12610 +               itag = ip->i_d.di_tag;
12611 +               iprojid = ip->i_d.di_projid;
12612                 uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
12613 +               gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
12614 +               tag = (mask & XFS_AT_TAG) ? vap->va_tag : itag;
12615                 projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
12616                          iprojid;
12617  
12618 @@ -477,6 +484,7 @@ xfs_setattr(
12619                 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
12620                     (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
12621                     (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
12622 +                       /* FIXME: handle tagging? */
12623                         ASSERT(tp);
12624                         code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
12625                                                 capable(CAP_FOWNER) ?
12626 @@ -693,7 +701,7 @@ xfs_setattr(
12627          * and can change the group id only to a group of which he
12628          * or she is a member.
12629          */
12630 -       if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
12631 +       if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_TAG|XFS_AT_PROJID)) {
12632                 /*
12633                  * CAP_FSETID overrides the following restrictions:
12634                  *
12635 @@ -709,6 +717,12 @@ xfs_setattr(
12636                  * Change the ownerships and register quota modifications
12637                  * in the transaction.
12638                  */
12639 +               if (itag != tag) {
12640 +                       if (XFS_IS_GQUOTA_ON(mp)) {
12641 +                               /* FIXME: handle tag quota? */
12642 +                       }
12643 +                       ip->i_d.di_tag = tag;
12644 +               }
12645                 if (iuid != uid) {
12646                         if (XFS_IS_UQUOTA_ON(mp)) {
12647                                 ASSERT(mask & XFS_AT_UID);
12648 @@ -789,6 +803,10 @@ xfs_setattr(
12649                         di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
12650                         if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
12651                                 di_flags |= XFS_DIFLAG_IMMUTABLE;
12652 +                       if (vap->va_xflags & XFS_XFLAG_IUNLINK)
12653 +                               di_flags |= XFS_DIFLAG_IUNLINK;
12654 +                       if (vap->va_xflags & XFS_XFLAG_BARRIER)
12655 +                               di_flags |= XFS_DIFLAG_BARRIER;
12656                         if (vap->va_xflags & XFS_XFLAG_APPEND)
12657                                 di_flags |= XFS_DIFLAG_APPEND;
12658                         if (vap->va_xflags & XFS_XFLAG_SYNC)
12659 diff -NurpP --minimal linux-2.6.16.20/include/asm-arm/tlb.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-arm/tlb.h
12660 --- linux-2.6.16.20/include/asm-arm/tlb.h       2006-01-03 17:30:02 +0100
12661 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-arm/tlb.h  2006-04-26 19:07:00 +0200
12662 @@ -20,6 +20,7 @@
12663  #include <asm/cacheflush.h>
12664  #include <asm/tlbflush.h>
12665  #include <asm/pgalloc.h>
12666 +#include <linux/vs_memory.h>
12667  
12668  /*
12669   * TLB handling.  This allows us to remove pages from the page
12670 diff -NurpP --minimal linux-2.6.16.20/include/asm-arm26/tlb.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-arm26/tlb.h
12671 --- linux-2.6.16.20/include/asm-arm26/tlb.h     2006-01-03 17:30:02 +0100
12672 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-arm26/tlb.h        2006-04-26 19:07:00 +0200
12673 @@ -3,6 +3,7 @@
12674  
12675  #include <asm/pgalloc.h>
12676  #include <asm/tlbflush.h>
12677 +#include <linux/vs_memory.h>
12678  
12679  /*
12680   * TLB handling.  This allows us to remove pages from the page
12681 diff -NurpP --minimal linux-2.6.16.20/include/asm-arm26/unistd.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-arm26/unistd.h
12682 --- linux-2.6.16.20/include/asm-arm26/unistd.h  2006-01-03 17:30:02 +0100
12683 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-arm26/unistd.h     2006-04-26 19:07:00 +0200
12684 @@ -304,6 +304,8 @@
12685  #define __NR_mq_getsetattr             (__NR_SYSCALL_BASE+279)
12686  #define __NR_waitid                    (__NR_SYSCALL_BASE+280)
12687  
12688 +#define __NR_vserver                   (__NR_SYSCALL_BASE+313)
12689 +
12690  /*
12691   * The following SWIs are ARM private. FIXME - make appropriate for arm26
12692   */
12693 diff -NurpP --minimal linux-2.6.16.20/include/asm-generic/tlb.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-generic/tlb.h
12694 --- linux-2.6.16.20/include/asm-generic/tlb.h   2006-01-03 17:30:02 +0100
12695 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-generic/tlb.h      2006-04-26 19:07:00 +0200
12696 @@ -15,6 +15,7 @@
12697  
12698  #include <linux/config.h>
12699  #include <linux/swap.h>
12700 +#include <linux/vs_memory.h>
12701  #include <asm/pgalloc.h>
12702  #include <asm/tlbflush.h>
12703  
12704 diff -NurpP --minimal linux-2.6.16.20/include/asm-i386/elf.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-i386/elf.h
12705 --- linux-2.6.16.20/include/asm-i386/elf.h      2006-01-03 17:30:04 +0100
12706 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-i386/elf.h 2006-05-11 16:06:22 +0200
12707 @@ -71,7 +71,7 @@ typedef struct user_fxsr_struct elf_fpxr
12708     the loader.  We need to make sure that it is out of the way of the program
12709     that it will "exec", and that there is sufficient room for the brk.  */
12710  
12711 -#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
12712 +#define ELF_ET_DYN_BASE                ((TASK_UNMAPPED_BASE) * 2)
12713  
12714  /* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
12715     now struct_user_regs, they are different) */
12716 @@ -108,7 +108,7 @@ typedef struct user_fxsr_struct elf_fpxr
12717     For the moment, we have only optimizations for the Intel generations,
12718     but that could change... */
12719  
12720 -#define ELF_PLATFORM  (system_utsname.machine)
12721 +#define ELF_PLATFORM  (vx_new_uts(machine))
12722  
12723  #ifdef __KERNEL__
12724  #define SET_PERSONALITY(ex, ibcs2) do { } while (0)
12725 diff -NurpP --minimal linux-2.6.16.20/include/asm-i386/page.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-i386/page.h
12726 --- linux-2.6.16.20/include/asm-i386/page.h     2006-02-18 14:40:29 +0100
12727 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-i386/page.h        2006-04-26 19:07:00 +0200
12728 @@ -109,19 +109,15 @@ extern int page_is_ram(unsigned long pag
12729  
12730  #endif /* __ASSEMBLY__ */
12731  
12732 -#ifdef __ASSEMBLY__
12733  #define __PAGE_OFFSET          CONFIG_PAGE_OFFSET
12734  #define __PHYSICAL_START       CONFIG_PHYSICAL_START
12735 -#else
12736 -#define __PAGE_OFFSET          ((unsigned long)CONFIG_PAGE_OFFSET)
12737 -#define __PHYSICAL_START       ((unsigned long)CONFIG_PHYSICAL_START)
12738 -#endif
12739  #define __KERNEL_START         (__PAGE_OFFSET + __PHYSICAL_START)
12740 -
12741 +#define __MAXMEM               (-__PAGE_OFFSET-__VMALLOC_RESERVE)
12742  
12743  #define PAGE_OFFSET            ((unsigned long)__PAGE_OFFSET)
12744 +#define PHYSICAL_START         ((unsigned long)__PHYSICAL_START)
12745  #define VMALLOC_RESERVE                ((unsigned long)__VMALLOC_RESERVE)
12746 -#define MAXMEM                 (-__PAGE_OFFSET-__VMALLOC_RESERVE)
12747 +#define MAXMEM                 ((unsigned long)__MAXMEM)
12748  #define __pa(x)                        ((unsigned long)(x)-PAGE_OFFSET)
12749  #define __va(x)                        ((void *)((unsigned long)(x)+PAGE_OFFSET))
12750  #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
12751 diff -NurpP --minimal linux-2.6.16.20/include/asm-i386/processor.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-i386/processor.h
12752 --- linux-2.6.16.20/include/asm-i386/processor.h        2006-02-18 14:40:29 +0100
12753 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-i386/processor.h   2006-04-26 19:07:00 +0200
12754 @@ -316,9 +316,10 @@ extern unsigned int mca_pentium_flag;
12755  extern int bootloader_type;
12756  
12757  /*
12758 - * User space process size: 3GB (default).
12759 + * User space process size: (3GB default).
12760   */
12761 -#define TASK_SIZE      (PAGE_OFFSET)
12762 +#define __TASK_SIZE            (__PAGE_OFFSET)
12763 +#define TASK_SIZE              ((unsigned long)__TASK_SIZE)
12764  
12765  /* This decides where the kernel will search for a free chunk of vm
12766   * space during mmap's.
12767 diff -NurpP --minimal linux-2.6.16.20/include/asm-ia64/tlb.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-ia64/tlb.h
12768 --- linux-2.6.16.20/include/asm-ia64/tlb.h      2006-01-03 17:30:05 +0100
12769 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-ia64/tlb.h 2006-04-26 19:07:00 +0200
12770 @@ -41,6 +41,7 @@
12771  #include <linux/mm.h>
12772  #include <linux/pagemap.h>
12773  #include <linux/swap.h>
12774 +#include <linux/vs_memory.h>
12775  
12776  #include <asm/pgalloc.h>
12777  #include <asm/processor.h>
12778 diff -NurpP --minimal linux-2.6.16.20/include/asm-powerpc/unistd.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-powerpc/unistd.h
12779 --- linux-2.6.16.20/include/asm-powerpc/unistd.h        2006-02-18 14:40:31 +0100
12780 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-powerpc/unistd.h   2006-04-26 19:07:00 +0200
12781 @@ -275,7 +275,7 @@
12782  #endif
12783  #define __NR_rtas              255
12784  #define __NR_sys_debug_setcontext 256
12785 -/* Number 257 is reserved for vserver */
12786 +#define __NR_vserver           257
12787  /* 258 currently unused */
12788  #define __NR_mbind             259
12789  #define __NR_get_mempolicy     260
12790 diff -NurpP --minimal linux-2.6.16.20/include/asm-s390/unistd.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-s390/unistd.h
12791 --- linux-2.6.16.20/include/asm-s390/unistd.h   2006-02-18 14:40:31 +0100
12792 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-s390/unistd.h      2006-04-26 19:07:00 +0200
12793 @@ -255,7 +255,7 @@
12794  #define __NR_clock_gettime     (__NR_timer_create+6)
12795  #define __NR_clock_getres      (__NR_timer_create+7)
12796  #define __NR_clock_nanosleep   (__NR_timer_create+8)
12797 -/* Number 263 is reserved for vserver */
12798 +#define __NR_vserver           263
12799  #define __NR_fadvise64_64      264
12800  #define __NR_statfs64          265
12801  #define __NR_fstatfs64         266
12802 diff -NurpP --minimal linux-2.6.16.20/include/asm-sparc/unistd.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-sparc/unistd.h
12803 --- linux-2.6.16.20/include/asm-sparc/unistd.h  2006-02-18 14:40:31 +0100
12804 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-sparc/unistd.h     2006-04-26 19:07:00 +0200
12805 @@ -283,7 +283,7 @@
12806  #define __NR_timer_getoverrun  264
12807  #define __NR_timer_delete      265
12808  #define __NR_timer_create      266
12809 -/* #define __NR_vserver                267 Reserved for VSERVER */
12810 +#define __NR_vserver           267
12811  #define __NR_io_setup          268
12812  #define __NR_io_destroy                269
12813  #define __NR_io_submit         270
12814 diff -NurpP --minimal linux-2.6.16.20/include/asm-sparc64/tlb.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-sparc64/tlb.h
12815 --- linux-2.6.16.20/include/asm-sparc64/tlb.h   2006-01-03 17:30:08 +0100
12816 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-sparc64/tlb.h      2006-04-26 19:07:00 +0200
12817 @@ -3,6 +3,7 @@
12818  
12819  #include <linux/config.h>
12820  #include <linux/swap.h>
12821 +#include <linux/vs_memory.h>
12822  #include <asm/pgalloc.h>
12823  #include <asm/tlbflush.h>
12824  #include <asm/mmu_context.h>
12825 diff -NurpP --minimal linux-2.6.16.20/include/asm-sparc64/unistd.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-sparc64/unistd.h
12826 --- linux-2.6.16.20/include/asm-sparc64/unistd.h        2006-02-18 14:40:32 +0100
12827 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-sparc64/unistd.h   2006-04-26 19:07:00 +0200
12828 @@ -285,7 +285,7 @@
12829  #define __NR_timer_getoverrun  264
12830  #define __NR_timer_delete      265
12831  #define __NR_timer_create      266
12832 -/* #define __NR_vserver                267 Reserved for VSERVER */
12833 +#define __NR_vserver           267
12834  #define __NR_io_setup          268
12835  #define __NR_io_destroy                269
12836  #define __NR_io_submit         270
12837 diff -NurpP --minimal linux-2.6.16.20/include/asm-x86_64/unistd.h linux-2.6.16.20-vs2.1.1-rc22/include/asm-x86_64/unistd.h
12838 --- linux-2.6.16.20/include/asm-x86_64/unistd.h 2006-02-18 14:40:32 +0100
12839 +++ linux-2.6.16.20-vs2.1.1-rc22/include/asm-x86_64/unistd.h    2006-04-26 19:07:00 +0200
12840 @@ -532,7 +532,7 @@ __SYSCALL(__NR_tgkill, sys_tgkill)
12841  #define __NR_utimes            235
12842  __SYSCALL(__NR_utimes, sys_utimes)
12843  #define __NR_vserver           236
12844 -__SYSCALL(__NR_vserver, sys_ni_syscall)
12845 +__SYSCALL(__NR_vserver, sys_vserver)
12846  #define __NR_mbind             237
12847  __SYSCALL(__NR_mbind, sys_mbind)
12848  #define __NR_set_mempolicy     238
12849 diff -NurpP --minimal linux-2.6.16.20/include/linux/capability.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/capability.h
12850 --- linux-2.6.16.20/include/linux/capability.h  2006-02-18 14:40:32 +0100
12851 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/capability.h     2006-04-27 18:21:01 +0200
12852 @@ -235,6 +235,7 @@ typedef __u32 kernel_cap_t;
12853     arbitrary SCSI commands */
12854  /* Allow setting encryption key on loopback filesystem */
12855  /* Allow setting zone reclaim policy */
12856 +/* Allow the selection of a security context */
12857  
12858  #define CAP_SYS_ADMIN        21
12859  
12860 @@ -288,6 +289,11 @@ typedef __u32 kernel_cap_t;
12861  
12862  #define CAP_AUDIT_CONTROL    30
12863  
12864 +/* Allow context manipulations */
12865 +/* Allow changing context info on files */
12866 +
12867 +#define CAP_CONTEXT         31
12868 +
12869  #ifdef __KERNEL__
12870  /* 
12871   * Bounding set
12872 diff -NurpP --minimal linux-2.6.16.20/include/linux/devpts_fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/devpts_fs.h
12873 --- linux-2.6.16.20/include/linux/devpts_fs.h   2004-08-14 12:55:59 +0200
12874 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/devpts_fs.h      2006-04-26 19:07:00 +0200
12875 @@ -30,5 +30,7 @@ static inline void devpts_pty_kill(int n
12876  
12877  #endif
12878  
12879 +#define DEVPTS_SUPER_MAGIC     0x00001cd1
12880 +
12881  
12882  #endif /* _LINUX_DEVPTS_FS_H */
12883 diff -NurpP --minimal linux-2.6.16.20/include/linux/ext2_fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/ext2_fs.h
12884 --- linux-2.6.16.20/include/linux/ext2_fs.h     2005-10-28 20:49:54 +0200
12885 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/ext2_fs.h        2006-04-26 19:07:00 +0200
12886 @@ -192,10 +192,17 @@ struct ext2_group_desc
12887  #define EXT2_NOTAIL_FL                 0x00008000 /* file tail should not be merged */
12888  #define EXT2_DIRSYNC_FL                        0x00010000 /* dirsync behaviour (directories only) */
12889  #define EXT2_TOPDIR_FL                 0x00020000 /* Top of directory hierarchies*/
12890 +#define EXT2_BARRIER_FL                        0x04000000 /* Barrier for chroot() */
12891 +#define EXT2_IUNLINK_FL                        0x08000000 /* Immutable unlink */
12892  #define EXT2_RESERVED_FL               0x80000000 /* reserved for ext2 lib */
12893  
12894 +#ifdef CONFIG_VSERVER_LEGACY
12895 +#define EXT2_FL_USER_VISIBLE           0x0803DFFF /* User visible flags */
12896 +#define EXT2_FL_USER_MODIFIABLE                0x080380FF /* User modifiable flags */
12897 +#else
12898  #define EXT2_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
12899  #define EXT2_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
12900 +#endif
12901  
12902  /*
12903   * ioctl commands
12904 @@ -240,7 +247,7 @@ struct ext2_inode {
12905                 struct {
12906                         __u8    l_i_frag;       /* Fragment number */
12907                         __u8    l_i_fsize;      /* Fragment size */
12908 -                       __u16   i_pad1;
12909 +                       __u16   l_i_tag;        /* Context Tag */
12910                         __le16  l_i_uid_high;   /* these 2 fields    */
12911                         __le16  l_i_gid_high;   /* were reserved2[0] */
12912                         __u32   l_i_reserved2;
12913 @@ -272,6 +279,7 @@ struct ext2_inode {
12914  #define i_gid_low      i_gid
12915  #define i_uid_high     osd2.linux2.l_i_uid_high
12916  #define i_gid_high     osd2.linux2.l_i_gid_high
12917 +#define i_raw_tag      osd2.linux2.l_i_tag
12918  #define i_reserved2    osd2.linux2.l_i_reserved2
12919  #endif
12920  
12921 @@ -313,8 +321,9 @@ struct ext2_inode {
12922  #define EXT2_MOUNT_XATTR_USER          0x004000  /* Extended user attributes */
12923  #define EXT2_MOUNT_POSIX_ACL           0x008000  /* POSIX Access Control Lists */
12924  #define EXT2_MOUNT_XIP                 0x010000  /* Execute in place */
12925 -#define EXT2_MOUNT_USRQUOTA            0x020000 /* user quota */
12926 -#define EXT2_MOUNT_GRPQUOTA            0x040000 /* group quota */
12927 +#define EXT2_MOUNT_USRQUOTA            0x020000  /* user quota */
12928 +#define EXT2_MOUNT_GRPQUOTA            0x040000  /* group quota */
12929 +#define EXT2_MOUNT_TAGGED              (1<<24)   /* Enable Context Tags */
12930  
12931  
12932  #define clear_opt(o, opt)              o &= ~EXT2_MOUNT_##opt
12933 diff -NurpP --minimal linux-2.6.16.20/include/linux/ext3_fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/ext3_fs.h
12934 --- linux-2.6.16.20/include/linux/ext3_fs.h     2005-10-28 20:49:54 +0200
12935 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/ext3_fs.h        2006-04-26 19:07:00 +0200
12936 @@ -185,10 +185,20 @@ struct ext3_group_desc
12937  #define EXT3_NOTAIL_FL                 0x00008000 /* file tail should not be merged */
12938  #define EXT3_DIRSYNC_FL                        0x00010000 /* dirsync behaviour (directories only) */
12939  #define EXT3_TOPDIR_FL                 0x00020000 /* Top of directory hierarchies*/
12940 +#define EXT3_BARRIER_FL                        0x04000000 /* Barrier for chroot() */
12941 +#define EXT3_IUNLINK_FL                        0x08000000 /* Immutable unlink */
12942  #define EXT3_RESERVED_FL               0x80000000 /* reserved for ext3 lib */
12943  
12944 +#ifdef CONFIG_VSERVER_LEGACY
12945 +#define EXT3_FL_USER_VISIBLE           0x0803DFFF /* User visible flags */
12946 +#define EXT3_FL_USER_MODIFIABLE                0x080380FF /* User modifiable flags */
12947 +#else
12948  #define EXT3_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
12949  #define EXT3_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
12950 +#endif
12951 +#ifdef CONFIG_VSERVER_LEGACY
12952 +#define EXT3_IOC_SETTAG                        FIOC_SETTAGJ
12953 +#endif
12954  
12955  /*
12956   * Inode dynamic state flags
12957 @@ -287,7 +297,7 @@ struct ext3_inode {
12958                 struct {
12959                         __u8    l_i_frag;       /* Fragment number */
12960                         __u8    l_i_fsize;      /* Fragment size */
12961 -                       __u16   i_pad1;
12962 +                       __u16   l_i_tag;        /* Context Tag */
12963                         __le16  l_i_uid_high;   /* these 2 fields    */
12964                         __le16  l_i_gid_high;   /* were reserved2[0] */
12965                         __u32   l_i_reserved2;
12966 @@ -321,6 +331,7 @@ struct ext3_inode {
12967  #define i_gid_low      i_gid
12968  #define i_uid_high     osd2.linux2.l_i_uid_high
12969  #define i_gid_high     osd2.linux2.l_i_gid_high
12970 +#define i_raw_tag      osd2.linux2.l_i_tag
12971  #define i_reserved2    osd2.linux2.l_i_reserved2
12972  
12973  #elif defined(__GNU__)
12974 @@ -375,6 +386,7 @@ struct ext3_inode {
12975  #define EXT3_MOUNT_QUOTA               0x80000 /* Some quota option set */
12976  #define EXT3_MOUNT_USRQUOTA            0x100000 /* "old" user quota */
12977  #define EXT3_MOUNT_GRPQUOTA            0x200000 /* "old" group quota */
12978 +#define EXT3_MOUNT_TAGGED              (1<<24) /* Enable Context Tags */
12979  
12980  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
12981  #ifndef _LINUX_EXT2_FS_H
12982 @@ -775,6 +787,7 @@ extern unsigned long ext3_count_free (st
12983  extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
12984  extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
12985  extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
12986 +extern int ext3_sync_flags(struct inode *inode);
12987  
12988  extern void ext3_read_inode (struct inode *);
12989  extern int  ext3_write_inode (struct inode *, int);
12990 diff -NurpP --minimal linux-2.6.16.20/include/linux/ext3_jbd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/ext3_jbd.h
12991 --- linux-2.6.16.20/include/linux/ext3_jbd.h    2005-08-29 22:25:41 +0200
12992 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/ext3_jbd.h       2006-04-26 19:07:00 +0200
12993 @@ -77,10 +77,10 @@
12994  #define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
12995  /* Amount of blocks needed for quota insert/delete - we do some block writes
12996   * but inode, sb and group updates are done only once */
12997 -#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
12998 -               (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0)
12999 -#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
13000 -               (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0)
13001 +#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? \
13002 +       (DQUOT_INIT_ALLOC*(EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0)
13003 +#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? \
13004 +       (DQUOT_DEL_ALLOC*(EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0)
13005  #else
13006  #define EXT3_QUOTA_TRANS_BLOCKS(sb) 0
13007  #define EXT3_QUOTA_INIT_BLOCKS(sb) 0
13008 diff -NurpP --minimal linux-2.6.16.20/include/linux/fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/fs.h
13009 --- linux-2.6.16.20/include/linux/fs.h  2006-05-11 21:25:36 +0200
13010 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/fs.h     2006-05-21 23:34:46 +0200
13011 @@ -109,6 +109,8 @@ extern int dir_notify_enable;
13012  #define MS_PRIVATE     (1<<18) /* change to private */
13013  #define MS_SLAVE       (1<<19) /* change to slave */
13014  #define MS_SHARED      (1<<20) /* change to shared */
13015 +#define MS_TAGGED      (1<<24) /* use generic inode tagging */
13016 +#define MS_TAGID       (1<<25) /* use specific tag for this mount */
13017  #define MS_ACTIVE      (1<<30)
13018  #define MS_NOUSER      (1<<31)
13019  
13020 @@ -135,6 +137,8 @@ extern int dir_notify_enable;
13021  #define S_NOCMTIME     128     /* Do not update file c/mtime */
13022  #define S_SWAPFILE     256     /* Do not truncate: swapon got its bmaps */
13023  #define S_PRIVATE      512     /* Inode is fs-internal */
13024 +#define S_BARRIER      1024    /* Barrier for chroot() */
13025 +#define S_IUNLINK      2048    /* Immutable unlink */
13026  
13027  /*
13028   * Note that nosuid etc flags are inode-specific: setting some file-system
13029 @@ -151,23 +155,30 @@ extern int dir_notify_enable;
13030   */
13031  #define __IS_FLG(inode,flg) ((inode)->i_sb->s_flags & (flg))
13032  
13033 -#define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY)
13034 +#define IS_RDONLY(inode)       __IS_FLG(inode, MS_RDONLY)
13035  #define IS_SYNC(inode)         (__IS_FLG(inode, MS_SYNCHRONOUS) || \
13036                                         ((inode)->i_flags & S_SYNC))
13037  #define IS_DIRSYNC(inode)      (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
13038                                         ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
13039  #define IS_MANDLOCK(inode)     __IS_FLG(inode, MS_MANDLOCK)
13040 +#define IS_TAGGED(inode)       __IS_FLG(inode, MS_TAGGED)
13041  
13042  #define IS_NOQUOTA(inode)      ((inode)->i_flags & S_NOQUOTA)
13043  #define IS_APPEND(inode)       ((inode)->i_flags & S_APPEND)
13044  #define IS_IMMUTABLE(inode)    ((inode)->i_flags & S_IMMUTABLE)
13045 +#define IS_IUNLINK(inode)      ((inode)->i_flags & S_IUNLINK)
13046 +#define IS_IXORUNLINK(inode)   ((IS_IUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode))
13047  #define IS_POSIXACL(inode)     __IS_FLG(inode, MS_POSIXACL)
13048  
13049 +#define IS_BARRIER(inode)      (S_ISDIR((inode)->i_mode) && ((inode)->i_flags & S_BARRIER))
13050  #define IS_DEADDIR(inode)      ((inode)->i_flags & S_DEAD)
13051  #define IS_NOCMTIME(inode)     ((inode)->i_flags & S_NOCMTIME)
13052  #define IS_SWAPFILE(inode)     ((inode)->i_flags & S_SWAPFILE)
13053  #define IS_PRIVATE(inode)      ((inode)->i_flags & S_PRIVATE)
13054  
13055 +#define IS_COW(inode)          (IS_IUNLINK(inode) && IS_IMMUTABLE(inode))
13056 +#define IS_COW_LINK(inode)     (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1))
13057 +
13058  /* the read-only stuff doesn't really belong here, but any other place is
13059     probably as bad and I don't want to create yet another include file. */
13060  
13061 @@ -265,6 +276,7 @@ typedef void (dio_iodone_t)(struct kiocb
13062  #define ATTR_KILL_SUID 2048
13063  #define ATTR_KILL_SGID 4096
13064  #define ATTR_FILE      8192
13065 +#define ATTR_TAG       16384
13066  
13067  /*
13068   * This is the Inode Attributes structure, used for notify_change().  It
13069 @@ -280,6 +292,7 @@ struct iattr {
13070         umode_t         ia_mode;
13071         uid_t           ia_uid;
13072         gid_t           ia_gid;
13073 +       tag_t           ia_tag;
13074         loff_t          ia_size;
13075         struct timespec ia_atime;
13076         struct timespec ia_mtime;
13077 @@ -293,6 +306,9 @@ struct iattr {
13078         struct file     *ia_file;
13079  };
13080  
13081 +#define ATTR_FLAG_BARRIER      512     /* Barrier for chroot() */
13082 +#define ATTR_FLAG_IUNLINK      1024    /* Immutable unlink */
13083 +
13084  /*
13085   * Includes for diskquotas.
13086   */
13087 @@ -471,6 +487,7 @@ struct inode {
13088         unsigned int            i_nlink;
13089         uid_t                   i_uid;
13090         gid_t                   i_gid;
13091 +       tag_t                   i_tag;
13092         dev_t                   i_rdev;
13093         loff_t                  i_size;
13094         struct timespec         i_atime;
13095 @@ -491,6 +508,7 @@ struct inode {
13096         struct address_space    *i_mapping;
13097         struct address_space    i_data;
13098  #ifdef CONFIG_QUOTA
13099 +       struct dqhash           *i_dqh;
13100         struct dquot            *i_dquot[MAXQUOTAS];
13101  #endif
13102         /* These three should probably be a union */
13103 @@ -633,6 +651,7 @@ struct file {
13104         struct fown_struct      f_owner;
13105         unsigned int            f_uid, f_gid;
13106         struct file_ra_state    f_ra;
13107 +       xid_t                   f_xid;
13108  
13109         unsigned long           f_version;
13110         void                    *f_security;
13111 @@ -712,6 +731,7 @@ struct file_lock {
13112         unsigned char fl_type;
13113         loff_t fl_start;
13114         loff_t fl_end;
13115 +       xid_t fl_xid;
13116  
13117         struct fasync_struct *  fl_fasync; /* for lease break notifications */
13118         unsigned long fl_break_time;    /* for nonblocking lease breaks */
13119 @@ -811,7 +831,7 @@ struct super_block {
13120         unsigned long long      s_maxbytes;     /* Max file size */
13121         struct file_system_type *s_type;
13122         struct super_operations *s_op;
13123 -       struct dquot_operations *dq_op;
13124 +       struct dquot_operations *s_qop;
13125         struct quotactl_ops     *s_qcop;
13126         struct export_operations *s_export_op;
13127         unsigned long           s_flags;
13128 @@ -834,7 +854,7 @@ struct super_block {
13129  
13130         struct block_device     *s_bdev;
13131         struct list_head        s_instances;
13132 -       struct quota_info       s_dquot;        /* Diskquota specific options */
13133 +       struct dqhash           *s_dqh;         /* Diskquota hash */
13134  
13135         int                     s_frozen;
13136         wait_queue_head_t       s_wait_unfrozen;
13137 @@ -904,12 +924,12 @@ static inline void unlock_super(struct s
13138   */
13139  extern int vfs_permission(struct nameidata *, int);
13140  extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *);
13141 -extern int vfs_mkdir(struct inode *, struct dentry *, int);
13142 -extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
13143 -extern int vfs_symlink(struct inode *, struct dentry *, const char *, int);
13144 -extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
13145 -extern int vfs_rmdir(struct inode *, struct dentry *);
13146 -extern int vfs_unlink(struct inode *, struct dentry *);
13147 +extern int vfs_mkdir(struct inode *, struct dentry *, int, struct nameidata *);
13148 +extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t, struct nameidata *);
13149 +extern int vfs_symlink(struct inode *, struct dentry *, const char *, int, struct nameidata *);
13150 +extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct nameidata *);
13151 +extern int vfs_rmdir(struct inode *, struct dentry *, struct nameidata *);
13152 +extern int vfs_unlink(struct inode *, struct dentry *, struct nameidata *);
13153  extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
13154  
13155  /*
13156 @@ -1049,6 +1069,7 @@ struct inode_operations {
13157         ssize_t (*listxattr) (struct dentry *, char *, size_t);
13158         int (*removexattr) (struct dentry *, const char *);
13159         void (*truncate_range)(struct inode *, loff_t, loff_t);
13160 +       int (*sync_flags) (struct inode *);
13161  };
13162  
13163  struct seq_file;
13164 @@ -1059,6 +1080,7 @@ extern ssize_t vfs_readv(struct file *, 
13165                 unsigned long, loff_t *);
13166  extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
13167                 unsigned long, loff_t *);
13168 +ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t);
13169  
13170  /*
13171   * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
13172 @@ -1087,8 +1109,8 @@ struct super_operations {
13173  
13174         int (*show_options)(struct seq_file *, struct vfsmount *);
13175  
13176 -       ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
13177 -       ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
13178 +       ssize_t (*quota_read)(struct dqhash *, int, char *, size_t, loff_t);
13179 +       ssize_t (*quota_write)(struct dqhash *, int, const char *, size_t, loff_t);
13180  };
13181  
13182  /* Inode state bits.  Protected by inode_lock. */
13183 @@ -1526,7 +1548,7 @@ extern void clear_inode(struct inode *);
13184  extern void destroy_inode(struct inode *);
13185  extern struct inode *new_inode(struct super_block *);
13186  extern int remove_suid(struct dentry *);
13187 -extern void remove_dquot_ref(struct super_block *, int, struct list_head *);
13188 +extern void remove_dquot_ref(struct dqhash *, int, struct list_head *);
13189  extern struct semaphore iprune_sem;
13190  
13191  extern void __insert_inode_hash(struct inode *, unsigned long hashval);
13192 @@ -1566,6 +1588,7 @@ extern ssize_t do_sync_write(struct file
13193  ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
13194                                 unsigned long nr_segs, loff_t *ppos);
13195  extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
13196 +extern ssize_t generic_file_sendpage(struct file *, struct page *, int, size_t, loff_t *, int);
13197  extern void do_generic_mapping_read(struct address_space *mapping,
13198                                     struct file_ra_state *, struct file *,
13199                                     loff_t *, read_descriptor_t *, read_actor_t);
13200 @@ -1688,6 +1711,7 @@ extern int dcache_dir_open(struct inode 
13201  extern int dcache_dir_close(struct inode *, struct file *);
13202  extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
13203  extern int dcache_readdir(struct file *, void *, filldir_t);
13204 +extern int dcache_readdir_filter(struct file *, void *, filldir_t, int (*)(struct dentry *));
13205  extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
13206  extern int simple_statfs(struct super_block *, struct kstatfs *);
13207  extern int simple_link(struct dentry *, struct inode *, struct dentry *);
13208 diff -NurpP --minimal linux-2.6.16.20/include/linux/init_task.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/init_task.h
13209 --- linux-2.6.16.20/include/linux/init_task.h   2006-01-03 17:30:09 +0100
13210 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/init_task.h      2006-04-26 19:07:00 +0200
13211 @@ -121,6 +121,10 @@ extern struct group_info init_groups;
13212         .journal_info   = NULL,                                         \
13213         .cpu_timers     = INIT_CPU_TIMERS(tsk.cpu_timers),              \
13214         .fs_excl        = ATOMIC_INIT(0),                               \
13215 +       .xid            = 0,                                            \
13216 +       .vx_info        = NULL,                                         \
13217 +       .nid            = 0,                                            \
13218 +       .nx_info        = NULL,                                         \
13219  }
13220  
13221  
13222 diff -NurpP --minimal linux-2.6.16.20/include/linux/ipc.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/ipc.h
13223 --- linux-2.6.16.20/include/linux/ipc.h 2004-08-14 12:54:46 +0200
13224 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/ipc.h    2006-04-26 19:07:00 +0200
13225 @@ -66,6 +66,7 @@ struct kern_ipc_perm
13226         mode_t          mode; 
13227         unsigned long   seq;
13228         void            *security;
13229 +       xid_t           xid;
13230  };
13231  
13232  #endif /* __KERNEL__ */
13233 diff -NurpP --minimal linux-2.6.16.20/include/linux/kernel.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/kernel.h
13234 --- linux-2.6.16.20/include/linux/kernel.h      2006-02-18 14:40:33 +0100
13235 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/kernel.h 2006-04-26 19:07:00 +0200
13236 @@ -17,6 +17,7 @@
13237  #include <asm/bug.h>
13238  
13239  extern const char linux_banner[];
13240 +extern const char vx_linux_banner[];
13241  
13242  #define INT_MAX                ((int)(~0U>>1))
13243  #define INT_MIN                (-INT_MAX - 1)
13244 diff -NurpP --minimal linux-2.6.16.20/include/linux/major.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/major.h
13245 --- linux-2.6.16.20/include/linux/major.h       2005-08-29 22:25:41 +0200
13246 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/major.h  2006-04-26 19:07:00 +0200
13247 @@ -15,6 +15,7 @@
13248  #define HD_MAJOR               IDE0_MAJOR
13249  #define PTY_SLAVE_MAJOR                3
13250  #define TTY_MAJOR              4
13251 +#define VROOT_MAJOR            4
13252  #define TTYAUX_MAJOR           5
13253  #define LP_MAJOR               6
13254  #define VCS_MAJOR              7
13255 diff -NurpP --minimal linux-2.6.16.20/include/linux/mount.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/mount.h
13256 --- linux-2.6.16.20/include/linux/mount.h       2006-04-09 13:49:57 +0200
13257 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/mount.h  2006-04-26 19:07:00 +0200
13258 @@ -22,10 +22,14 @@
13259  #define MNT_NOEXEC     0x04
13260  #define MNT_NOATIME    0x08
13261  #define MNT_NODIRATIME 0x10
13262 +#define MNT_RDONLY     0x20
13263 +
13264 +#define MNT_IS_RDONLY(m)       ((m) && ((m)->mnt_flags & MNT_RDONLY))
13265  
13266  #define MNT_SHARED     0x1000  /* if the vfsmount is a shared mount */
13267  #define MNT_UNBINDABLE 0x2000  /* if the vfsmount is a unbindable mount */
13268  #define MNT_PNODE_MASK 0x3000  /* propogation flag mask */
13269 +#define MNT_TAGID              0x8000
13270  
13271  struct vfsmount {
13272         struct list_head mnt_hash;
13273 @@ -47,6 +51,7 @@ struct vfsmount {
13274         struct vfsmount *mnt_master;    /* slave is on master->mnt_slave_list */
13275         struct namespace *mnt_namespace; /* containing namespace */
13276         int mnt_pinned;
13277 +       tag_t mnt_tag;                  /* tagging used for vfsmount */
13278  };
13279  
13280  static inline struct vfsmount *mntget(struct vfsmount *mnt)
13281 diff -NurpP --minimal linux-2.6.16.20/include/linux/namespace.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/namespace.h
13282 --- linux-2.6.16.20/include/linux/namespace.h   2006-02-18 14:40:34 +0100
13283 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/namespace.h      2006-04-26 19:07:00 +0200
13284 @@ -16,6 +16,7 @@ struct namespace {
13285  extern int copy_namespace(int, struct task_struct *);
13286  extern void __put_namespace(struct namespace *namespace);
13287  extern struct namespace *dup_namespace(struct task_struct *, struct fs_struct *);
13288 +extern void umount_unused(struct vfsmount *, struct fs_struct *);
13289  
13290  static inline void put_namespace(struct namespace *namespace)
13291  {
13292 diff -NurpP --minimal linux-2.6.16.20/include/linux/net.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/net.h
13293 --- linux-2.6.16.20/include/linux/net.h 2006-02-18 14:40:34 +0100
13294 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/net.h    2006-04-26 19:07:00 +0200
13295 @@ -62,6 +62,7 @@ typedef enum {
13296  #define SOCK_ASYNC_WAITDATA    1
13297  #define SOCK_NOSPACE           2
13298  #define SOCK_PASSCRED          3
13299 +#define SOCK_USER_SOCKET       4
13300  
13301  #ifndef ARCH_HAS_SOCKET_TYPES
13302  /**
13303 diff -NurpP --minimal linux-2.6.16.20/include/linux/nfs_mount.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/nfs_mount.h
13304 --- linux-2.6.16.20/include/linux/nfs_mount.h   2005-08-29 22:25:42 +0200
13305 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/nfs_mount.h      2006-04-26 19:07:00 +0200
13306 @@ -61,6 +61,7 @@ struct nfs_mount_data {
13307  #define NFS_MOUNT_NOACL                0x0800  /* 4 */
13308  #define NFS_MOUNT_STRICTLOCK   0x1000  /* reserved for NFSv4 */
13309  #define NFS_MOUNT_SECFLAVOUR   0x2000  /* 5 */
13310 +#define NFS_MOUNT_TAGGED       0x8000  /* context tagging */
13311  #define NFS_MOUNT_FLAGMASK     0xFFFF
13312  
13313  #endif
13314 diff -NurpP --minimal linux-2.6.16.20/include/linux/percpu.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/percpu.h
13315 --- linux-2.6.16.20/include/linux/percpu.h      2006-04-09 13:49:57 +0200
13316 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/percpu.h 2006-04-26 19:07:00 +0200
13317 @@ -8,7 +8,7 @@
13318  
13319  /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
13320  #ifndef PERCPU_ENOUGH_ROOM
13321 -#define PERCPU_ENOUGH_ROOM 32768
13322 +#define PERCPU_ENOUGH_ROOM 65536
13323  #endif
13324  
13325  /* Must be an lvalue. */
13326 diff -NurpP --minimal linux-2.6.16.20/include/linux/proc_fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/proc_fs.h
13327 --- linux-2.6.16.20/include/linux/proc_fs.h     2006-05-11 21:25:36 +0200
13328 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/proc_fs.h        2006-04-26 19:07:00 +0200
13329 @@ -55,6 +55,7 @@ struct proc_dir_entry {
13330         nlink_t nlink;
13331         uid_t uid;
13332         gid_t gid;
13333 +       int vx_flags;
13334         unsigned long size;
13335         struct inode_operations * proc_iops;
13336         struct file_operations * proc_fops;
13337 @@ -248,9 +249,11 @@ extern void kclist_add(struct kcore_list
13338  struct proc_inode {
13339         struct task_struct *task;
13340         int type;
13341 +       int vx_flags;
13342         union {
13343                 int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
13344                 int (*proc_read)(struct task_struct *task, char *page);
13345 +               int (*proc_vid_read)(int vid, char *page);
13346         } op;
13347         struct proc_dir_entry *pde;
13348         struct inode vfs_inode;
13349 diff -NurpP --minimal linux-2.6.16.20/include/linux/quota.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/quota.h
13350 --- linux-2.6.16.20/include/linux/quota.h       2006-01-03 17:30:10 +0100
13351 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/quota.h  2006-04-26 19:07:00 +0200
13352 @@ -56,6 +56,13 @@ extern spinlock_t dq_data_lock;
13353  #define kb2qb(x) ((x) >> (QUOTABLOCK_BITS-10))
13354  #define toqb(x) (((x) + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS)
13355  
13356 +/* dqhash_valid(): a real NULL test only if HANDLE_DQHASH_NULL is defined, otherwise constant true */
13357 +#ifdef HANDLE_DQHASH_NULL
13358 +#define        dqhash_valid(hash)      ((hash) != NULL)
13359 +#else
13360 +#define        dqhash_valid(hash)      (0 == 0)
13361 +#endif
13362 +
13363  #define MAXQUOTAS 2
13364  #define USRQUOTA  0            /* element used for user quotas */
13365  #define GRPQUOTA  1            /* element used for group quotas */
13366 @@ -175,19 +182,20 @@ struct mem_dqinfo {
13367         } u;
13368  };
13369  
13370 -struct super_block;
13371 +struct dqhash;
13372  
13373  #define DQF_MASK 0xffff                /* Mask for format specific flags */
13374  #define DQF_INFO_DIRTY_B 16
13375  #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */
13376  
13377 -extern void mark_info_dirty(struct super_block *sb, int type);
13378 +extern void mark_info_dirty(struct dqhash *hash, int type);
13379 +
13380  #define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
13381  #define info_any_dquot_dirty(info) (!list_empty(&(info)->dqi_dirty_list))
13382  #define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info))
13383  
13384 -#define sb_dqopt(sb) (&(sb)->s_dquot)
13385 -#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
13386 +#define dqh_dqopt(hash) (&(hash)->dqh_dqopt)
13387 +#define dqh_dqinfo(hash, type) (dqh_dqopt(hash)->info+(type))
13388  
13389  struct dqstats {
13390         int lookups;
13391 @@ -218,7 +226,7 @@ struct dquot {
13392         struct semaphore dq_lock;       /* dquot IO lock */
13393         atomic_t dq_count;              /* Use count */
13394         wait_queue_head_t dq_wait_unused;       /* Wait queue for dquot to become unused */
13395 -       struct super_block *dq_sb;      /* superblock this applies to */
13396 +       struct dqhash *dq_dqh;          /* quota hash backpointer */
13397         unsigned int dq_id;             /* ID this applies to (uid, gid) */
13398         loff_t dq_off;                  /* Offset of dquot on disk */
13399         unsigned long dq_flags;         /* See DQ_* */
13400 @@ -233,13 +241,14 @@ struct dquot {
13401  
13402  /* Operations which must be implemented by each quota format */
13403  struct quota_format_ops {
13404 -       int (*check_quota_file)(struct super_block *sb, int type);      /* Detect whether file is in our format */
13405 -       int (*read_file_info)(struct super_block *sb, int type);        /* Read main info about file - called on quotaon() */
13406 -       int (*write_file_info)(struct super_block *sb, int type);       /* Write main info about file */
13407 -       int (*free_file_info)(struct super_block *sb, int type);        /* Called on quotaoff() */
13408 -       int (*read_dqblk)(struct dquot *dquot);         /* Read structure for one user */
13409 -       int (*commit_dqblk)(struct dquot *dquot);       /* Write structure for one user */
13410 -       int (*release_dqblk)(struct dquot *dquot);      /* Called when last reference to dquot is being dropped */
13411 +       int (*check_quota_file)(struct dqhash *, int);  /* Detect whether file is in our format */
13412 +       int (*read_file_info)(struct dqhash *, int);    /* Read main info about file - called on quotaon() */
13413 +       int (*write_file_info)(struct dqhash *, int);   /* Write main info about file */
13414 +       int (*free_file_info)(struct dqhash *, int);    /* Called on quotaoff() */
13415 +
13416 +       int (*read_dqblk)(struct dquot *);      /* Read structure for one user */
13417 +       int (*commit_dqblk)(struct dquot *);    /* Write structure for one user */
13418 +       int (*release_dqblk)(struct dquot *);   /* Called when last reference to dquot is being dropped */
13419  };
13420  
13421  /* Operations working with dquots */
13422 @@ -255,22 +264,22 @@ struct dquot_operations {
13423         int (*acquire_dquot) (struct dquot *);          /* Quota is going to be created on disk */
13424         int (*release_dquot) (struct dquot *);          /* Quota is going to be deleted from disk */
13425         int (*mark_dirty) (struct dquot *);             /* Dquot is marked dirty */
13426 -       int (*write_info) (struct super_block *, int);  /* Write of quota "superblock" */
13427 +       int (*write_info) (struct dqhash *, int);       /* Write of quota "superblock" */
13428  };
13429  
13430  /* Operations handling requests from userspace */
13431  struct quotactl_ops {
13432 -       int (*quota_on)(struct super_block *, int, int, char *);
13433 -       int (*quota_off)(struct super_block *, int);
13434 -       int (*quota_sync)(struct super_block *, int);
13435 -       int (*get_info)(struct super_block *, int, struct if_dqinfo *);
13436 -       int (*set_info)(struct super_block *, int, struct if_dqinfo *);
13437 -       int (*get_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *);
13438 -       int (*set_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *);
13439 -       int (*get_xstate)(struct super_block *, struct fs_quota_stat *);
13440 -       int (*set_xstate)(struct super_block *, unsigned int, int);
13441 -       int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
13442 -       int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
13443 +       int (*quota_on)(struct dqhash *, int, int, char *);
13444 +       int (*quota_off)(struct dqhash *, int);
13445 +       int (*quota_sync)(struct dqhash *, int);
13446 +       int (*get_info)(struct dqhash *, int, struct if_dqinfo *);
13447 +       int (*set_info)(struct dqhash *, int, struct if_dqinfo *);
13448 +       int (*get_dqblk)(struct dqhash *, int, qid_t, struct if_dqblk *);
13449 +       int (*set_dqblk)(struct dqhash *, int, qid_t, struct if_dqblk *);
13450 +       int (*get_xstate)(struct dqhash *, struct fs_quota_stat *);
13451 +       int (*set_xstate)(struct dqhash *, unsigned int, int);
13452 +       int (*get_xquota)(struct dqhash *, int, qid_t, struct fs_disk_quota *);
13453 +       int (*set_xquota)(struct dqhash *, int, qid_t, struct fs_disk_quota *);
13454  };
13455  
13456  struct quota_format_type {
13457 @@ -293,16 +302,15 @@ struct quota_info {
13458         struct quota_format_ops *ops[MAXQUOTAS];        /* Operations for each type */
13459  };
13460  
13461 -/* Inline would be better but we need to dereference super_block which is not defined yet */
13462 -int mark_dquot_dirty(struct dquot *dquot);
13463  
13464  #define dquot_dirty(dquot) test_bit(DQ_MOD_B, &(dquot)->dq_flags)
13465  
13466 -#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
13467 -       (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
13468 +#define dqh_has_quota_enabled(hash, type) (dqhash_valid(hash) && ((type)==USRQUOTA ? \
13469 +       (dqh_dqopt(hash)->flags & DQUOT_USR_ENABLED) : (dqh_dqopt(hash)->flags & DQUOT_GRP_ENABLED)))
13470 +
13471 +#define dqh_any_quota_enabled(hash) (dqhash_valid(hash) && \
13472 +       (dqh_has_quota_enabled(hash, USRQUOTA) || dqh_has_quota_enabled(hash, GRPQUOTA)))
13473  
13474 -#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
13475 -                                 sb_has_quota_enabled(sb, GRPQUOTA))
13476  
13477  int register_quota_format(struct quota_format_type *fmt);
13478  void unregister_quota_format(struct quota_format_type *fmt);
13479 @@ -317,6 +325,51 @@ struct quota_module_name {
13480         {QFMT_VFS_V0, "quota_v2"},\
13481         {0, NULL}}
13482  
13483 +struct dqhash { /* quota state reached via super_block.s_dqh / inode.i_dqh; refcounted through dqhget()/dqhput() */
13484 +       struct list_head dqh_list;      /* List of all quota hashes */
13485 +       unsigned int dqh_id;            /* ID for hash */
13486 +       atomic_t dqh_count;             /* Use count */
13487 +       struct quota_info dqh_dqopt;    /* Diskquota specific options */
13488 +       struct dquot_operations *dqh_qop;       /* dquot operations invoked by the DQUOT_*() helpers */
13489 +       struct quotactl_ops *dqh_qcop;  /* quotactl operations (e.g. quota_off, used by DQUOT_OFF) */
13490 +       struct super_block *dqh_sb;     /* super block */
13491 +       unsigned int dqh_hash_bits;     /* NOTE(review): presumably log2 of the bucket count -- confirm */
13492 +       unsigned int dqh_hash_mask;     /* NOTE(review): presumably the bucket index mask -- confirm */
13493 +       struct hlist_head *dqh_hash;    /* NOTE(review): presumably the dquot hash bucket array -- confirm */
13494 +};
13495 +
13496 +#if defined(CONFIG_QUOTA)
13497 +
13498 +
13499 +struct dqhash *new_dqhash(struct super_block *, unsigned int);
13500 +void destroy_dqhash(struct dqhash *);
13501 +struct dqhash *find_dqhash(unsigned int);
13502 +/* Drop one reference on @hash (if dqhash_valid()); destroy_dqhash() runs when dqh_count hits zero. */
13503 +static inline void dqhput(struct dqhash *hash)
13504 +{
13505 +       if (dqhash_valid(hash))
13506 +               if (atomic_dec_and_test(&hash->dqh_count))
13507 +                       destroy_dqhash(hash);
13508 +}
13509 +/* Take one reference on @hash (if dqhash_valid()) and return @hash unchanged. */
13510 +static inline struct dqhash *dqhget(struct dqhash *hash)
13511 +{
13512 +       if (dqhash_valid(hash))
13513 +               atomic_inc(&hash->dqh_count);
13514 +       return hash;
13515 +}
13516 +
13517 +#else /* CONFIG_QUOTA */
13518 +
13519 +#define new_dqhash(sb, dqdom)          (0)
13520 +#define find_dqhash(dqdom)             (0)
13521 +#define destroy_dqhash(hash)           do { } while(0)
13522 +
13523 +#define dqhput(hash)                   do { } while(0)
13524 +#define dqhget(hash)                   (hash)
13525 +
13526 +#endif /* CONFIG_QUOTA */
13527 +
13528  #else
13529  
13530  # /* nodep */ include <sys/cdefs.h>
13531 diff -NurpP --minimal linux-2.6.16.20/include/linux/quotaops.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/quotaops.h
13532 --- linux-2.6.16.20/include/linux/quotaops.h    2006-04-09 13:49:57 +0200
13533 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/quotaops.h       2006-04-26 19:07:00 +0200
13534 @@ -20,7 +20,7 @@
13535  /*
13536   * declaration of quota_function calls in kernel.
13537   */
13538 -extern void sync_dquots(struct super_block *sb, int type);
13539 +extern void sync_dquots(struct dqhash *hash, int type);
13540  
13541  extern int dquot_initialize(struct inode *inode, int type);
13542  extern int dquot_drop(struct inode *inode);
13543 @@ -35,19 +35,19 @@ extern int dquot_transfer(struct inode *
13544  extern int dquot_commit(struct dquot *dquot);
13545  extern int dquot_acquire(struct dquot *dquot);
13546  extern int dquot_release(struct dquot *dquot);
13547 -extern int dquot_commit_info(struct super_block *sb, int type);
13548 +extern int dquot_commit_info(struct dqhash *hash, int type);
13549  extern int dquot_mark_dquot_dirty(struct dquot *dquot);
13550  
13551 -extern int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path);
13552 -extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
13553 +extern int vfs_quota_on(struct dqhash *hash, int type, int format_id, char *path);
13554 +extern int vfs_quota_on_mount(struct dqhash *hash, char *qf_name,
13555                 int format_id, int type);
13556 -extern int vfs_quota_off(struct super_block *sb, int type);
13557 -#define vfs_quota_off_mount(sb, type) vfs_quota_off(sb, type)
13558 -extern int vfs_quota_sync(struct super_block *sb, int type);
13559 -extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
13560 -extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
13561 -extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
13562 -extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
13563 +extern int vfs_quota_off(struct dqhash *hash, int type);
13564 +#define vfs_quota_off_mount(dqh, type) vfs_quota_off(dqh, type)
13565 +extern int vfs_quota_sync(struct dqhash *hash, int type);
13566 +extern int vfs_get_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii);
13567 +extern int vfs_set_dqinfo(struct dqhash *hash, int type, struct if_dqinfo *ii);
13568 +extern int vfs_get_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di);
13569 +extern int vfs_set_dqblk(struct dqhash *hash, int type, qid_t id, struct if_dqblk *di);
13570  
13571  /*
13572   * Operations supported for diskquotas.
13573 @@ -62,9 +62,12 @@ extern struct quotactl_ops vfs_quotactl_
13574   * need a lot of space in journal for dquot structure allocation. */
13575  static __inline__ void DQUOT_INIT(struct inode *inode)
13576  {
13577 -       BUG_ON(!inode->i_sb);
13578 -       if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
13579 -               inode->i_sb->dq_op->initialize(inode, -1);
13580 +       if (!dqhash_valid(inode->i_dqh))
13581 +               return;
13582 +       BUG_ON(!inode->i_dqh);
13583 +       /* without HANDLE_DQHASH_NULL dqhash_valid() is constant true, so the BUG_ON above is what traps a NULL i_dqh */
13584 +       if (dqh_any_quota_enabled(inode->i_dqh) && !IS_NOQUOTA(inode))
13585 +               inode->i_dqh->dqh_qop->initialize(inode, -1);
13586  }
13587  
13588  /* The same as with DQUOT_INIT */
13589 @@ -73,8 +76,8 @@ static __inline__ void DQUOT_DROP(struct
13590         /* Here we can get arbitrary inode from clear_inode() so we have
13591          * to be careful. OTOH we don't need locking as quota operations
13592          * are allowed to change only at mount time */
13593 -       if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
13594 -           && inode->i_sb->dq_op->drop) {
13595 +       if (!IS_NOQUOTA(inode) && inode->i_dqh && inode->i_dqh->dqh_qop
13596 +           && inode->i_dqh->dqh_qop->drop) {
13597                 int cnt;
13598                 /* Test before calling to rule out calls from proc and such
13599                   * where we are not allowed to block. Note that this is
13600 @@ -85,7 +88,7 @@ static __inline__ void DQUOT_DROP(struct
13601                         if (inode->i_dquot[cnt] != NODQUOT)
13602                                 break;
13603                 if (cnt < MAXQUOTAS)
13604 -                       inode->i_sb->dq_op->drop(inode);
13605 +                       inode->i_dqh->dqh_qop->drop(inode);
13606         }
13607  }
13608  
13609 @@ -93,9 +96,9 @@ static __inline__ void DQUOT_DROP(struct
13610   * a transaction (deadlocks possible otherwise) */
13611  static __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
13612  {
13613 -       if (sb_any_quota_enabled(inode->i_sb)) {
13614 +       if (dqh_any_quota_enabled(inode->i_dqh)) {
13615                 /* Used space is updated in alloc_space() */
13616 -               if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA)
13617 +               if (inode->i_dqh->dqh_qop->alloc_space(inode, nr, 1) == NO_QUOTA)
13618                         return 1;
13619         }
13620         else
13621 @@ -113,9 +116,9 @@ static __inline__ int DQUOT_PREALLOC_SPA
13622  
13623  static __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
13624  {
13625 -       if (sb_any_quota_enabled(inode->i_sb)) {
13626 +       if (dqh_any_quota_enabled(inode->i_dqh)) {
13627                 /* Used space is updated in alloc_space() */
13628 -               if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA)
13629 +               if (inode->i_dqh->dqh_qop->alloc_space(inode, nr, 0) == NO_QUOTA)
13630                         return 1;
13631         }
13632         else
13633 @@ -133,9 +136,9 @@ static __inline__ int DQUOT_ALLOC_SPACE(
13634  
13635  static __inline__ int DQUOT_ALLOC_INODE(struct inode *inode)
13636  {
13637 -       if (sb_any_quota_enabled(inode->i_sb)) {
13638 +       if (dqh_any_quota_enabled(inode->i_dqh)) {
13639                 DQUOT_INIT(inode);
13640 -               if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
13641 +               if (inode->i_dqh->dqh_qop->alloc_inode(inode, 1) == NO_QUOTA)
13642                         return 1;
13643         }
13644         return 0;
13645 @@ -143,8 +146,8 @@ static __inline__ int DQUOT_ALLOC_INODE(
13646  
13647  static __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
13648  {
13649 -       if (sb_any_quota_enabled(inode->i_sb))
13650 -               inode->i_sb->dq_op->free_space(inode, nr);
13651 +       if (dqh_any_quota_enabled(inode->i_dqh))
13652 +               inode->i_dqh->dqh_qop->free_space(inode, nr);
13653         else
13654                 inode_sub_bytes(inode, nr);
13655  }
13656 @@ -157,29 +160,30 @@ static __inline__ void DQUOT_FREE_SPACE(
13657  
13658  static __inline__ void DQUOT_FREE_INODE(struct inode *inode)
13659  {
13660 -       if (sb_any_quota_enabled(inode->i_sb))
13661 -               inode->i_sb->dq_op->free_inode(inode, 1);
13662 +       if (dqh_any_quota_enabled(inode->i_dqh))
13663 +               inode->i_dqh->dqh_qop->free_inode(inode, 1);
13664  }
13665  
13666  static __inline__ int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
13667  {
13668 -       if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
13669 +       if (dqh_any_quota_enabled(inode->i_dqh) && !IS_NOQUOTA(inode)) {
13670                 DQUOT_INIT(inode);
13671 -               if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
13672 +               if (inode->i_dqh->dqh_qop->transfer(inode, iattr) == NO_QUOTA)
13673                         return 1;
13674         }
13675         return 0;
13676  }
13677  
13678  /* The following two functions cannot be called inside a transaction */
13679 -#define DQUOT_SYNC(sb) sync_dquots(sb, -1)
13680 +#define DQUOT_SYNC(hash)       sync_dquots(hash, -1)
13681  
13682 -static __inline__ int DQUOT_OFF(struct super_block *sb)
13683 +static __inline__ int DQUOT_OFF(struct dqhash *hash)
13684  {
13685         int ret = -ENOSYS;
13686  
13687 -       if (sb_any_quota_enabled(sb) && sb->s_qcop && sb->s_qcop->quota_off)
13688 -               ret = sb->s_qcop->quota_off(sb, -1);
13689 +       if (dqh_any_quota_enabled(hash) && hash->dqh_qcop &&
13690 +               hash->dqh_qcop->quota_off)
13691 +               ret = hash->dqh_qcop->quota_off(hash, -1);
13692         return ret;
13693  }
13694  
13695 @@ -194,8 +198,8 @@ static __inline__ int DQUOT_OFF(struct s
13696  #define DQUOT_DROP(inode)                      do { } while(0)
13697  #define DQUOT_ALLOC_INODE(inode)               (0)
13698  #define DQUOT_FREE_INODE(inode)                        do { } while(0)
13699 -#define DQUOT_SYNC(sb)                         do { } while(0)
13700 -#define DQUOT_OFF(sb)                          do { } while(0)
13701 +#define DQUOT_SYNC(hash)                       do { } while(0)
13702 +#define DQUOT_OFF(hash)                                do { } while(0)
13703  #define DQUOT_TRANSFER(inode, iattr)           (0)
13704  static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
13705  {
13706 diff -NurpP --minimal linux-2.6.16.20/include/linux/reiserfs_fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/reiserfs_fs.h
13707 --- linux-2.6.16.20/include/linux/reiserfs_fs.h 2006-04-09 13:49:57 +0200
13708 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/reiserfs_fs.h    2006-04-26 19:07:00 +0200
13709 @@ -829,6 +829,18 @@ struct stat_data_v1 {
13710  #define REISERFS_COMPR_FL     EXT2_COMPR_FL
13711  #define REISERFS_NOTAIL_FL    EXT2_NOTAIL_FL
13712  
13713 +/* unfortunately reiserfs sdattr is only 16 bit */
13714 +#define REISERFS_BARRIER_FL   (EXT2_BARRIER_FL >> 16)
13715 +#define REISERFS_IUNLINK_FL   (EXT2_IUNLINK_FL >> 16)
13716 +
13717 +#ifdef CONFIG_VSERVER_LEGACY
13718 +#define REISERFS_FL_USER_VISIBLE       (REISERFS_IUNLINK_FL|0x80FF)
13719 +#define REISERFS_FL_USER_MODIFIABLE    (REISERFS_IUNLINK_FL|0x80FF)
13720 +#else
13721 +#define REISERFS_FL_USER_VISIBLE       0x80FF
13722 +#define REISERFS_FL_USER_MODIFIABLE    0x80FF
13723 +#endif
13724 +
13725  /* persistent flags that file inherits from the parent directory */
13726  #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL |        \
13727                                 REISERFS_SYNC_FL |      \
13728 @@ -1904,6 +1916,7 @@ static inline void reiserfs_update_sd(st
13729  void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
13730  void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs);
13731  int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
13732 +int reiserfs_sync_flags(struct inode *inode);
13733  
13734  /* namei.c */
13735  void set_de_name_and_namelen(struct reiserfs_dir_entry *de);
13736 diff -NurpP --minimal linux-2.6.16.20/include/linux/reiserfs_fs_sb.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/reiserfs_fs_sb.h
13737 --- linux-2.6.16.20/include/linux/reiserfs_fs_sb.h      2006-02-18 14:40:35 +0100
13738 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/reiserfs_fs_sb.h 2006-04-26 19:07:00 +0200
13739 @@ -456,6 +456,7 @@ enum reiserfs_mount_options {
13740         REISERFS_POSIXACL,
13741         REISERFS_BARRIER_NONE,
13742         REISERFS_BARRIER_FLUSH,
13743 +       REISERFS_TAGGED,
13744  
13745         /* Actions on error */
13746         REISERFS_ERROR_PANIC,
13747 diff -NurpP --minimal linux-2.6.16.20/include/linux/sched.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/sched.h
13748 --- linux-2.6.16.20/include/linux/sched.h       2006-04-09 13:49:57 +0200
13749 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/sched.h  2006-04-26 19:07:00 +0200
13750 @@ -15,6 +15,7 @@
13751  #include <linux/cpumask.h>
13752  #include <linux/errno.h>
13753  #include <linux/nodemask.h>
13754 +#include <linux/vs_base.h>
13755  
13756  #include <asm/system.h>
13757  #include <asm/semaphore.h>
13758 @@ -61,12 +62,13 @@ struct exec_domain;
13759  #define CLONE_UNTRACED         0x00800000      /* set if the tracing process can't force CLONE_PTRACE on this clone */
13760  #define CLONE_CHILD_SETTID     0x01000000      /* set the TID in the child */
13761  #define CLONE_STOPPED          0x02000000      /* Start in stopped state */
13762 +#define CLONE_KTHREAD          0x10000000      /* clone a kernel thread */
13763  
13764  /*
13765   * List of flags we want to share for kernel threads,
13766   * if only because they are not used by them anyway.
13767   */
13768 -#define CLONE_KERNEL   (CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
13769 +#define CLONE_KERNEL   (CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_KTHREAD)
13770  
13771  /*
13772   * These are the constant used to fake the fixed-point load-average
13773 @@ -129,6 +131,7 @@ extern unsigned long nr_iowait(void);
13774  #define EXIT_DEAD              32
13775  /* in tsk->state again */
13776  #define TASK_NONINTERACTIVE    64
13777 +#define TASK_ONHOLD            128
13778  
13779  #define __set_task_state(tsk, state_value)             \
13780         do { (tsk)->state = (state_value); } while (0)
13781 @@ -257,27 +260,30 @@ extern void arch_unmap_area_topdown(stru
13782   * The mm counters are not protected by its page_table_lock,
13783   * so must be incremented atomically.
13784   */
13785 -#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
13786 -#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
13787 -#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
13788 -#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
13789 -#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
13790  typedef atomic_long_t mm_counter_t;
13791 +#define __set_mm_counter(mm, member, value) \
13792 +       atomic_long_set(&(mm)->_##member, value)
13793 +#define get_mm_counter(mm, member) \
13794 +       ((unsigned long)atomic_long_read(&(mm)->_##member))
13795  
13796  #else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
13797  /*
13798   * The mm counters are protected by its page_table_lock,
13799   * so can be incremented directly.
13800   */
13801 -#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
13802 -#define get_mm_counter(mm, member) ((mm)->_##member)
13803 -#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
13804 -#define inc_mm_counter(mm, member) (mm)->_##member++
13805 -#define dec_mm_counter(mm, member) (mm)->_##member--
13806  typedef unsigned long mm_counter_t;
13807 +#define __set_mm_counter(mm, member, value) (mm)->_##member = (value)
13808 +#define get_mm_counter(mm, member) ((mm)->_##member)
13809  
13810  #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
13811  
13812 +#define set_mm_counter(mm, member, value) \
13813 +       vx_ ## member ## pages_sub((mm), (get_mm_counter(mm, member) - (value)))
13814 +#define add_mm_counter(mm, member, value) \
13815 +       vx_ ## member ## pages_add((mm), (value))
13816 +#define inc_mm_counter(mm, member) vx_ ## member ## pages_inc((mm))
13817 +#define dec_mm_counter(mm, member) vx_ ## member ## pages_dec((mm))
13818 +
13819  #define get_mm_rss(mm)                                 \
13820         (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
13821  #define update_hiwater_rss(mm) do {                    \
13822 @@ -336,6 +342,7 @@ struct mm_struct {
13823  
13824         /* Architecture-specific MM context */
13825         mm_context_t context;
13826 +       struct vx_info *mm_vx_info;
13827  
13828         /* Token based thrashing protection. */
13829         unsigned long swap_token_time;
13830 @@ -514,9 +521,10 @@ struct user_struct {
13831         /* Hash table maintenance information */
13832         struct list_head uidhash_list;
13833         uid_t uid;
13834 +       xid_t xid;
13835  };
13836  
13837 -extern struct user_struct *find_user(uid_t);
13838 +extern struct user_struct *find_user(xid_t, uid_t);
13839  
13840  extern struct user_struct root_user;
13841  #define INIT_USER (&root_user)
13842 @@ -818,6 +826,14 @@ struct task_struct {
13843         
13844         void *security;
13845         struct audit_context *audit_context;
13846 +
13847 +/* vserver context data */
13848 +       struct vx_info *vx_info;
13849 +       struct nx_info *nx_info;
13850 +
13851 +       xid_t xid;
13852 +       nid_t nid;
13853 +
13854         seccomp_t seccomp;
13855  
13856  /* Thread group tracking */
13857 @@ -1020,13 +1036,19 @@ extern struct task_struct init_task;
13858  
13859  extern struct   mm_struct init_mm;
13860  
13861 -#define find_task_by_pid(nr)   find_task_by_pid_type(PIDTYPE_PID, nr)
13862 +
13863 +#define find_task_by_real_pid(nr) \
13864 +       find_task_by_pid_type(PIDTYPE_PID, nr)
13865 +#define find_task_by_pid(nr) \
13866 +       find_task_by_pid_type(PIDTYPE_PID, \
13867 +               vx_rmap_pid(nr))
13868 +
13869  extern struct task_struct *find_task_by_pid_type(int type, int pid);
13870  extern void set_special_pids(pid_t session, pid_t pgrp);
13871  extern void __set_special_pids(pid_t session, pid_t pgrp);
13872  
13873  /* per-UID process charging. */
13874 -extern struct user_struct * alloc_uid(uid_t);
13875 +extern struct user_struct * alloc_uid(xid_t, uid_t);
13876  static inline struct user_struct *get_uid(struct user_struct *u)
13877  {
13878         atomic_inc(&u->__count);
13879 diff -NurpP --minimal linux-2.6.16.20/include/linux/security.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/security.h
13880 --- linux-2.6.16.20/include/linux/security.h    2006-04-09 13:49:57 +0200
13881 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/security.h       2006-04-26 19:07:00 +0200
13882 @@ -1102,7 +1102,7 @@ struct security_operations {
13883         int (*acct) (struct file * file);
13884         int (*sysctl) (struct ctl_table * table, int op);
13885         int (*capable) (struct task_struct * tsk, int cap);
13886 -       int (*quotactl) (int cmds, int type, int id, struct super_block * sb);
13887 +       int (*quotactl) (int cmds, int type, int id, struct dqhash *);
13888         int (*quota_on) (struct dentry * dentry);
13889         int (*syslog) (int type);
13890         int (*settime) (struct timespec *ts, struct timezone *tz);
13891 @@ -1357,9 +1357,9 @@ static inline int security_sysctl(struct
13892  }
13893  
13894  static inline int security_quotactl (int cmds, int type, int id,
13895 -                                    struct super_block *sb)
13896 +                                    struct dqhash *hash)
13897  {
13898 -       return security_ops->quotactl (cmds, type, id, sb);
13899 +       return security_ops->quotactl (cmds, type, id, hash);
13900  }
13901  
13902  static inline int security_quota_on (struct dentry * dentry)
13903 @@ -2060,7 +2060,7 @@ static inline int security_sysctl(struct
13904  }
13905  
13906  static inline int security_quotactl (int cmds, int type, int id,
13907 -                                    struct super_block * sb)
13908 +                                    struct dqhash * hash)
13909  {
13910         return 0;
13911  }
13912 diff -NurpP --minimal linux-2.6.16.20/include/linux/shmem_fs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/shmem_fs.h
13913 --- linux-2.6.16.20/include/linux/shmem_fs.h    2006-04-09 13:49:57 +0200
13914 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/shmem_fs.h       2006-04-26 19:07:00 +0200
13915 @@ -8,6 +8,9 @@
13916  
13917  #define SHMEM_NR_DIRECT 16
13918  
13919 +#define TMPFS_SUPER_MAGIC      0x01021994
13920 +
13921 +
13922  struct shmem_inode_info {
13923         spinlock_t              lock;
13924         unsigned long           flags;
13925 diff -NurpP --minimal linux-2.6.16.20/include/linux/stat.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/stat.h
13926 --- linux-2.6.16.20/include/linux/stat.h        2004-08-14 12:55:10 +0200
13927 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/stat.h   2006-04-26 19:07:00 +0200
13928 @@ -63,6 +63,7 @@ struct kstat {
13929         unsigned int    nlink;
13930         uid_t           uid;
13931         gid_t           gid;
13932 +       tag_t           tag;
13933         dev_t           rdev;
13934         loff_t          size;
13935         struct timespec  atime;
13936 diff -NurpP --minimal linux-2.6.16.20/include/linux/sunrpc/auth.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/sunrpc/auth.h
13937 --- linux-2.6.16.20/include/linux/sunrpc/auth.h 2006-02-18 14:40:35 +0100
13938 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/sunrpc/auth.h    2006-04-26 19:07:00 +0200
13939 @@ -28,6 +28,7 @@
13940  struct auth_cred {
13941         uid_t   uid;
13942         gid_t   gid;
13943 +       tag_t   tag;
13944         struct group_info *group_info;
13945  };
13946  
13947 diff -NurpP --minimal linux-2.6.16.20/include/linux/sunrpc/clnt.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/sunrpc/clnt.h
13948 --- linux-2.6.16.20/include/linux/sunrpc/clnt.h 2006-02-18 14:40:35 +0100
13949 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/sunrpc/clnt.h    2006-04-26 19:07:00 +0200
13950 @@ -51,7 +51,8 @@ struct rpc_clnt {
13951                                 cl_intr     : 1,/* interruptible */
13952                                 cl_autobind : 1,/* use getport() */
13953                                 cl_oneshot  : 1,/* dispose after use */
13954 -                               cl_dead     : 1;/* abandoned */
13955 +                               cl_dead     : 1,/* abandoned */
13956 +                               cl_tag      : 1;/* context tagging */
13957  
13958         struct rpc_rtt *        cl_rtt;         /* RTO estimator data */
13959         struct rpc_portmap *    cl_pmap;        /* port mapping */
13960 diff -NurpP --minimal linux-2.6.16.20/include/linux/syscalls.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/syscalls.h
13961 --- linux-2.6.16.20/include/linux/syscalls.h    2006-04-09 13:49:57 +0200
13962 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/syscalls.h       2006-04-26 19:07:00 +0200
13963 @@ -293,6 +293,8 @@ asmlinkage long sys_symlink(const char _
13964  asmlinkage long sys_unlink(const char __user *pathname);
13965  asmlinkage long sys_rename(const char __user *oldname,
13966                                 const char __user *newname);
13967 +asmlinkage long sys_copyfile(const char __user *from, const char __user *to,
13968 +                               umode_t mode);
13969  asmlinkage long sys_chmod(const char __user *filename, mode_t mode);
13970  asmlinkage long sys_fchmod(unsigned int fd, mode_t mode);
13971  
13972 diff -NurpP --minimal linux-2.6.16.20/include/linux/sysctl.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/sysctl.h
13973 --- linux-2.6.16.20/include/linux/sysctl.h      2006-04-09 13:49:57 +0200
13974 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/sysctl.h 2006-04-26 19:07:00 +0200
13975 @@ -93,6 +93,7 @@ enum
13976         KERN_CAP_BSET=14,       /* int: capability bounding set */
13977         KERN_PANIC=15,          /* int: panic timeout */
13978         KERN_REALROOTDEV=16,    /* real root device to mount after initrd */
13979 +       KERN_VSHELPER=17,       /* string: path to vshelper policy agent */
13980  
13981         KERN_SPARC_REBOOT=21,   /* reboot command on Sparc */
13982         KERN_CTLALTDEL=22,      /* int: allow ctl-alt-del to reboot */
13983 @@ -882,6 +883,9 @@ typedef int ctl_handler (ctl_table *tabl
13984  typedef int proc_handler (ctl_table *ctl, int write, struct file * filp,
13985                           void __user *buffer, size_t *lenp, loff_t *ppos);
13986  
13987 +typedef int virt_handler (struct ctl_table *ctl, int write, xid_t xid,
13988 +                         void **datap, size_t *lenp);
13989 +
13990  extern int proc_dostring(ctl_table *, int, struct file *,
13991                          void __user *, size_t *, loff_t *);
13992  extern int proc_dointvec(ctl_table *, int, struct file *,
13993 @@ -963,6 +967,7 @@ struct ctl_table 
13994         mode_t mode;
13995         ctl_table *child;
13996         proc_handler *proc_handler;     /* Callback for text formatting */
13997 +       virt_handler *virt_handler;     /* Context virtualization */
13998         ctl_handler *strategy;          /* Callback function for all r/w */
13999         struct proc_dir_entry *de;      /* /proc control block */
14000         void *extra1;
14001 diff -NurpP --minimal linux-2.6.16.20/include/linux/sysfs.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/sysfs.h
14002 --- linux-2.6.16.20/include/linux/sysfs.h       2005-08-29 22:25:42 +0200
14003 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/sysfs.h  2006-04-26 19:07:00 +0200
14004 @@ -12,6 +12,8 @@
14005  
14006  #include <asm/atomic.h>
14007  
14008 +#define SYSFS_SUPER_MAGIC      0x62656572
14009 +
14010  struct kobject;
14011  struct module;
14012  
14013 diff -NurpP --minimal linux-2.6.16.20/include/linux/time.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/time.h
14014 --- linux-2.6.16.20/include/linux/time.h        2006-02-15 13:54:35 +0100
14015 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/time.h   2006-05-29 17:31:53 +0200
14016 @@ -147,6 +147,8 @@ extern struct timespec ns_to_timespec(co
14017   */
14018  extern struct timeval ns_to_timeval(const nsec_t nsec);
14019  
14020 +#include <linux/vs_time.h>
14021 +
14022  #endif /* __KERNEL__ */
14023  
14024  #define NFDBITS                        __NFDBITS
14025 diff -NurpP --minimal linux-2.6.16.20/include/linux/types.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/types.h
14026 --- linux-2.6.16.20/include/linux/types.h       2006-02-18 14:40:35 +0100
14027 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/types.h  2006-04-26 19:07:00 +0200
14028 @@ -38,6 +38,9 @@ typedef __kernel_uid32_t      uid_t;
14029  typedef __kernel_gid32_t       gid_t;
14030  typedef __kernel_uid16_t        uid16_t;
14031  typedef __kernel_gid16_t        gid16_t;
14032 +typedef unsigned int           xid_t;
14033 +typedef unsigned int           nid_t;
14034 +typedef unsigned int           tag_t;
14035  
14036  #ifdef CONFIG_UID16
14037  /* This is defined by include/asm-{arch}/posix_types.h */
14038 diff -NurpP --minimal linux-2.6.16.20/include/linux/vroot.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vroot.h
14039 --- linux-2.6.16.20/include/linux/vroot.h       1970-01-01 01:00:00 +0100
14040 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vroot.h  2006-04-26 19:07:00 +0200
14041 @@ -0,0 +1,51 @@
14042 +
14043 +/*
14044 + * include/linux/vroot.h
14045 + *
14046 + * written by Herbert Pötzl, 9/11/2002
14047 + * ported to 2.6 by Herbert Pötzl, 30/12/2004
14048 + *
14049 + * Copyright (C) 2002-2005 by Herbert Pötzl.
14050 + * Redistribution of this file is permitted under the
14051 + * GNU General Public License.
14052 + */
14053 +
14054 +#ifndef _LINUX_VROOT_H
14055 +#define _LINUX_VROOT_H
14056 +
14057 +
14058 +#ifdef __KERNEL__
14059 +
14060 +/* Possible states of device */
14061 +enum {
14062 +       Vr_unbound,
14063 +       Vr_bound,
14064 +};
14065 +
14066 +struct vroot_device {
14067 +       int             vr_number;
14068 +       int             vr_refcnt;
14069 +
14070 +       struct semaphore        vr_ctl_mutex;
14071 +       struct block_device    *vr_device;
14072 +       int                     vr_state;
14073 +};
14074 +
14075 +
14076 +typedef struct block_device *(vroot_grb_func)(struct block_device *);
14077 +
14078 +extern int register_vroot_grb(vroot_grb_func *);
14079 +extern int unregister_vroot_grb(vroot_grb_func *);
14080 +
14081 +#endif /* __KERNEL__ */
14082 +
14083 +#define MAX_VROOT_DEFAULT      8
14084 +
14085 +/*
14086 + * IOCTL commands --- we will commandeer 0x56 ('V')
14087 + */
14088 +
14089 +#define VROOT_SET_DEV          0x5600
14090 +#define VROOT_CLR_DEV          0x5601
14091 +
14092 +#endif /* _LINUX_VROOT_H */
14093 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_base.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_base.h
14094 --- linux-2.6.16.20/include/linux/vs_base.h     1970-01-01 01:00:00 +0100
14095 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_base.h        2006-04-27 21:24:37 +0200
14096 @@ -0,0 +1,130 @@
14097 +#ifndef _VX_VS_BASE_H
14098 +#define _VX_VS_BASE_H
14099 +
14100 +#include "vserver/context.h"
14101 +
14102 +
14103 +#define vx_task_xid(t) ((t)->xid)
14104 +
14105 +#define vx_current_xid() vx_task_xid(current)
14106 +
14107 +#define vx_check(c,m)  __vx_check(vx_current_xid(),c,m)
14108 +
14109 +#define vx_weak_check(c,m)     ((m) ? vx_check(c,m) : 1)
14110 +
14111 +
14112 +/*
14113 + * check the given context (cid) for ADMIN/WATCH and
14114 + * optionally check the supplied id as selected by mode
14115 + */
14116 +static inline int __vx_check(xid_t cid, xid_t id, unsigned int mode)
14117 +{
14118 +       if (mode & VX_ARG_MASK) {
14119 +               if ((mode & VX_IDENT) &&
14120 +                       (id == cid))
14121 +                       return 1;
14122 +       }
14123 +       if (mode & VX_ATR_MASK) {
14124 +               if ((mode & VX_DYNAMIC) &&
14125 +                       (id >= MIN_D_CONTEXT) &&
14126 +                       (id <= MAX_S_CONTEXT))
14127 +                       return 1;
14128 +               if ((mode & VX_STATIC) &&
14129 +                       (id > 1) && (id < MIN_D_CONTEXT))
14130 +                       return 1;
14131 +       }
14132 +       return (((mode & VX_ADMIN) && (cid == 0)) ||
14133 +               ((mode & VX_WATCH) && (cid == 1)) ||
14134 +               ((mode & VX_HOSTID) && (id == 0)));
14135 +}
14136 +
14137 +
14138 +#define __vx_state(v)  ((v) ? ((v)->vx_state) : 0)
14139 +
14140 +#define vx_info_state(v,m)     (__vx_state(v) & (m))
14141 +
14142 +
14143 +/* generic flag merging */
14144 +
14145 +#define vx_check_flags(v,m,f)  (((v) & (m)) ^ (f))
14146 +
14147 +#define vx_mask_flags(v,f,m)   (((v) & ~(m)) | ((f) & (m)))
14148 +
14149 +#define vx_mask_mask(v,f,m)    (((v) & ~(m)) | ((v) & (f) & (m)))
14150 +
14151 +#define vx_check_bit(v,n)      ((v) & (1LL << (n)))
14152 +
14153 +
14154 +/* context flags */
14155 +
14156 +#define __vx_flags(v)  ((v) ? (v)->vx_flags : 0)
14157 +
14158 +#define vx_current_flags()     __vx_flags(current->vx_info)
14159 +
14160 +#define vx_info_flags(v,m,f) \
14161 +       vx_check_flags(__vx_flags(v),(m),(f))
14162 +
14163 +#define task_vx_flags(t,m,f) \
14164 +       ((t) && vx_info_flags((t)->vx_info, (m), (f)))
14165 +
14166 +#define vx_flags(m,f)  vx_info_flags(current->vx_info,(m),(f))
14167 +
14168 +
14169 +/* context caps */
14170 +
14171 +#define __vx_ccaps(v)  ((v) ? (v)->vx_ccaps : 0)
14172 +
14173 +#define vx_current_ccaps()     __vx_ccaps(current->vx_info)
14174 +
14175 +#define vx_info_ccaps(v,c)     (__vx_ccaps(v) & (c))
14176 +
14177 +#define vx_ccaps(c)    vx_info_ccaps(current->vx_info,(c))
14178 +
14179 +
14180 +#define __vx_mcaps(v)  ((v) ? (v)->vx_ccaps >> 32UL : ~0 )
14181 +
14182 +#define vx_info_mcaps(v,c)     (__vx_mcaps(v) & (c))
14183 +
14184 +#define vx_mcaps(c)    vx_info_mcaps(current->vx_info,(c))
14185 +
14186 +
14187 +/* context bcap mask */
14188 +
14189 +#define __vx_bcaps(v)  ((v) ? (v)->vx_bcaps : ~0 )
14190 +
14191 +#define vx_current_bcaps()     __vx_bcaps(current->vx_info)
14192 +
14193 +#define vx_info_bcaps(v,c)     (__vx_bcaps(v) & (c))
14194 +
14195 +#define vx_bcaps(c)    vx_info_bcaps(current->vx_info,(c))
14196 +
14197 +
14198 +#define vx_info_cap_bset(v)    ((v) ? (v)->vx_cap_bset : cap_bset)
14199 +
14200 +#define vx_current_cap_bset()  vx_info_cap_bset(current->vx_info)
14201 +
14202 +
14203 +#define __vx_info_mbcap(v,b) \
14204 +       (!vx_info_flags(v, VXF_STATE_SETUP, 0) ? \
14205 +       vx_info_bcaps(v, b) : (b))
14206 +
14207 +#define vx_info_mbcap(v,b)     __vx_info_mbcap(v,cap_t(b))
14208 +
14209 +#define task_vx_mbcap(t,b) \
14210 +       vx_info_mbcap((t)->vx_info, (t)->b)
14211 +
14212 +#define vx_mbcap(b)    task_vx_mbcap(current,b)
14213 +
14214 +#define vx_cap_raised(v,c,f)   (vx_info_mbcap(v,c) & CAP_TO_MASK(f))
14215 +
14216 +#define vx_capable(b,c) (capable(b) || \
14217 +       (cap_raised(current->cap_effective,b) && vx_ccaps(c)))
14218 +
14219 +
14220 +#define vx_current_initpid(n) \
14221 +       (current->vx_info && \
14222 +       (current->vx_info->vx_initpid == (n)))
14223 +
14224 +#else
14225 +#warning duplicate inclusion
14226 +#endif
14227 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_context.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_context.h
14228 --- linux-2.6.16.20/include/linux/vs_context.h  1970-01-01 01:00:00 +0100
14229 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_context.h     2006-04-28 03:25:06 +0200
14230 @@ -0,0 +1,241 @@
14231 +#ifndef _VX_VS_CONTEXT_H
14232 +#define _VX_VS_CONTEXT_H
14233 +
14234 +#include <linux/kernel.h>
14235 +#include "vserver/debug.h"
14236 +#include "vserver/history.h"
14237 +
14238 +
14239 +#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__,__HERE__)
14240 +
14241 +static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
14242 +       const char *_file, int _line, void *_here)
14243 +{
14244 +       if (!vxi)
14245 +               return NULL;
14246 +
14247 +       vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])",
14248 +               vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
14249 +               _file, _line);
14250 +       __vxh_get_vx_info(vxi, _here);
14251 +
14252 +       atomic_inc(&vxi->vx_usecnt);
14253 +       return vxi;
14254 +}
14255 +
14256 +
14257 +extern void free_vx_info(struct vx_info *);
14258 +
14259 +#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__,__HERE__)
14260 +
14261 +static inline void __put_vx_info(struct vx_info *vxi,
14262 +       const char *_file, int _line, void *_here)
14263 +{
14264 +       if (!vxi)
14265 +               return;
14266 +
14267 +       vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])",
14268 +               vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
14269 +               _file, _line);
14270 +       __vxh_put_vx_info(vxi, _here);
14271 +
14272 +       if (atomic_dec_and_test(&vxi->vx_usecnt))
14273 +               free_vx_info(vxi);
14274 +}
14275 +
14276 +
14277 +#define init_vx_info(p,i) __init_vx_info(p,i,__FILE__,__LINE__,__HERE__)
14278 +
14279 +static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi,
14280 +       const char *_file, int _line, void *_here)
14281 +{
14282 +       if (vxi) {
14283 +               vxlprintk(VXD_CBIT(xid, 3),
14284 +                       "init_vx_info(%p[#%d.%d])",
14285 +                       vxi, vxi?vxi->vx_id:0,
14286 +                       vxi?atomic_read(&vxi->vx_usecnt):0,
14287 +                       _file, _line);
14288 +               __vxh_init_vx_info(vxi, vxp, _here);
14289 +
14290 +               atomic_inc(&vxi->vx_usecnt);
14291 +       }
14292 +       *vxp = vxi;
14293 +}
14294 +
14295 +
14296 +#define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__,__HERE__)
14297 +
14298 +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
14299 +       const char *_file, int _line, void *_here)
14300 +{
14301 +       struct vx_info *vxo;
14302 +
14303 +       if (!vxi)
14304 +               return;
14305 +
14306 +       vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])",
14307 +               vxi, vxi?vxi->vx_id:0,
14308 +               vxi?atomic_read(&vxi->vx_usecnt):0,
14309 +               _file, _line);
14310 +       __vxh_set_vx_info(vxi, vxp, _here);
14311 +
14312 +       atomic_inc(&vxi->vx_usecnt);
14313 +       vxo = xchg(vxp, vxi);
14314 +       BUG_ON(vxo);
14315 +}
14316 +
14317 +
14318 +#define clr_vx_info(p) __clr_vx_info(p,__FILE__,__LINE__,__HERE__)
14319 +
14320 +static inline void __clr_vx_info(struct vx_info **vxp,
14321 +       const char *_file, int _line, void *_here)
14322 +{
14323 +       struct vx_info *vxo;
14324 +
14325 +       vxo = xchg(vxp, NULL);
14326 +       if (!vxo)
14327 +               return;
14328 +
14329 +       vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])",
14330 +               vxo, vxo?vxo->vx_id:0,
14331 +               vxo?atomic_read(&vxo->vx_usecnt):0,
14332 +               _file, _line);
14333 +       __vxh_clr_vx_info(vxo, vxp, _here);
14334 +
14335 +       if (atomic_dec_and_test(&vxo->vx_usecnt))
14336 +               free_vx_info(vxo);
14337 +}
14338 +
14339 +
14340 +#define claim_vx_info(v,p) \
14341 +       __claim_vx_info(v,p,__FILE__,__LINE__,__HERE__)
14342 +
14343 +static inline void __claim_vx_info(struct vx_info *vxi,
14344 +       struct task_struct *task,
14345 +       const char *_file, int _line, void *_here)
14346 +{
14347 +       vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p",
14348 +               vxi, vxi?vxi->vx_id:0,
14349 +               vxi?atomic_read(&vxi->vx_usecnt):0,
14350 +               vxi?atomic_read(&vxi->vx_tasks):0,
14351 +               task, _file, _line);
14352 +       __vxh_claim_vx_info(vxi, task, _here);
14353 +
14354 +       atomic_inc(&vxi->vx_tasks);
14355 +}
14356 +
14357 +
14358 +extern void unhash_vx_info(struct vx_info *);
14359 +
14360 +#define release_vx_info(v,p) \
14361 +       __release_vx_info(v,p,__FILE__,__LINE__,__HERE__)
14362 +
14363 +static inline void __release_vx_info(struct vx_info *vxi,
14364 +       struct task_struct *task,
14365 +       const char *_file, int _line, void *_here)
14366 +{
14367 +       vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p",
14368 +               vxi, vxi?vxi->vx_id:0,
14369 +               vxi?atomic_read(&vxi->vx_usecnt):0,
14370 +               vxi?atomic_read(&vxi->vx_tasks):0,
14371 +               task, _file, _line);
14372 +       __vxh_release_vx_info(vxi, task, _here);
14373 +
14374 +       might_sleep();
14375 +
14376 +       if (atomic_dec_and_test(&vxi->vx_tasks))
14377 +               unhash_vx_info(vxi);
14378 +}
14379 +
14380 +
14381 +#define task_get_vx_info(p) \
14382 +       __task_get_vx_info(p,__FILE__,__LINE__,__HERE__)
14383 +
14384 +static inline struct vx_info *__task_get_vx_info(struct task_struct *p,
14385 +       const char *_file, int _line, void *_here)
14386 +{
14387 +       struct vx_info *vxi;
14388 +
14389 +       task_lock(p);
14390 +       vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)",
14391 +               p, _file, _line);
14392 +       vxi = __get_vx_info(p->vx_info, _file, _line, _here);
14393 +       task_unlock(p);
14394 +       return vxi;
14395 +}
14396 +
14397 +
14398 +static inline void __wakeup_vx_info(struct vx_info *vxi)
14399 +{
14400 +       if (waitqueue_active(&vxi->vx_wait))
14401 +               wake_up_interruptible(&vxi->vx_wait);
14402 +}
14403 +
14404 +
14405 +#define enter_vx_info(v,s)     __enter_vx_info(v,s,__FILE__,__LINE__)
14406 +
14407 +static inline void __enter_vx_info(struct vx_info *vxi,
14408 +       struct vx_info_save *vxis, const char *_file, int _line)
14409 +{
14410 +       vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]",
14411 +               vxi, vxi ? vxi->vx_id : 0, vxis, current,
14412 +               current->xid, current->vx_info, _file, _line);
14413 +       vxis->vxi = xchg(&current->vx_info, vxi);
14414 +       vxis->xid = current->xid;
14415 +       current->xid = vxi ? vxi->vx_id : 0;
14416 +}
14417 +/* Put the vx_info and xid saved in *vxis back on current. */
14418 +#define leave_vx_info(s)       __leave_vx_info(s,__FILE__,__LINE__)
14419 +
14420 +static inline void __leave_vx_info(struct vx_info_save *vxis,
14421 +       const char *_file, int _line)
14422 +{
14423 +       vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]",
14424 +               vxis, vxis->xid, vxis->vxi, current,
14425 +               current->xid, current->vx_info, _file, _line);
14426 +       (void)xchg(&current->vx_info, vxis->vxi);
14427 +       current->xid = vxis->xid;
14428 +}
14429 +
14430 +/* Save current's vx_info/xid in *vxis, then clear them (NULL and 0). */
14431 +static inline void __enter_vx_admin(struct vx_info_save *vxis)
14432 +{
14433 +       vxis->vxi = xchg(&current->vx_info, NULL);
14434 +       vxis->xid = current->xid;
14435 +       current->xid = 0;
14436 +}
14437 +/* Restore xid from *vxis; vx_info is restored only if one was saved. */
14438 +static inline void __leave_vx_admin(struct vx_info_save *vxis)
14439 +{
14440 +       if (vxis->vxi)
14441 +               (void)xchg(&current->vx_info, vxis->vxi);
14442 +       current->xid = vxis->xid;
14443 +}
14444 +
14445 +extern void exit_vx_info(struct task_struct *, int);
14446 +
14447 +/* Reaper for p: its context's vx_reaper, or the global child_reaper. */
14448 +static inline
14449 +struct task_struct *vx_child_reaper(struct task_struct *p)
14450 +{
14451 +       struct vx_info *vxi = p->vx_info;
14452 +       struct task_struct *reaper = child_reaper;
14453 +
14454 +       if (!vxi)
14455 +               goto out;       /* no context: keep the global child_reaper */
14456 +
14457 +       BUG_ON(!p->vx_info->vx_reaper);
14458 +
14459 +       /* child reaper for the guest reaper */
14460 +       if (vxi->vx_reaper == p)
14461 +               goto out;       /* p is the context's reaper: use the global one */
14462 +
14463 +       reaper = vxi->vx_reaper;
14464 +out:
14465 +       return reaper;
14466 +}
14467 +
14468 +
14469 +#else
14470 +#warning duplicate inclusion
14471 +#endif
14472 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_cvirt.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_cvirt.h
14473 --- linux-2.6.16.20/include/linux/vs_cvirt.h    1970-01-01 01:00:00 +0100
14474 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_cvirt.h       2006-04-26 19:07:00 +0200
14475 @@ -0,0 +1,59 @@
14476 +#ifndef _VX_VS_CVIRT_H
14477 +#define _VX_VS_CVIRT_H
14478 +
14479 +#include "vserver/cvirt.h"
14480 +#include "vserver/debug.h"
14481 +
14482 +
14483 +/* utsname virtualization */
14484 +/* utsname of current's context, or system_utsname if current has none */
14485 +static inline struct new_utsname *vx_new_utsname(void)
14486 +{
14487 +       if (current->vx_info)
14488 +               return &current->vx_info->cvirt.utsname;
14489 +       return &system_utsname;
14490 +}
14491 +
14492 +#define vx_new_uts(x)          ((vx_new_utsname())->x)
14493 +
14494 +/* If p has a context: call vx_update_load() and bump cvirt.nr_running. */
14495 +static inline void vx_activate_task(struct task_struct *p)
14496 +{
14497 +       struct vx_info *vxi;
14498 +
14499 +       if ((vxi = p->vx_info)) {
14500 +               vx_update_load(vxi);
14501 +               atomic_inc(&vxi->cvirt.nr_running);
14502 +       }
14503 +}
14504 +/* If p has a context: call vx_update_load() and drop cvirt.nr_running. */
14505 +static inline void vx_deactivate_task(struct task_struct *p)
14506 +{
14507 +       struct vx_info *vxi;
14508 +
14509 +       if ((vxi = p->vx_info)) {
14510 +               vx_update_load(vxi);
14511 +               atomic_dec(&vxi->cvirt.nr_running);
14512 +       }
14513 +}
14514 +/* Increment cvirt.nr_uninterruptible of p's context, if p has one. */
14515 +static inline void vx_uninterruptible_inc(struct task_struct *p)
14516 +{
14517 +       struct vx_info *vxi;
14518 +
14519 +       if ((vxi = p->vx_info))
14520 +               atomic_inc(&vxi->cvirt.nr_uninterruptible);
14521 +}
14522 +/* Decrement cvirt.nr_uninterruptible of p's context, if p has one. */
14523 +static inline void vx_uninterruptible_dec(struct task_struct *p)
14524 +{
14525 +       struct vx_info *vxi;
14526 +
14527 +       if ((vxi = p->vx_info))
14528 +               atomic_dec(&vxi->cvirt.nr_uninterruptible);
14529 +}
14530 +
14531 +
14532 +#else
14533 +#warning duplicate inclusion
14534 +#endif
14535 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_dlimit.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_dlimit.h
14536 --- linux-2.6.16.20/include/linux/vs_dlimit.h   1970-01-01 01:00:00 +0100
14537 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_dlimit.h      2006-04-26 19:07:00 +0200
14538 @@ -0,0 +1,213 @@
14539 +#ifndef _VX_VS_DLIMIT_H
14540 +#define _VX_VS_DLIMIT_H
14541 +
14542 +#include "vserver/dlimit.h"
14543 +#include "vserver/debug.h"
14544 +
14545 +/* Increment dli->dl_usecnt and return dli; a NULL dli is passed through. */
14546 +#define get_dl_info(i) __get_dl_info(i,__FILE__,__LINE__)
14547 +
14548 +static inline struct dl_info *__get_dl_info(struct dl_info *dli,
14549 +       const char *_file, int _line)
14550 +{
14551 +       if (!dli)
14552 +               return NULL;
14553 +       vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])",
14554 +               dli, dli?dli->dl_tag:0, dli?atomic_read(&dli->dl_usecnt):0,
14555 +               _file, _line);  /* usecnt is logged before the increment */
14556 +       atomic_inc(&dli->dl_usecnt);
14557 +       return dli;
14558 +}
14559 +
14560 +/* Hand dl_info i to call_rcu() with rcu_free_dl_info as the callback. */
14561 +#define free_dl_info(i) \
14562 +       call_rcu(&(i)->dl_rcu, rcu_free_dl_info)
14563 +/* Drop one dl_usecnt reference; at zero, dli goes to free_dl_info(). */
14564 +#define put_dl_info(i) __put_dl_info(i,__FILE__,__LINE__)
14565 +
14566 +static inline void __put_dl_info(struct dl_info *dli,
14567 +       const char *_file, int _line)
14568 +{
14569 +       if (!dli)
14570 +               return;
14571 +       vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])",
14572 +               dli, dli?dli->dl_tag:0, dli?atomic_read(&dli->dl_usecnt):0,
14573 +               _file, _line);  /* usecnt is logged before the decrement */
14574 +       if (atomic_dec_and_test(&dli->dl_usecnt))
14575 +               free_dl_info(dli);
14576 +}
14577 +
14578 +/* Log marker: '*' when d is non-NULL (a dl_info was found), ' ' otherwise. */
14579 +#define __dlimit_char(d)       ((d)?'*':' ')
14580 +/* Charge nr bytes to (sb,tag); nonzero return = would exceed dl_space_total. */
14581 +static inline int __dl_alloc_space(struct super_block *sb,
14582 +       tag_t tag, dlsize_t nr, const char *file, int line)
14583 +{
14584 +       struct dl_info *dli = NULL;
14585 +       int ret = 0;
14586 +
14587 +       if (nr == 0)
14588 +               goto out;
14589 +       dli = locate_dl_info(sb, tag);
14590 +       if (!dli)
14591 +               goto out;       /* no dl_info for (sb,tag): returns 0 */
14592 +
14593 +       spin_lock(&dli->dl_lock);
14594 +       ret = (dli->dl_space_used + nr > dli->dl_space_total);
14595 +       if (!ret)
14596 +               dli->dl_space_used += nr;
14597 +       spin_unlock(&dli->dl_lock);
14598 +       put_dl_info(dli);
14599 +out:   /* the log line is emitted on every path, early exits included */
14600 +       vxlprintk(VXD_CBIT(dlim, 1),
14601 +               "ALLOC (%p,#%d)%c %lld bytes (%d)",
14602 +               sb, tag, __dlimit_char(dli), (long long)nr,
14603 +               ret, file, line);
14604 +       return ret;
14605 +}
14606 +/* Give nr bytes back to (sb,tag); dl_space_used is clamped at zero. */
14607 +static inline void __dl_free_space(struct super_block *sb,
14608 +       tag_t tag, dlsize_t nr, const char *_file, int _line)
14609 +{
14610 +       struct dl_info *dli = NULL;
14611 +
14612 +       if (nr == 0)
14613 +               goto out;
14614 +       dli = locate_dl_info(sb, tag);
14615 +       if (!dli)
14616 +               goto out;
14617 +
14618 +       spin_lock(&dli->dl_lock);
14619 +       if (dli->dl_space_used > nr)
14620 +               dli->dl_space_used -= nr;
14621 +       else
14622 +               dli->dl_space_used = 0; /* never go below zero */
14623 +       spin_unlock(&dli->dl_lock);
14624 +       put_dl_info(dli);
14625 +out:
14626 +       vxlprintk(VXD_CBIT(dlim, 1),
14627 +               "FREE  (%p,#%d)%c %lld bytes",
14628 +               sb, tag, __dlimit_char(dli), (long long)nr,
14629 +               _file, _line);
14630 +}
14631 +/* Charge one inode to (sb,tag); nonzero return = dl_inodes_total reached. */
14632 +static inline int __dl_alloc_inode(struct super_block *sb,
14633 +       tag_t tag, const char *_file, int _line)
14634 +{
14635 +       struct dl_info *dli;
14636 +       int ret = 0;
14637 +
14638 +       dli = locate_dl_info(sb, tag);
14639 +       if (!dli)
14640 +               goto out;       /* no dl_info for (sb,tag): returns 0 */
14641 +
14642 +       spin_lock(&dli->dl_lock);
14643 +       ret = (dli->dl_inodes_used >= dli->dl_inodes_total);
14644 +       if (!ret)
14645 +               dli->dl_inodes_used++;
14646 +#if 0  /* disabled; NOTE(review): names file/line, params are _file/_line */
14647 +       else
14648 +               vxwprintk("DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d",
14649 +                       sb, tag,
14650 +                       dli->dl_inodes_used, dli->dl_inodes_total,
14651 +                       file, line);
14652 +#endif
14653 +       spin_unlock(&dli->dl_lock);
14654 +       put_dl_info(dli);
14655 +out:
14656 +       vxlprintk(VXD_CBIT(dlim, 0),
14657 +               "ALLOC (%p,#%d)%c inode (%d)",
14658 +               sb, tag, __dlimit_char(dli), ret, _file, _line);
14659 +       return ret;
14660 +}
14661 +/* Release one inode from (sb,tag); dl_inodes_used never drops below 0. */
14662 +static inline void __dl_free_inode(struct super_block *sb,
14663 +       tag_t tag, const char *_file, int _line)
14664 +{
14665 +       struct dl_info *dli;
14666 +
14667 +       dli = locate_dl_info(sb, tag);
14668 +       if (!dli)
14669 +               goto out;
14670 +
14671 +       spin_lock(&dli->dl_lock);
14672 +       if (dli->dl_inodes_used > 1)
14673 +               dli->dl_inodes_used--;
14674 +       else
14675 +               dli->dl_inodes_used = 0;        /* covers both 1 and 0 */
14676 +       spin_unlock(&dli->dl_lock);
14677 +       put_dl_info(dli);
14678 +out:
14679 +       vxlprintk(VXD_CBIT(dlim, 0),
14680 +               "FREE  (%p,#%d)%c inode",
14681 +               sb, tag, __dlimit_char(dli), _file, _line);
14682 +}
14683 +/* Cap *free_blocks / *root_blocks (each may be NULL) by the (sb,tag) limit. */
14684 +static inline void __dl_adjust_block(struct super_block *sb, tag_t tag,
14685 +       unsigned int *free_blocks, unsigned int *root_blocks,
14686 +       const char *_file, int _line)
14687 +{
14688 +       struct dl_info *dli;
14689 +       uint64_t broot, bfree;
14690 +
14691 +       dli = locate_dl_info(sb, tag);
14692 +       if (!dli)
14693 +               return;         /* no dl_info: outputs stay untouched */
14694 +
14695 +       spin_lock(&dli->dl_lock);
14696 +       broot = (dli->dl_space_total -
14697 +               (dli->dl_space_total >> 10) * dli->dl_nrlmult)
14698 +               >> sb->s_blocksize_bits;
14699 +       bfree = (dli->dl_space_total - dli->dl_space_used)
14700 +                       >> sb->s_blocksize_bits;
14701 +       spin_unlock(&dli->dl_lock);
14702 +
14703 +       /* the out-pointers are optional, so guard them in the log as well */
14704 +       vxlprintk(VXD_CBIT(dlim, 2),
14705 +               "ADJUST: %lld,%lld on %d,%d [mult=%d]",
14706 +               (long long)bfree, (long long)broot,
14707 +               free_blocks ? *free_blocks : 0,
14708 +               root_blocks ? *root_blocks : 0, dli->dl_nrlmult,
14709 +               _file, _line);
14710 +
14711 +       if (free_blocks && *free_blocks > bfree)
14712 +               *free_blocks = bfree;
14713 +       if (root_blocks && *root_blocks > broot)
14714 +               *root_blocks = broot;
14715 +
14716 +       put_dl_info(dli);
14717 +}
14718 +/* Wrappers that pass (in)->i_sb, (in)->i_tag and the call site along. */
14719 +#define DLIMIT_ALLOC_SPACE(in, bytes) \
14720 +       __dl_alloc_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
14721 +               __FILE__, __LINE__ )
14722 +
14723 +#define DLIMIT_FREE_SPACE(in, bytes) \
14724 +       __dl_free_space((in)->i_sb, (in)->i_tag, (dlsize_t)(bytes), \
14725 +               __FILE__, __LINE__ )
14726 +/* BLOCK variants: nr is shifted left by s_blocksize_bits to get bytes. */
14727 +#define DLIMIT_ALLOC_BLOCK(in, nr) \
14728 +       __dl_alloc_space((in)->i_sb, (in)->i_tag, \
14729 +               ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \
14730 +               __FILE__, __LINE__ )
14731 +
14732 +#define DLIMIT_FREE_BLOCK(in, nr) \
14733 +       __dl_free_space((in)->i_sb, (in)->i_tag, \
14734 +               ((dlsize_t)(nr)) << (in)->i_sb->s_blocksize_bits, \
14735 +               __FILE__, __LINE__ )
14736 +
14737 +
14738 +#define DLIMIT_ALLOC_INODE(in) \
14739 +       __dl_alloc_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ )
14740 +
14741 +#define DLIMIT_FREE_INODE(in) \
14742 +       __dl_free_inode((in)->i_sb, (in)->i_tag, __FILE__, __LINE__ )
14743 +
14744 +/* Unlike the wrappers above, this one takes sb and tag directly. */
14745 +#define DLIMIT_ADJUST_BLOCK(sb, tag, fb, rb) \
14746 +       __dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__ )
14747 +
14748 +
14749 +#else
14750 +#warning duplicate inclusion
14751 +#endif
14752 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_limit.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_limit.h
14753 --- linux-2.6.16.20/include/linux/vs_limit.h    1970-01-01 01:00:00 +0100
14754 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_limit.h       2006-05-02 02:10:17 +0200
14755 @@ -0,0 +1,137 @@
14756 +#ifndef _VX_VS_LIMIT_H
14757 +#define _VX_VS_LIMIT_H
14758 +
14759 +#include "vserver/limit.h"
14760 +#include "vserver/debug.h"
14761 +#include "vserver/limit_int.h"
14762 +
14763 +/* Call-site recording wrappers for __vx_acc_cres() and __vx_add_cres(). */
14764 +#define vx_acc_cres(v,d,p,r) \
14765 +       __vx_acc_cres(v, r, d, p, __FILE__, __LINE__)
14766 +/* _cond: current->vx_info is used only if x equals vx_current_xid(), else 0 */
14767 +#define vx_acc_cres_cond(x,d,p,r) \
14768 +       __vx_acc_cres(((x) == vx_current_xid()) ? current->vx_info : 0, \
14769 +       r, d, p, __FILE__, __LINE__)
14770 +
14771 +
14772 +#define vx_add_cres(v,a,p,r) \
14773 +       __vx_add_cres(v, r, a, p, __FILE__, __LINE__)
14774 +#define vx_sub_cres(v,a,p,r)           vx_add_cres(v,-(a),p,r)
14775 +
14776 +#define vx_add_cres_cond(x,a,p,r) \
14777 +       __vx_add_cres(((x) == vx_current_xid()) ? current->vx_info : 0, \
14778 +       r, a, p, __FILE__, __LINE__)
14779 +#define vx_sub_cres_cond(x,a,p,r)      vx_add_cres_cond(x,-(a),p,r)
14780 +
14781 +
14782 +/* process and file limits */
14783 +/* inc/dec wrappers pass +1/-1 as the delta and the object as argument p */
14784 +#define vx_nproc_inc(p) \
14785 +       vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC)
14786 +
14787 +#define vx_nproc_dec(p) \
14788 +       vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC)
14789 +
14790 +#define vx_files_inc(f) \
14791 +       vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE)
14792 +
14793 +#define vx_files_dec(f) \
14794 +       vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE)
14795 +
14796 +#define vx_locks_inc(l) \
14797 +       vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS)
14798 +
14799 +#define vx_locks_dec(l) \
14800 +       vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS)
14801 +
14802 +#define vx_openfd_inc(f) \
14803 +       vx_acc_cres(current->vx_info, 1, (void *)(long)(f), VLIMIT_OPENFD)
14804 +
14805 +#define vx_openfd_dec(f) \
14806 +       vx_acc_cres(current->vx_info,-1, (void *)(long)(f), VLIMIT_OPENFD)
14807 +
14808 +/* availability checks: forwarded to __vx_cres_avail() with the call site */
14809 +#define vx_cres_avail(v,n,r) \
14810 +       __vx_cres_avail(v, r, n, __FILE__, __LINE__)
14811 +
14812 +
14813 +#define vx_nproc_avail(n) \
14814 +       vx_cres_avail(current->vx_info, n, RLIMIT_NPROC)
14815 +
14816 +#define vx_files_avail(n) \
14817 +       vx_cres_avail(current->vx_info, n, RLIMIT_NOFILE)
14818 +
14819 +#define vx_locks_avail(n) \
14820 +       vx_cres_avail(current->vx_info, n, RLIMIT_LOCKS)
14821 +
14822 +#define vx_openfd_avail(n) \
14823 +       vx_cres_avail(current->vx_info, n, VLIMIT_OPENFD)
14824 +
14825 +
14826 +/* dentry limits */
14827 +
14828 +#define vx_dentry_inc(d) do {                                          \
14829 +       if (atomic_read(&d->d_count) == 1)                              \
14830 +               vx_acc_cres(current->vx_info, 1, d, VLIMIT_DENTRY);     \
14831 +       } while (0)
14832 +
14833 +#define vx_dentry_dec(d) do {                                          \
14834 +       if (atomic_read(&d->d_count) == 0)                              \
14835 +               vx_acc_cres(current->vx_info,-1, d, VLIMIT_DENTRY);     \
14836 +       } while (0)
14837 +
14838 +#define vx_dentry_avail(n) \
14839 +       vx_cres_avail(current->vx_info, n, VLIMIT_DENTRY)
14840 +
14841 +
14842 +/* socket limits */
14843 +/* inc/dec use the socket's sk_vx_info; avail uses current->vx_info. */
14844 +#define vx_sock_inc(s) \
14845 +       vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK)
14846 +
14847 +#define vx_sock_dec(s) \
14848 +       vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK)
14849 +
14850 +#define vx_sock_avail(n) \
14851 +       vx_cres_avail(current->vx_info, n, VLIMIT_NSOCK)
14852 +
14853 +
14854 +/* ipc resource limits */
14855 +/* ipcmsg/ipcshm take an explicit vx_info v; semary/nsems use current's. */
14856 +#define vx_ipcmsg_add(v,u,a) \
14857 +       vx_add_cres(v, a, u, RLIMIT_MSGQUEUE)
14858 +
14859 +#define vx_ipcmsg_sub(v,u,a) \
14860 +       vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE)
14861 +
14862 +#define vx_ipcmsg_avail(v,a) \
14863 +       vx_cres_avail(v, a, RLIMIT_MSGQUEUE)
14864 +
14865 +/* the shm key k is passed as an opaque pointer via (void *)(long)(k) */
14866 +#define vx_ipcshm_add(v,k,a) \
14867 +       vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
14868 +
14869 +#define vx_ipcshm_sub(v,k,a) \
14870 +       vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
14871 +
14872 +#define vx_ipcshm_avail(v,a) \
14873 +       vx_cres_avail(v, a, VLIMIT_SHMEM)
14874 +
14875 +
14876 +#define vx_semary_inc(a) \
14877 +       vx_acc_cres(current->vx_info, 1, a, VLIMIT_SEMARY)
14878 +
14879 +#define vx_semary_dec(a) \
14880 +       vx_acc_cres(current->vx_info,-1, a, VLIMIT_SEMARY)
14881 +
14882 +
14883 +#define vx_nsems_add(a,n) \
14884 +       vx_add_cres(current->vx_info, n, a, VLIMIT_NSEMS)
14885 +
14886 +#define vx_nsems_sub(a,n) \
14887 +       vx_sub_cres(current->vx_info, n, a, VLIMIT_NSEMS)
14888 +
14889 +
14890 +#else
14891 +#warning duplicate inclusion
14892 +#endif
14893 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_memory.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_memory.h
14894 --- linux-2.6.16.20/include/linux/vs_memory.h   1970-01-01 01:00:00 +0100
14895 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_memory.h      2006-04-26 19:07:00 +0200
14896 @@ -0,0 +1,149 @@
14897 +#ifndef _VX_VS_MEMORY_H
14898 +#define _VX_VS_MEMORY_H
14899 +
14900 +#include "vserver/limit.h"
14901 +#include "vserver/debug.h"
14902 +#include "vserver/limit_int.h"
14903 +
14904 +/* Counter primitives; v is a pointer to the counter being changed. */
14905 +#define __acc_add_long(a,v)    (*(v) += (a))
14906 +#define __acc_inc_long(v)      (++*(v))
14907 +#define __acc_dec_long(v)      (--*(v))
14908 +/* atomic_long ops are used only when NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS */
14909 +#if    NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
14910 +#define __acc_add_atomic(a,v)  atomic_long_add(a,v)
14911 +#define __acc_inc_atomic(v)    atomic_long_inc(v)
14912 +#define __acc_dec_atomic(v)    atomic_long_dec(v)
14913 +#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
14914 +#define __acc_add_atomic(a,v)  __acc_add_long(a,v)
14915 +#define __acc_inc_atomic(v)    __acc_inc_long(v)
14916 +#define __acc_dec_atomic(v)    __acc_dec_long(v)
14917 +#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
14918 +
14919 +
14920 +#define vx_acc_page(m,d,v,r) do {                                      \
14921 +       if ((d) > 0)                                                    \
14922 +               __acc_inc_long(&(m->v));                                \
14923 +       else                                                            \
14924 +               __acc_dec_long(&(m->v));                                \
14925 +       __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__);      \
14926 +} while (0)
14927 +
14928 +#define vx_acc_page_atomic(m,d,v,r) do {                               \
14929 +       if ((d) > 0)                                                    \
14930 +               __acc_inc_atomic(&(m->v));                              \
14931 +       else                                                            \
14932 +               __acc_dec_atomic(&(m->v));                              \
14933 +       __vx_acc_cres(m->mm_vx_info, r, d, m, __FILE__, __LINE__);      \
14934 +} while (0)
14935 +
14936 +
14937 +#define vx_acc_pages(m,p,v,r) do {                                     \
14938 +       unsigned long __p = (p);                                        \
14939 +       __acc_add_long(__p, &(m->v));                                   \
14940 +       __vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__);    \
14941 +} while (0)
14942 +
14943 +#define vx_acc_pages_atomic(m,p,v,r) do {                              \
14944 +       unsigned long __p = (p);                                        \
14945 +       __acc_add_atomic(__p, &(m->v));                                 \
14946 +       __vx_add_cres(m->mm_vx_info, r, __p, m, __FILE__, __LINE__);    \
14947 +} while (0)
14948 +
14949 +
14950 +/* Per-counter shorthands: each names an mm field and the limit id used. */
14951 +#define vx_acc_vmpage(m,d) \
14952 +       vx_acc_page(m, d, total_vm,  RLIMIT_AS)
14953 +#define vx_acc_vmlpage(m,d) \
14954 +       vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK)
14955 +#define vx_acc_file_rsspage(m,d) \
14956 +       vx_acc_page_atomic(m, d, _file_rss, RLIMIT_RSS)
14957 +#define vx_acc_anon_rsspage(m,d) \
14958 +       vx_acc_page_atomic(m, d, _anon_rss, VLIMIT_ANON)
14959 +
14960 +#define vx_acc_vmpages(m,p) \
14961 +       vx_acc_pages(m, p, total_vm,  RLIMIT_AS)
14962 +#define vx_acc_vmlpages(m,p) \
14963 +       vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK)
14964 +#define vx_acc_file_rsspages(m,p) \
14965 +       vx_acc_pages_atomic(m, p, _file_rss, RLIMIT_RSS)
14966 +#define vx_acc_anon_rsspages(m,p) \
14967 +       vx_acc_pages_atomic(m, p, _anon_rss, VLIMIT_ANON)
14968 +
14969 +#define vx_pages_add(s,r,p)    __vx_add_cres(s, r, p, 0, __FILE__, __LINE__)
14970 +#define vx_pages_sub(s,r,p)    vx_pages_add(s, r, -(p))
14971 +/* NOTE(review): *_sub feed -(p) into vx_acc_pages*()'s unsigned long __p */
14972 +#define vx_vmpages_inc(m)              vx_acc_vmpage(m, 1)
14973 +#define vx_vmpages_dec(m)              vx_acc_vmpage(m,-1)
14974 +#define vx_vmpages_add(m,p)            vx_acc_vmpages(m, p)
14975 +#define vx_vmpages_sub(m,p)            vx_acc_vmpages(m,-(p))
14976 +
14977 +#define vx_vmlocked_inc(m)             vx_acc_vmlpage(m, 1)
14978 +#define vx_vmlocked_dec(m)             vx_acc_vmlpage(m,-1)
14979 +#define vx_vmlocked_add(m,p)           vx_acc_vmlpages(m, p)
14980 +#define vx_vmlocked_sub(m,p)           vx_acc_vmlpages(m,-(p))
14981 +
14982 +#define vx_file_rsspages_inc(m)                vx_acc_file_rsspage(m, 1)
14983 +#define vx_file_rsspages_dec(m)                vx_acc_file_rsspage(m,-1)
14984 +#define vx_file_rsspages_add(m,p)      vx_acc_file_rsspages(m, p)
14985 +#define vx_file_rsspages_sub(m,p)      vx_acc_file_rsspages(m,-(p))
14986 +
14987 +#define vx_anon_rsspages_inc(m)                vx_acc_anon_rsspage(m, 1)
14988 +#define vx_anon_rsspages_dec(m)                vx_acc_anon_rsspage(m,-1)
14989 +#define vx_anon_rsspages_add(m,p)      vx_acc_anon_rsspages(m, p)
14990 +#define vx_anon_rsspages_sub(m,p)      vx_acc_anon_rsspages(m,-(p))
14991 +
14992 +/* Availability checks made against the mm's own mm_vx_info. */
14993 +#define vx_pages_avail(m,p,r) \
14994 +       __vx_cres_avail((m)->mm_vx_info, r, p, __FILE__, __LINE__)
14995 +
14996 +#define vx_vmpages_avail(m,p)  vx_pages_avail(m, p, RLIMIT_AS)
14997 +#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK)
14998 +#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS)
14999 +#define vx_anonpages_avail(m,p)        vx_pages_avail(m, p, VLIMIT_ANON)
15000 +/* Page-fault type ids; presumably the `type` given to vx_page_fault(). */
15001 +enum {
15002 +       VXPT_UNKNOWN = 0,
15003 +       VXPT_ANON,
15004 +       VXPT_NONE,
15005 +       VXPT_FILE,
15006 +       VXPT_SWAP,
15007 +       VXPT_WRITE
15008 +};
15009 +
15010 +#if 0  /* set to 1 to compile vx_page_fault() out to nothing */
15011 +#define        vx_page_fault(mm,vma,type,ret)
15012 +#else
15013 +/* Count a fault in vxi->cacct.page[type][what]; no-op without mm_vx_info. */
15014 +static inline
15015 +void __vx_page_fault(struct mm_struct *mm,
15016 +       struct vm_area_struct *vma, int type, int ret)  /* vma is unused */
15017 +{
15018 +       struct vx_info *vxi = mm->mm_vx_info;
15019 +       int what;
15020 +/*
15021 +       static char *page_type[6] =
15022 +               { "UNKNOWN", "ANON","NONE", "FILE", "SWAP", "WRITE" };
15023 +       static char *page_what[4] =
15024 +               { "FAULT_OOM", "FAULT_SIGBUS", "FAULT_MINOR", "FAULT_MAJOR" };
15025 +*/
15026 +
15027 +       if (!vxi)
15028 +               return;
15029 +
15030 +       what = (ret & 0x3);     /* low two bits of the fault result */
15031 +
15032 +/*     printk("[%d] page[%d][%d] %2x %s %s\n", vxi->vx_id,
15033 +               type, what, ret, page_type[type], page_what[what]);
15034 +*/
15035 +       if (ret & VM_FAULT_WRITE)
15036 +               what |= 0x4;    /* write faults land in slots 4..7 */
15037 +       atomic_inc(&vxi->cacct.page[type][what]);       /* type is not range-checked */
15038 +}
15039 +
15040 +#define        vx_page_fault(mm,vma,type,ret)  __vx_page_fault(mm,vma,type,ret)
15041 +#endif
15042 +
15043 +#else
15044 +#warning duplicate inclusion
15045 +#endif
15046 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_network.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_network.h
15047 --- linux-2.6.16.20/include/linux/vs_network.h  1970-01-01 01:00:00 +0100
15048 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_network.h     2006-04-26 19:07:00 +0200
15049 @@ -0,0 +1,244 @@
15050 +#ifndef _NX_VS_NETWORK_H
15051 +#define _NX_VS_NETWORK_H
15052 +
15053 +#include "vserver/network.h"
15054 +#include "vserver/debug.h"
15055 +
15056 +/* Increment nxi->nx_usecnt and return nxi; a NULL nxi is passed through. */
15057 +#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__)
15058 +
15059 +static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
15060 +       const char *_file, int _line)
15061 +{
15062 +       if (!nxi)
15063 +               return NULL;
15064 +
15065 +       vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])",
15066 +               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0,
15067 +               _file, _line);
15068 +
15069 +       atomic_inc(&nxi->nx_usecnt);
15070 +       return nxi;
15071 +}
15072 +
15073 +
15074 +extern void free_nx_info(struct nx_info *);
15075 +/* Drop one nx_usecnt reference; free_nx_info() runs when it hits zero. */
15076 +#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__)
15077 +
15078 +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
15079 +{
15080 +       if (!nxi)
15081 +               return;
15082 +
15083 +       vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])",
15084 +               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0,
15085 +               _file, _line);
15086 +
15087 +       if (atomic_dec_and_test(&nxi->nx_usecnt))
15088 +               free_nx_info(nxi);
15089 +}
15090 +
15091 +/* Store nxi in *nxp unconditionally; a non-NULL nxi gains a usecnt ref. */
15092 +#define init_nx_info(p,i) __init_nx_info(p,i,__FILE__,__LINE__)
15093 +
15094 +static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi,
15095 +               const char *_file, int _line)
15096 +{
15097 +       if (nxi) {
15098 +               vxlprintk(VXD_CBIT(nid, 3),
15099 +                       "init_nx_info(%p[#%d.%d])",
15100 +                       nxi, nxi?nxi->nx_id:0,
15101 +                       nxi?atomic_read(&nxi->nx_usecnt):0,
15102 +                       _file, _line);
15103 +
15104 +               atomic_inc(&nxi->nx_usecnt);
15105 +       }
15106 +       *nxp = nxi;     /* plain store: the previous *nxp value is not released */
15107 +}
15108 +
15109 +/* Install nxi in *nxp with a new usecnt ref; no-op if nxi is NULL. */
15110 +#define set_nx_info(p,i) __set_nx_info(p,i,__FILE__,__LINE__)
15111 +
15112 +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
15113 +       const char *_file, int _line)
15114 +{
15115 +       struct nx_info *nxo;
15116 +
15117 +       if (!nxi)
15118 +               return;
15119 +
15120 +       vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])",
15121 +               nxi, nxi?nxi->nx_id:0,
15122 +               nxi?atomic_read(&nxi->nx_usecnt):0,
15123 +               _file, _line);
15124 +
15125 +       atomic_inc(&nxi->nx_usecnt);
15126 +       nxo = xchg(nxp, nxi);
15127 +       BUG_ON(nxo);    /* the slot must have been empty before the swap */
15128 +}
15129 +/* Swap NULL into *nxp via xchg() and drop the usecnt ref the slot held. */
15130 +#define clr_nx_info(p) __clr_nx_info(p,__FILE__,__LINE__)
15131 +
15132 +static inline void __clr_nx_info(struct nx_info **nxp,
15133 +       const char *_file, int _line)
15134 +{
15135 +       struct nx_info *nxo;
15136 +
15137 +       nxo = xchg(nxp, NULL);
15138 +       if (!nxo)
15139 +               return;         /* slot was already empty */
15140 +
15141 +       vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])",
15142 +               nxo, nxo?nxo->nx_id:0,
15143 +               nxo?atomic_read(&nxo->nx_usecnt):0,
15144 +               _file, _line);
15145 +
15146 +       if (atomic_dec_and_test(&nxo->nx_usecnt))
15147 +               free_nx_info(nxo);
15148 +}
15149 +
15150 +/* Count one more task in nxi->nx_tasks; task itself is only logged. */
15151 +#define claim_nx_info(v,p) __claim_nx_info(v,p,__FILE__,__LINE__)
15152 +
15153 +static inline void __claim_nx_info(struct nx_info *nxi,
15154 +       struct task_struct *task, const char *_file, int _line)
15155 +{
15156 +       vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p",
15157 +               nxi, nxi?nxi->nx_id:0,
15158 +               nxi?atomic_read(&nxi->nx_usecnt):0,
15159 +               nxi?atomic_read(&nxi->nx_tasks):0,
15160 +               task, _file, _line);
15161 +       /* NOTE(review): the log tolerates a NULL nxi, the increment does not */
15162 +       atomic_inc(&nxi->nx_tasks);
15163 +}
15164 +
15165 +
15166 +extern void unhash_nx_info(struct nx_info *);
15167 +/* Drop one task from nxi->nx_tasks; the last one calls unhash_nx_info(). */
15168 +#define release_nx_info(v,p) __release_nx_info(v,p,__FILE__,__LINE__)
15169 +
15170 +static inline void __release_nx_info(struct nx_info *nxi,
15171 +       struct task_struct *task, const char *_file, int _line)
15172 +{
15173 +       vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p",
15174 +               nxi, nxi?nxi->nx_id:0,
15175 +               nxi?atomic_read(&nxi->nx_usecnt):0,
15176 +               nxi?atomic_read(&nxi->nx_tasks):0,
15177 +               task, _file, _line);
15178 +
15179 +       might_sleep();
15180 +
15181 +       if (atomic_dec_and_test(&nxi->nx_tasks))
15182 +               unhash_nx_info(nxi);
15183 +}
15184 +
15185 +/* Fetch p->nx_info through __get_nx_info() while holding task_lock(p). */
15186 +#define task_get_nx_info(i)    __task_get_nx_info(i,__FILE__,__LINE__)
15187 +
15188 +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
15189 +       const char *_file, int _line)
15190 +{
15191 +       struct nx_info *nxi;
15192 +
15193 +       task_lock(p);
15194 +       vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)",
15195 +               p, _file, _line);
15196 +       nxi = __get_nx_info(p->nx_info, _file, _line);
15197 +       task_unlock(p);
15198 +       return nxi;
15199 +}
15200 +
15201 +/* nid accessors and checks evaluated for the current task */
15202 +#define nx_task_nid(t) ((t)->nid)
15203 +
15204 +#define nx_current_nid() nx_task_nid(current)
15205 +
15206 +#define nx_check(c,m)  __nx_check(nx_current_nid(),c,m)
15207 +
15208 +#define nx_weak_check(c,m)     ((m) ? nx_check(c,m) : 1)
15209 +/* nx_weak_check() yields 1 without calling nx_check() when mode m is 0 */
15210 +
15211 +/*
15212 + * check current context (cid) for ADMIN/WATCH and optionally the
15213 + * supplied id (IDENT/DYNAMIC/STATIC/BLEND/HOSTID); result is 0 or 1
15214 + */
15215 +static inline int __nx_check(nid_t cid, nid_t id, unsigned int mode)
15216 +{
15217 +       if (mode & NX_ARG_MASK) {
15218 +               if ((mode & NX_IDENT) &&
15219 +                       (id == cid))
15220 +                       return 1;
15221 +       }
15222 +       if (mode & NX_ATR_MASK) {
15223 +               if ((mode & NX_DYNAMIC) &&
15224 +                       (id >= MIN_D_CONTEXT) &&
15225 +                       (id <= MAX_S_CONTEXT))
15226 +                       return 1;
15227 +               if ((mode & NX_STATIC) &&
15228 +                       (id > 1) && (id < MIN_D_CONTEXT))
15229 +                       return 1;
15230 +       }
15231 +       return (((mode & NX_ADMIN) && (cid == 0)) ||
15232 +               ((mode & NX_WATCH) && (cid == 1)) ||
15233 +               ((mode & NX_BLEND) && (id == 1)) ||
15234 +               ((mode & NX_HOSTID) && (id == 0)));
15235 +}
15236 +
15237 +/* State/flag/ncap accessors; a NULL nx_info reads as 0 in each of them. */
15238 +#define __nx_state(v)  ((v) ? ((v)->nx_state) : 0)
15239 +
15240 +#define nx_info_state(v,m)     (__nx_state(v) & (m))
15241 +
15242 +
15243 +#define __nx_flags(v)  ((v) ? (v)->nx_flags : 0)
15244 +
15245 +#define nx_current_flags()     __nx_flags(current->nx_info)
15246 +
15247 +#define nx_info_flags(v,m,f) \
15248 +       vx_check_flags(__nx_flags(v),(m),(f))
15249 +
15250 +#define task_nx_flags(t,m,f) \
15251 +       ((t) && nx_info_flags((t)->nx_info, (m), (f)))
15252 +
15253 +#define nx_flags(m,f)  nx_info_flags(current->nx_info,(m),(f))
15254 +
15255 +
15256 +/* context caps */
15257 +
15258 +#define __nx_ncaps(v)  ((v) ? (v)->nx_ncaps : 0)
15259 +
15260 +#define nx_current_ncaps()     __nx_ncaps(current->nx_info)
15261 +
15262 +#define nx_info_ncaps(v,c)     (__nx_ncaps(v) & (c))
15263 +
15264 +#define nx_ncaps(c)    nx_info_ncaps(current->nx_info,(c))
15265 +
15266 +/* Return 1 if nxi is NULL, its first entry is 0, or addr is listed; else 0. */
15267 +static inline int addr_in_nx_info(struct nx_info *nxi, uint32_t addr)
15268 +{
15269 +       int n,i;
15270 +
15271 +       if (!nxi)
15272 +               return 1;
15273 +
15274 +       n = nxi->nbipv4;
15275 +       if (n && (nxi->ipv4[0] == 0))   /* a zero first entry matches anything */
15276 +               return 1;
15277 +       for (i=0; i<n; i++) {
15278 +               if (nxi->ipv4[i] == addr)
15279 +                       return 1;
15280 +       }
15281 +       return 0;
15282 +}
15283 +/* Release p's task claim on p->nx_info, if p has one. */
15284 +static inline void exit_nx_info(struct task_struct *p)
15285 +{
15286 +       if (p->nx_info)
15287 +               release_nx_info(p->nx_info, p);
15288 +}
15289 +
15290 +
15291 +#else
15292 +#warning duplicate inclusion
15293 +#endif
15294 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_pid.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_pid.h
15295 --- linux-2.6.16.20/include/linux/vs_pid.h      1970-01-01 01:00:00 +0100
15296 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_pid.h 2006-04-26 19:07:00 +0200
15297 @@ -0,0 +1,57 @@
15298 +#ifndef _VX_VS_PID_H
15299 +#define _VX_VS_PID_H
15300 +
15301 +#include "vserver/debug.h"
15302 +
15303 +
15304 +/* pid faking stuff */
15305 +
15306 +
15307 +#define vx_info_map_pid(v,p) \
15308 +       __vx_info_map_pid((v), (p), __FUNC__, __FILE__, __LINE__)
15309 +#define vx_info_map_tgid(v,p)  vx_info_map_pid(v,p)
15310 +#define vx_map_pid(p)  vx_info_map_pid(current->vx_info, p)
15311 +#define vx_map_tgid(p) vx_map_pid(p)
15312 +/* map pid for context vxi: vx_initpid is reported as 1 when the check below holds */
15313 +static inline int __vx_info_map_pid(struct vx_info *vxi, int pid,
15314 +       const char *func, const char *file, int line)
15315 +{
15316 +       if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {     /* NOTE(review): presumably "INIT flag set" -- confirm */
15317 +               vxfprintk(VXD_CBIT(cvirt, 2),
15318 +                       "vx_map_tgid: %p/%llx: %d -> %d",
15319 +                       vxi, (long long)vxi->vx_flags, pid,
15320 +                       (pid && pid == vxi->vx_initpid)?1:pid,
15321 +                       func, file, line);
15322 +               if (pid == 0)   /* pid 0 is never remapped */
15323 +                       return 0;
15324 +               if (pid == vxi->vx_initpid)     /* guest init is reported as pid 1 */
15325 +                       return 1;
15326 +       }
15327 +       return pid;     /* everything else passes through unchanged */
15328 +}
15329 +
15330 +#define vx_info_rmap_pid(v,p) \
15331 +       __vx_info_rmap_pid((v), (p), __FUNC__, __FILE__, __LINE__)
15332 +#define vx_rmap_pid(p) vx_info_rmap_pid(current->vx_info, p)
15333 +#define vx_rmap_tgid(p) vx_rmap_pid(p)
15334 +/* reverse mapping for context vxi: pid 1 is translated back to vx_initpid */
15335 +static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid,
15336 +       const char *func, const char *file, int line)
15337 +{
15338 +       if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {     /* NOTE(review): presumably "INIT flag set" -- confirm */
15339 +               vxfprintk(VXD_CBIT(cvirt, 2),
15340 +                       "vx_rmap_tgid: %p/%llx: %d -> %d",
15341 +                       vxi, (long long)vxi->vx_flags, pid,
15342 +                       (pid == 1)?vxi->vx_initpid:pid,
15343 +                       func, file, line);
15344 +               if ((pid == 1) && vxi->vx_initpid)      /* only when an init pid is recorded */
15345 +                       return vxi->vx_initpid;
15346 +               if (pid == vxi->vx_initpid)     /* the init pid itself maps to ~0U */
15347 +                       return ~0U;
15348 +       }
15349 +       return pid;     /* everything else passes through unchanged */
15350 +}
15351 +
15352 +#else
15353 +#warning duplicate inclusion
15354 +#endif
15355 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_sched.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_sched.h
15356 --- linux-2.6.16.20/include/linux/vs_sched.h    1970-01-01 01:00:00 +0100
15357 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_sched.h       2006-04-27 21:05:56 +0200
15358 @@ -0,0 +1,107 @@
15359 +#ifndef _VX_VS_SCHED_H
15360 +#define _VX_VS_SCHED_H
15361 +
15362 +#include "vserver/sched.h"
15363 +
15364 +
15365 +#define VAVAVOOM_RATIO          50
15366 +
15367 +#define MAX_PRIO_BIAS           20
15368 +#define MIN_PRIO_BIAS          -20
15369 +
15370 +
15371 +#ifdef CONFIG_VSERVER_HARDCPU
15372 +
15373 +/*
15374 + * effective_prio - return the priority that is based on the static
15375 + * priority but is modified by bonuses/penalties.
15376 + *
15377 + * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
15378 + * into a -4 ... 0 ... +4 bonus/penalty range.
15379 + *
15380 + * Additionally, we scale another amount based on the number of
15381 + * CPU tokens currently held by the context, if the process is
15382 + * part of a context (and the appropriate SCHED flag is set).
15383 + * This ranges from -5 ... 0 ... +15, quadratically.
15384 + *
15385 + * So, the total bonus is -9 .. 0 .. +19
15386 + * We use ~50% of the full 0...39 priority range so that:
15387 + *
15388 + * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
15389 + * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
15390 + *    unless that context is far exceeding its CPU allocation.
15391 + *
15392 + * Both properties are important to certain workloads.
15393 + */
15394 +static inline
15395 +int vx_effective_vavavoom(struct _vx_sched_pc *sched_pc, int max_prio)
15396 +{
15397 +       int vavavoom, max;
15398 +
15399 +       /* lots of tokens = lots of vavavoom
15400 +        *      no tokens = no vavavoom      */
15401 +       if ((vavavoom = sched_pc->tokens) >= 0) {
15402 +               max = sched_pc->tokens_max;
15403 +               vavavoom = max - vavavoom;      /* distance from tokens_max */
15404 +               max = max * max;        /* NOTE(review): divisor below; assumes tokens_max != 0 -- confirm */
15405 +               vavavoom = max_prio * VAVAVOOM_RATIO / 100
15406 +                       * (vavavoom*vavavoom - (max >> 2)) / max;
15407 +               return vavavoom;
15408 +       }
15409 +       return 0;       /* negative token count: no adjustment */
15410 +}
15411 +
15412 +
15413 +static inline
15414 +int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
15415 +{
15416 +       struct vx_info *vxi = p->vx_info;
15417 +
15418 +       if (!vxi)
15419 +               return prio;
15420 +
15421 +       if (vx_info_flags(vxi, VXF_SCHED_PRIO, 0)) {
15422 +               struct _vx_sched_pc *sched_pc = &vx_cpu(vxi, sched_pc);
15423 +               int vavavoom = vx_effective_vavavoom(sched_pc, max_user);
15424 +
15425 +               vxi->sched.vavavoom = vavavoom;
15426 +               prio += vavavoom;
15427 +       }
15428 +       prio += vxi->sched.prio_bias;
15429 +       return prio;
15430 +}
15431 +
15432 +#else /* !CONFIG_VSERVER_HARDCPU */
15433 +
15434 +static inline
15435 +int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
15436 +{
15437 +       struct vx_info *vxi = p->vx_info;
15438 +
15439 +       if (vxi)
15440 +               prio += vxi->sched.prio_bias;
15441 +       return prio;
15442 +}
15443 +
15444 +#endif /* CONFIG_VSERVER_HARDCPU */
15445 +
15446 +
15447 +static inline void vx_account_user(struct vx_info *vxi,
15448 +       cputime_t cputime, int nice)
15449 +{
15450 +       if (!vxi)
15451 +               return;
15452 +       vx_cpu(vxi, sched_pc).user_ticks += cputime;
15453 +}
15454 +
15455 +static inline void vx_account_system(struct vx_info *vxi,
15456 +       cputime_t cputime, int idle)
15457 +{
15458 +       if (!vxi)
15459 +               return;
15460 +       vx_cpu(vxi, sched_pc).sys_ticks += cputime;
15461 +}
15462 +
15463 +#else
15464 +#warning duplicate inclusion
15465 +#endif
15466 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_socket.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_socket.h
15467 --- linux-2.6.16.20/include/linux/vs_socket.h   1970-01-01 01:00:00 +0100
15468 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_socket.h      2006-04-26 19:07:00 +0200
15469 @@ -0,0 +1,57 @@
15470 +#ifndef _VX_VS_SOCKET_H
15471 +#define _VX_VS_SOCKET_H
15472 +
15473 +#include "vserver/debug.h"
15474 +
15475 +
15476 +/* socket accounting */
15477 +
15478 +#include <linux/socket.h>
15479 +/* map a protocol family to an accounting type in the range 1..4 */
15480 +static inline int vx_sock_type(int family)
15481 +{
15482 +       int type = 4;   /* default for every family not handled below */
15483 +
15484 +       if (family > 0 && family < 3)   /* families 1 and 2 map to themselves (presumably PF_UNIX/PF_INET) */
15485 +               type = family;
15486 +       else if (family == PF_INET6)
15487 +               type = 3;
15488 +       return type;
15489 +}
15490 +
15491 +#define vx_acc_sock(v,f,p,s) \
15492 +       __vx_acc_sock((v), (f), (p), (s), __FILE__, __LINE__)
15493 +/* account one socket event of 'size' for vxi; file and line are not used here */
15494 +static inline void __vx_acc_sock(struct vx_info *vxi,
15495 +       int family, int pos, int size, const char *file, int line)
15496 +{
15497 +       if (vxi) {      /* nothing is accounted without a context */
15498 +               int type = vx_sock_type(family);
15499 +
15500 +               atomic_inc(&vxi->cacct.sock[type][pos].count);
15501 +               atomic_add(size, &vxi->cacct.sock[type][pos].total);
15502 +       }
15503 +}
15504 +
15505 +#define vx_sock_recv(sk,s) \
15506 +       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s))
15507 +#define vx_sock_send(sk,s) \
15508 +       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s))
15509 +#define vx_sock_fail(sk,s) \
15510 +       vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s))
15511 +
15512 +
15513 +#define sock_vx_init(s) do {           \
15514 +       (s)->sk_xid = 0;                \
15515 +       (s)->sk_vx_info = NULL;         \
15516 +       } while (0)
15517 +
15518 +#define sock_nx_init(s) do {           \
15519 +       (s)->sk_nid = 0;                \
15520 +       (s)->sk_nx_info = NULL;         \
15521 +       } while (0)
15522 +
15523 +
15524 +#else
15525 +#warning duplicate inclusion
15526 +#endif
15527 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_tag.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_tag.h
15528 --- linux-2.6.16.20/include/linux/vs_tag.h      1970-01-01 01:00:00 +0100
15529 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_tag.h 2006-04-26 19:07:00 +0200
15530 @@ -0,0 +1,45 @@
15531 +#ifndef _VX_VS_TAG_H
15532 +#define _VX_VS_TAG_H
15533 +
15534 +#include <linux/kernel.h>
15535 +#include <linux/vserver/tag.h>
15536 +
15537 +/* check conditions */
15538 +
15539 +#define DX_ADMIN       0x0001
15540 +#define DX_WATCH       0x0002
15541 +#define DX_HOSTID      0x0008
15542 +
15543 +#define DX_IDENT       0x0010
15544 +
15545 +#define DX_ARG_MASK    0x0010
15546 +
15547 +
15548 +#define dx_task_tag(t) ((t)->xid)
15549 +
15550 +#define dx_current_tag() dx_task_tag(current)
15551 +
15552 +#define dx_check(c,m)  __dx_check(dx_current_tag(),c,m)
15553 +
15554 +#define dx_weak_check(c,m)     ((m) ? dx_check(c,m) : 1)
15555 +
15556 +
15557 +/*
15558 + * check current context for ADMIN/WATCH and
15559 + * optionally against supplied argument
15560 + */
15561 +static inline int __dx_check(tag_t cid, tag_t id, unsigned int mode)
15562 +{
15563 +       if (mode & DX_ARG_MASK) {       /* checks that compare against the argument */
15564 +               if ((mode & DX_IDENT) &&
15565 +                       (id == cid))    /* argument tag equals current tag */
15566 +                       return 1;
15567 +       }
15568 +       return (((mode & DX_ADMIN) && (cid == 0)) ||    /* current tag 0 passes ADMIN */
15569 +               ((mode & DX_WATCH) && (cid == 1)) ||    /* current tag 1 passes WATCH */
15570 +               ((mode & DX_HOSTID) && (id == 0)));     /* argument tag 0 passes HOSTID */
15571 +}
15572 +
15573 +#else
15574 +#warning duplicate inclusion
15575 +#endif
15576 diff -NurpP --minimal linux-2.6.16.20/include/linux/vs_time.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_time.h
15577 --- linux-2.6.16.20/include/linux/vs_time.h     1970-01-01 01:00:00 +0100
15578 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vs_time.h        2006-05-31 01:27:45 +0200
15579 @@ -0,0 +1,19 @@
15580 +#ifndef _VX_VS_TIME_H
15581 +#define _VX_VS_TIME_H
15582 +
15583 +
15584 +/* time faking stuff */
15585 +
15586 +#ifdef CONFIG_VSERVER_VTIME
15587 +
15588 +extern void vx_gettimeofday(struct timeval *tv);
15589 +extern int vx_settimeofday(struct timespec *ts);
15590 +
15591 +#else
15592 +#define        vx_gettimeofday(t)      do_gettimeofday(t)
15593 +#define        vx_settimeofday(t)      do_settimeofday(t)
15594 +#endif
15595 +
15596 +#else
15597 +#warning duplicate inclusion
15598 +#endif
15599 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/context.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/context.h
15600 --- linux-2.6.16.20/include/linux/vserver/context.h     1970-01-01 01:00:00 +0100
15601 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/context.h        2006-05-29 17:49:04 +0200
15602 @@ -0,0 +1,206 @@
15603 +#ifndef _VX_CONTEXT_H
15604 +#define _VX_CONTEXT_H
15605 +
15606 +#include <linux/types.h>
15607 +#include <linux/capability.h>
15608 +
15609 +
15610 +#define MAX_S_CONTEXT  65535   /* Arbitrary limit */
15611 +
15612 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
15613 +#define MIN_D_CONTEXT  49152   /* dynamic contexts start here */
15614 +#else
15615 +#define MIN_D_CONTEXT  65536
15616 +#endif
15617 +
15618 +#define VX_DYNAMIC_ID  ((uint32_t)-1)          /* id for dynamic context */
15619 +
15620 +/* context flags */
15621 +
15622 +#define VXF_INFO_LOCK          0x00000001
15623 +#define VXF_INFO_SCHED         0x00000002
15624 +#define VXF_INFO_NPROC         0x00000004
15625 +#define VXF_INFO_PRIVATE       0x00000008
15626 +
15627 +#define VXF_INFO_INIT          0x00000010
15628 +#define VXF_INFO_HIDE          0x00000020
15629 +#define VXF_INFO_ULIMIT                0x00000040
15630 +#define VXF_INFO_NSPACE                0x00000080
15631 +
15632 +#define VXF_SCHED_HARD         0x00000100
15633 +#define VXF_SCHED_PRIO         0x00000200
15634 +#define VXF_SCHED_PAUSE                0x00000400
15635 +
15636 +#define VXF_VIRT_MEM           0x00010000
15637 +#define VXF_VIRT_UPTIME                0x00020000
15638 +#define VXF_VIRT_CPU           0x00040000
15639 +#define VXF_VIRT_LOAD          0x00080000
15640 +#define VXF_VIRT_TIME          0x00100000
15641 +
15642 +#define VXF_HIDE_MOUNT         0x01000000
15643 +#define VXF_HIDE_NETIF         0x02000000
15644 +
15645 +#define VXF_STATE_SETUP                (1ULL<<32)
15646 +#define VXF_STATE_INIT         (1ULL<<33)
15647 +
15648 +#define VXF_SC_HELPER          (1ULL<<36)
15649 +#define VXF_REBOOT_KILL                (1ULL<<37)
15650 +#define VXF_PERSISTENT         (1ULL<<38)
15651 +
15652 +#define VXF_FORK_RSS           (1ULL<<48)
15653 +#define VXF_PROLIFIC           (1ULL<<49)
15654 +
15655 +#define VXF_IGNEG_NICE         (1ULL<<52)
15656 +
15657 +#define VXF_ONE_TIME           (0x0003ULL<<32)
15658 +
15659 +#define VXF_INIT_SET           (VXF_STATE_SETUP|VXF_STATE_INIT)
15660 +
15661 +
15662 +/* context migration */
15663 +
15664 +#define VXM_SET_INIT           0x00000001
15665 +#define VXM_SET_REAPER         0x00000002
15666 +
15667 +/* context caps */
15668 +
15669 +#define VXC_CAP_MASK           0x00000000
15670 +
15671 +#define VXC_SET_UTSNAME                0x00000001
15672 +#define VXC_SET_RLIMIT         0x00000002
15673 +
15674 +#define VXC_RAW_ICMP           0x00000100
15675 +#define VXC_SYSLOG             0x00001000
15676 +
15677 +#define VXC_SECURE_MOUNT       0x00010000
15678 +#define VXC_SECURE_REMOUNT     0x00020000
15679 +#define VXC_BINARY_MOUNT       0x00040000
15680 +
15681 +#define VXC_QUOTA_CTL          0x00100000
15682 +
15683 +
15684 +/* context state changes */
15685 +
15686 +enum {
15687 +       VSC_STARTUP = 1,
15688 +       VSC_SHUTDOWN,
15689 +
15690 +       VSC_NETUP,
15691 +       VSC_NETDOWN,
15692 +};
15693 +
15694 +
15695 +#ifdef __KERNEL__
15696 +
15697 +#include <linux/list.h>
15698 +#include <linux/spinlock.h>
15699 +#include <linux/rcupdate.h>
15700 +
15701 +#include "limit_def.h"
15702 +#include "sched_def.h"
15703 +#include "cvirt_def.h"
15704 +
15705 +struct _vx_info_pc {
15706 +       struct _vx_sched_pc sched_pc;
15707 +       struct _vx_cvirt_pc cvirt_pc;
15708 +};
15709 +
15710 +struct vx_info {
15711 +       struct hlist_node vx_hlist;             /* linked list of contexts */
15712 +       xid_t vx_id;                            /* context id */
15713 +       atomic_t vx_usecnt;                     /* usage count */
15714 +       atomic_t vx_tasks;                      /* tasks count */
15715 +       struct vx_info *vx_parent;              /* parent context */
15716 +       int vx_state;                           /* context state */
15717 +
15718 +       struct namespace *vx_namespace;         /* private namespace */
15719 +       struct fs_struct *vx_fs;                /* private namespace fs */
15720 +       uint64_t vx_flags;                      /* context flags */
15721 +       uint64_t vx_bcaps;                      /* bounding caps (system) */
15722 +       uint64_t vx_ccaps;                      /* context caps (vserver) */
15723 +       kernel_cap_t vx_cap_bset;               /* the guest's bset */
15724 +
15725 +       struct task_struct *vx_reaper;          /* guest reaper process */
15726 +       pid_t vx_initpid;                       /* PID of guest init */
15727 +
15728 +       struct _vx_limit limit;                 /* vserver limits */
15729 +       struct _vx_sched sched;                 /* vserver scheduler */
15730 +       struct _vx_cvirt cvirt;                 /* virtual/bias stuff */
15731 +       struct _vx_cacct cacct;                 /* context accounting */
15732 +
15733 +#ifndef CONFIG_SMP
15734 +       struct _vx_info_pc info_pc;             /* per cpu data */
15735 +#else
15736 +       struct _vx_info_pc *ptr_pc;             /* per cpu array */
15737 +#endif
15738 +
15739 +       wait_queue_head_t vx_wait;              /* context exit waitqueue */
15740 +       int reboot_cmd;                         /* last sys_reboot() cmd */
15741 +       int exit_code;                          /* last process exit code */
15742 +
15743 +       char vx_name[65];                       /* vserver name */
15744 +};
15745 +
15746 +#ifndef CONFIG_SMP
15747 +#define        vx_ptr_pc(vxi)          (&(vxi)->info_pc)
15748 +#define        vx_per_cpu(vxi, v, id)  vx_ptr_pc(vxi)->v
15749 +#else
15750 +#define        vx_ptr_pc(vxi)          ((vxi)->ptr_pc)
15751 +#define        vx_per_cpu(vxi, v, id)  per_cpu_ptr(vx_ptr_pc(vxi), id)->v
15752 +#endif
15753 +
15754 +#define        vx_cpu(vxi, v)          vx_per_cpu(vxi, v, smp_processor_id())
15755 +
15756 +
15757 +struct vx_info_save {
15758 +       struct vx_info *vxi;
15759 +       xid_t xid;
15760 +};
15761 +
15762 +
15763 +/* status flags */
15764 +
15765 +#define VXS_HASHED     0x0001
15766 +#define VXS_PAUSED     0x0010
15767 +#define VXS_SHUTDOWN   0x0100
15768 +#define VXS_HELPER     0x1000
15769 +#define VXS_RELEASED   0x8000
15770 +
15771 +/* check conditions */
15772 +
15773 +#define VX_ADMIN       0x0001
15774 +#define VX_WATCH       0x0002
15775 +#define VX_HIDE                0x0004
15776 +#define VX_HOSTID      0x0008
15777 +
15778 +#define VX_IDENT       0x0010
15779 +#define VX_EQUIV       0x0020
15780 +#define VX_PARENT      0x0040
15781 +#define VX_CHILD       0x0080
15782 +
15783 +#define VX_ARG_MASK    0x00F0
15784 +
15785 +#define VX_DYNAMIC     0x0100
15786 +#define VX_STATIC      0x0200
15787 +
15788 +#define VX_ATR_MASK    0x0F00
15789 +
15790 +
15791 +extern void claim_vx_info(struct vx_info *, struct task_struct *);
15792 +extern void release_vx_info(struct vx_info *, struct task_struct *);
15793 +
15794 +extern struct vx_info *lookup_vx_info(int);
15795 +extern struct vx_info *lookup_or_create_vx_info(int);
15796 +
15797 +extern int get_xid_list(int, unsigned int *, int);
15798 +extern int xid_is_hashed(xid_t);
15799 +
15800 +extern int vx_migrate_task(struct task_struct *, struct vx_info *);
15801 +
15802 +extern long vs_state_change(struct vx_info *, unsigned int);
15803 +
15804 +
15805 +#endif /* __KERNEL__ */
15806 +#else  /* _VX_CONTEXT_H */
15807 +#warning duplicate inclusion
15808 +#endif /* _VX_CONTEXT_H */
15809 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/context_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/context_cmd.h
15810 --- linux-2.6.16.20/include/linux/vserver/context_cmd.h 1970-01-01 01:00:00 +0100
15811 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/context_cmd.h    2006-04-26 19:07:00 +0200
15812 @@ -0,0 +1,111 @@
15813 +#ifndef _VX_CONTEXT_CMD_H
15814 +#define _VX_CONTEXT_CMD_H
15815 +
15816 +
15817 +/* vinfo commands */
15818 +
15819 +#define VCMD_task_xid          VC_CMD(VINFO, 1, 0)
15820 +
15821 +#ifdef __KERNEL__
15822 +extern int vc_task_xid(uint32_t, void __user *);
15823 +
15824 +#endif /* __KERNEL__ */
15825 +
15826 +#define VCMD_vx_info           VC_CMD(VINFO, 5, 0)
15827 +
15828 +struct vcmd_vx_info_v0 {
15829 +       uint32_t xid;
15830 +       uint32_t initpid;
15831 +       /* more to come */
15832 +};
15833 +
15834 +#ifdef __KERNEL__
15835 +extern int vc_vx_info(uint32_t, void __user *);
15836 +
15837 +#endif /* __KERNEL__ */
15838 +
15839 +
15840 +/* context commands */
15841 +
15842 +#define VCMD_ctx_create_v0     VC_CMD(VPROC, 1, 0)
15843 +#define VCMD_ctx_create                VC_CMD(VPROC, 1, 1)
15844 +
15845 +struct vcmd_ctx_create {
15846 +       uint64_t flagword;
15847 +};
15848 +
15849 +#define VCMD_ctx_migrate_v0    VC_CMD(PROCMIG, 1, 0)
15850 +#define VCMD_ctx_migrate       VC_CMD(PROCMIG, 1, 1)
15851 +
15852 +struct vcmd_ctx_migrate {
15853 +       uint64_t flagword;
15854 +};
15855 +
15856 +#ifdef __KERNEL__
15857 +extern int vc_ctx_create(uint32_t, void __user *);
15858 +extern int vc_ctx_migrate(uint32_t, void __user *);
15859 +
15860 +#endif /* __KERNEL__ */
15861 +
15862 +
15863 +/* flag commands */
15864 +
15865 +#define VCMD_get_cflags                VC_CMD(FLAGS, 1, 0)
15866 +#define VCMD_set_cflags                VC_CMD(FLAGS, 2, 0)
15867 +
15868 +struct vcmd_ctx_flags_v0 {
15869 +       uint64_t flagword;
15870 +       uint64_t mask;
15871 +};
15872 +
15873 +#ifdef __KERNEL__
15874 +extern int vc_get_cflags(uint32_t, void __user *);
15875 +extern int vc_set_cflags(uint32_t, void __user *);
15876 +
15877 +#endif /* __KERNEL__ */
15878 +
15879 +
15880 +/* context caps commands */
15881 +
15882 +#define VCMD_get_ccaps_v0      VC_CMD(FLAGS, 3, 0)
15883 +#define VCMD_set_ccaps_v0      VC_CMD(FLAGS, 4, 0)
15884 +
15885 +struct vcmd_ctx_caps_v0 {
15886 +       uint64_t bcaps;
15887 +       uint64_t ccaps;
15888 +       uint64_t cmask;
15889 +};
15890 +
15891 +#define VCMD_get_ccaps         VC_CMD(FLAGS, 3, 1)
15892 +#define VCMD_set_ccaps         VC_CMD(FLAGS, 4, 1)
15893 +
15894 +struct vcmd_ctx_caps_v1 {
15895 +       uint64_t ccaps;
15896 +       uint64_t cmask;
15897 +};
15898 +
15899 +#ifdef __KERNEL__
15900 +extern int vc_get_ccaps_v0(uint32_t, void __user *);
15901 +extern int vc_set_ccaps_v0(uint32_t, void __user *);
15902 +extern int vc_get_ccaps(uint32_t, void __user *);
15903 +extern int vc_set_ccaps(uint32_t, void __user *);
15904 +
15905 +#endif /* __KERNEL__ */
15906 +
15907 +
15908 +/* bcaps commands */
15909 +
15910 +#define VCMD_get_bcaps         VC_CMD(FLAGS, 9, 0)
15911 +#define VCMD_set_bcaps         VC_CMD(FLAGS,10, 0)
15912 +
15913 +struct vcmd_bcaps {
15914 +       uint64_t bcaps;
15915 +       uint64_t bmask;
15916 +};
15917 +
15918 +#ifdef __KERNEL__
15919 +extern int vc_get_bcaps(uint32_t, void __user *);
15920 +extern int vc_set_bcaps(uint32_t, void __user *);
15921 +
15922 +#endif /* __KERNEL__ */
15923 +#endif /* _VX_CONTEXT_CMD_H */
15924 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/cvirt.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/cvirt.h
15925 --- linux-2.6.16.20/include/linux/vserver/cvirt.h       1970-01-01 01:00:00 +0100
15926 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/cvirt.h  2006-04-26 19:07:00 +0200
15927 @@ -0,0 +1,26 @@
15928 +#ifndef _VX_CVIRT_H
15929 +#define _VX_CVIRT_H
15930 +
15931 +
15932 +#ifdef __KERNEL__
15933 +
15934 +struct timespec;
15935 +
15936 +void vx_vsi_uptime(struct timespec *, struct timespec *);
15937 +
15938 +
15939 +struct vx_info;
15940 +
15941 +void vx_update_load(struct vx_info *);
15942 +
15943 +
15944 +int vx_uts_virt_handler(struct ctl_table *ctl, int write, xid_t xid,
15945 +       void **datap, size_t *lenp);
15946 +
15947 +
15948 +int vx_do_syslog(int, char __user *, int);
15949 +
15950 +#endif /* __KERNEL__ */
15951 +#else  /* _VX_CVIRT_H */
15952 +#warning duplicate inclusion
15953 +#endif /* _VX_CVIRT_H */
15954 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/cvirt_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/cvirt_cmd.h
15955 --- linux-2.6.16.20/include/linux/vserver/cvirt_cmd.h   1970-01-01 01:00:00 +0100
15956 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/cvirt_cmd.h      2006-04-26 19:07:00 +0200
15957 @@ -0,0 +1,35 @@
15958 +#ifndef _VX_CVIRT_CMD_H
15959 +#define _VX_CVIRT_CMD_H
15960 +
15961 +
15962 +/* virtual host info name commands */
15963 +
15964 +#define VCMD_set_vhi_name      VC_CMD(VHOST, 1, 0)
15965 +#define VCMD_get_vhi_name      VC_CMD(VHOST, 2, 0)
15966 +
15967 +struct vcmd_vhi_name_v0 {
15968 +       uint32_t field;
15969 +       char name[65];
15970 +};
15971 +
15972 +
15973 +enum vhi_name_field {
15974 +       VHIN_CONTEXT=0,
15975 +       VHIN_SYSNAME,
15976 +       VHIN_NODENAME,
15977 +       VHIN_RELEASE,
15978 +       VHIN_VERSION,
15979 +       VHIN_MACHINE,
15980 +       VHIN_DOMAINNAME,
15981 +};
15982 +
15983 +
15984 +#ifdef __KERNEL__
15985 +
15986 +#include <linux/compiler.h>
15987 +
15988 +extern int vc_set_vhi_name(uint32_t, void __user *);
15989 +extern int vc_get_vhi_name(uint32_t, void __user *);
15990 +
15991 +#endif /* __KERNEL__ */
15992 +#endif /* _VX_CVIRT_CMD_H */
15993 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/cvirt_def.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/cvirt_def.h
15994 --- linux-2.6.16.20/include/linux/vserver/cvirt_def.h   1970-01-01 01:00:00 +0100
15995 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/cvirt_def.h      2006-05-29 17:36:22 +0200
15996 @@ -0,0 +1,120 @@
15997 +#ifndef _VX_CVIRT_DEF_H
15998 +#define _VX_CVIRT_DEF_H
15999 +
16000 +#include <linux/jiffies.h>
16001 +#include <linux/utsname.h>
16002 +#include <linux/spinlock.h>
16003 +#include <linux/wait.h>
16004 +#include <linux/time.h>
16005 +#include <asm/atomic.h>
16006 +
16007 +
16008 +struct _vx_usage_stat {
16009 +       uint64_t user;
16010 +       uint64_t nice;
16011 +       uint64_t system;
16012 +       uint64_t softirq;
16013 +       uint64_t irq;
16014 +       uint64_t idle;
16015 +       uint64_t iowait;
16016 +};
16017 +
16018 +struct _vx_syslog {
16019 +       wait_queue_head_t log_wait;
16020 +       spinlock_t logbuf_lock;         /* lock for the log buffer */
16021 +
16022 +       unsigned long log_start;        /* next char to be read by syslog() */
16023 +       unsigned long con_start;        /* next char to be sent to consoles */
16024 +       unsigned long log_end;  /* most-recently-written-char + 1 */
16025 +       unsigned long logged_chars;     /* #chars since last read+clear operation */
16026 +
16027 +       char log_buf[1024];
16028 +};
16029 +
16030 +
16031 +/* context sub struct */
16032 +
16033 +struct _vx_cvirt {
16034 +//     int max_threads;                /* maximum allowed threads */
16035 +       atomic_t nr_threads;            /* number of current threads */
16036 +       atomic_t nr_running;            /* number of running threads */
16037 +       atomic_t nr_uninterruptible;    /* number of uninterruptible threads */
16038 +
16039 +       atomic_t nr_onhold;             /* processes on hold */
16040 +       uint32_t onhold_last;           /* jiffies when put on hold */
16041 +
16042 +       struct timeval bias_tv;         /* time offset to the host */
16043 +       struct timespec bias_idle;
16044 +       struct timespec bias_uptime;    /* context creation point */
16045 +       uint64_t bias_clock;            /* offset in clock_t */
16046 +
16047 +       struct new_utsname utsname;
16048 +
16049 +       spinlock_t load_lock;           /* lock for the load averages */
16050 +       atomic_t load_updates;          /* nr of load updates done so far */
16051 +       uint32_t load_last;             /* last time load was calculated */
16052 +       uint32_t load[3];               /* load averages 1,5,15 */
16053 +
16054 +       atomic_t total_forks;           /* number of forks so far */
16055 +
16056 +       struct _vx_syslog syslog;
16057 +};
16058 +
16059 +struct _vx_cvirt_pc {
16060 +       struct _vx_usage_stat cpustat;
16061 +};
16062 +
16063 +
16064 +#ifdef CONFIG_VSERVER_DEBUG
16065 +
16066 +static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt)
16067 +{
16068 +       printk("\t_vx_cvirt:\n");
16069 +       printk("\t threads: %4d, %4d, %4d, %4d\n",
16070 +               atomic_read(&cvirt->nr_threads),
16071 +               atomic_read(&cvirt->nr_running),
16072 +               atomic_read(&cvirt->nr_uninterruptible),
16073 +               atomic_read(&cvirt->nr_onhold));
16074 +       /* add rest here */
16075 +       printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks));
16076 +}
16077 +
16078 +#endif
16079 +
16080 +
16081 +struct _vx_sock_acc {
16082 +       atomic_t count;
16083 +       atomic_t total;
16084 +};
16085 +
16086 +/* context sub struct */
16087 +
16088 +struct _vx_cacct {
16089 +       struct _vx_sock_acc sock[5][3];
16090 +       atomic_t slab[8];
16091 +       atomic_t page[6][8];
16092 +};
16093 +
16094 +#ifdef CONFIG_VSERVER_DEBUG
16095 +
16096 +static inline void __dump_vx_cacct(struct _vx_cacct *cacct)
16097 +{
16098 +       int i,j;
16099 +
16100 +       printk("\t_vx_cacct:");
16101 +       for (i=0; i<5; i++) {
16102 +               struct _vx_sock_acc *ptr = cacct->sock[i];
16103 +
16104 +               printk("\t [%d] =", i);
16105 +               for (j=0; j<3; j++) {
16106 +                       printk(" [%d] = %8d, %8d", j,
16107 +                               atomic_read(&ptr[j].count),
16108 +                               atomic_read(&ptr[j].total));
16109 +               }
16110 +               printk("\n");
16111 +       }
16112 +}
16113 +
16114 +#endif
16115 +
16116 +#endif /* _VX_CVIRT_DEF_H */
16117 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/debug.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/debug.h
16118 --- linux-2.6.16.20/include/linux/vserver/debug.h       1970-01-01 01:00:00 +0100
16119 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/debug.h  2006-04-26 19:07:00 +0200
16120 @@ -0,0 +1,110 @@
16121 +#ifndef _VX_DEBUG_H
16122 +#define _VX_DEBUG_H
16123 +
16124 +
16125 +#define VXD_CBIT(n,m)  (vx_debug_ ## n & (1 << (m)))
16126 +#define VXD_CMIN(n,m)  (vx_debug_ ## n > (m))
16127 +#define VXD_MASK(n,m)  (vx_debug_ ## n & (m))
16128 +
16129 +#define VXD_QPOS(v,p)  (((uint32_t)(v) >> ((p)*8)) & 0xFF)
16130 +#define VXD_QUAD(v)    VXD_QPOS(v,0), VXD_QPOS(v,1),           \
16131 +                       VXD_QPOS(v,2), VXD_QPOS(v,3)
16132 +#define VXF_QUAD       "%u.%u.%u.%u"
16133 +
16134 +#define VXD_DEV(d)     (d), (d)->bd_inode->i_ino,              \
16135 +                       imajor((d)->bd_inode), iminor((d)->bd_inode)
16136 +#define VXF_DEV                "%p[%lu,%d:%d]"
16137 +
16138 +
16139 +#define __FUNC__       __func__
16140 +
16141 +
16142 +#ifdef CONFIG_VSERVER_DEBUG
16143 +
16144 +extern unsigned int vx_debug_switch;
16145 +extern unsigned int vx_debug_xid;
16146 +extern unsigned int vx_debug_nid;
16147 +extern unsigned int vx_debug_tag;
16148 +extern unsigned int vx_debug_net;
16149 +extern unsigned int vx_debug_limit;
16150 +extern unsigned int vx_debug_cres;
16151 +extern unsigned int vx_debug_dlim;
16152 +extern unsigned int vx_debug_quota;
16153 +extern unsigned int vx_debug_cvirt;
16154 +extern unsigned int vx_debug_misc;
16155 +
16156 +
16157 +#define VX_LOGLEVEL    "vxD: "
16158 +#define VX_WARNLEVEL   KERN_WARNING "vxW: "
16159 +
16160 +#define vxdprintk(c,f,x...)                                    \
16161 +       do {                                                    \
16162 +               if (c)                                          \
16163 +                       printk(VX_LOGLEVEL f "\n" , ##x);       \
16164 +       } while (0)
16165 +
16166 +#define vxlprintk(c,f,x...)                                    \
16167 +       do {                                                    \
16168 +               if (c)                                          \
16169 +                       printk(VX_LOGLEVEL f " @%s:%d\n", x);   \
16170 +       } while (0)
16171 +
16172 +#define vxfprintk(c,f,x...)                                    \
16173 +       do {                                                    \
16174 +               if (c)                                          \
16175 +                       printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \
16176 +       } while (0)
16177 +
16178 +
16179 +#define vxwprintk(c,f,x...)                                    \
16180 +       do {                                                    \
16181 +               if (c)                                          \
16182 +                       printk(VX_WARNLEVEL f "\n" , ##x);      \
16183 +       } while (0)
16184 +
16185 +
16186 +#define vxd_path(d,m)                                          \
16187 +       ({ static char _buffer[PATH_MAX];                       \
16188 +          d_path((d), (m), _buffer, sizeof(_buffer)); })
16189 +
16190 +#define vxd_cond_path(n)                                       \
16191 +       ((n) ? vxd_path((n)->dentry, (n)->mnt) : "<null>" )
16192 +
16193 +
16194 +void dump_vx_info(struct vx_info *, int);
16195 +void dump_vx_info_inactive(int);
16196 +
16197 +#else  /* CONFIG_VSERVER_DEBUG */
16198 +
16199 +#define vx_debug_switch 0
16200 +#define vx_debug_xid   0
16201 +#define vx_debug_nid   0
16202 +#define vx_debug_tag   0
16203 +#define vx_debug_net   0
16204 +#define vx_debug_limit 0
16205 +#define vx_debug_cres  0
16206 +#define vx_debug_dlim  0
16207 +#define vx_debug_cvirt 0
16208 +#define vx_debug_misc  0       /* extern in the debug build; stub it here too */
16209 +#define vxdprintk(x...) do { } while (0)
16210 +#define vxlprintk(x...) do { } while (0)
16211 +#define vxfprintk(x...) do { } while (0)
16212 +#define vxwprintk(x...) do { } while (0)
16213 +
16214 +#define vxd_path(d,m)   "<none>"       /* same arity as the debug variant */
16215 +#define vxd_cond_path(n)        "<none>"       /* same arity as the debug variant */
16216 +
16217 +#endif /* CONFIG_VSERVER_DEBUG */
16218 +
16219 +
16220 +#ifdef CONFIG_VSERVER_DEBUG
16221 +#define vxd_assert_lock(l)     assert_spin_locked(l)
16222 +#define vxd_assert(c,f,x...)   vxlprintk(!(c), \
16223 +       "assertion [" f "] failed.", ##x, __FILE__, __LINE__)
16224 +#else
16225 +#define vxd_assert_lock(l)     do { } while (0)
16226 +#define vxd_assert(c,f,x...)   do { } while (0)
16227 +#endif
16228 +
16229 +
16230 +#endif /* _VX_DEBUG_H */
16231 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/debug_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/debug_cmd.h
16232 --- linux-2.6.16.20/include/linux/vserver/debug_cmd.h   1970-01-01 01:00:00 +0100
16233 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/debug_cmd.h      2006-04-26 19:07:00 +0200
16234 @@ -0,0 +1,14 @@
16235 +#ifndef _VX_DEBUG_CMD_H
16236 +#define _VX_DEBUG_CMD_H
16237 +
16238 +
16239 +/* debug commands */
16240 +
16241 +#define VCMD_dump_history      VC_CMD(DEBUG, 1, 0)
16242 +
16243 +#ifdef __KERNEL__
16244 +
16245 +extern int vc_dump_history(uint32_t);
16246 +
16247 +#endif /* __KERNEL__ */
16248 +#endif /* _VX_DEBUG_CMD_H */
16249 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/dlimit.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/dlimit.h
16250 --- linux-2.6.16.20/include/linux/vserver/dlimit.h      1970-01-01 01:00:00 +0100
16251 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/dlimit.h 2006-04-26 19:07:00 +0200
16252 @@ -0,0 +1,53 @@
16253 +#ifndef _VX_DLIMIT_H
16254 +#define _VX_DLIMIT_H
16255 +
16256 +#include "switch.h"
16257 +
16258 +
16259 +#ifdef __KERNEL__
16260 +
16261 +/*      keep in sync with CDLIM_INFINITY       */
16262 +
16263 +#define DLIM_INFINITY          (~0ULL)
16264 +
16265 +#include <linux/spinlock.h>
16266 +
16267 +struct super_block;
16268 +
16269 +struct dl_info {
16270 +       struct hlist_node dl_hlist;             /* linked list of contexts */
16271 +       struct rcu_head dl_rcu;                 /* the rcu head */
16272 +       tag_t dl_tag;                           /* context tag */
16273 +       atomic_t dl_usecnt;                     /* usage count */
16274 +       atomic_t dl_refcnt;                     /* reference count */
16275 +
16276 +       struct super_block *dl_sb;              /* associated superblock */
16277 +
16278 +       spinlock_t dl_lock;                     /* protect the values */
16279 +
16280 +       unsigned long long dl_space_used;       /* used space in bytes */
16281 +       unsigned long long dl_space_total;      /* maximum space in bytes */
16282 +       unsigned long dl_inodes_used;           /* used inodes */
16283 +       unsigned long dl_inodes_total;          /* maximum inodes */
16284 +
16285 +       unsigned int dl_nrlmult;                /* non root limit mult */
16286 +};
16287 +
16288 +struct rcu_head;
16289 +
16290 +extern void rcu_free_dl_info(struct rcu_head *);
16291 +extern void unhash_dl_info(struct dl_info *);
16292 +
16293 +extern struct dl_info *locate_dl_info(struct super_block *, tag_t);
16294 +
16295 +
16296 +struct kstatfs;
16297 +
16298 +extern void vx_vsi_statfs(struct super_block *, struct kstatfs *);
16299 +
16300 +typedef uint64_t dlsize_t;
16301 +
16302 +#endif /* __KERNEL__ */
16303 +#else  /* _VX_DLIMIT_H */
16304 +#warning duplicate inclusion
16305 +#endif /* _VX_DLIMIT_H */
16306 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/dlimit_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/dlimit_cmd.h
16307 --- linux-2.6.16.20/include/linux/vserver/dlimit_cmd.h  1970-01-01 01:00:00 +0100
16308 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/dlimit_cmd.h     2006-04-26 19:07:00 +0200
16309 @@ -0,0 +1,72 @@
16310 +#ifndef _VX_DLIMIT_CMD_H
16311 +#define _VX_DLIMIT_CMD_H
16312 +
16313 +
16314 +/*  dlimit vserver commands */
16315 +
16316 +#define VCMD_add_dlimit                VC_CMD(DLIMIT, 1, 0)
16317 +#define VCMD_rem_dlimit                VC_CMD(DLIMIT, 2, 0)
16318 +
16319 +#define VCMD_set_dlimit                VC_CMD(DLIMIT, 5, 0)
16320 +#define VCMD_get_dlimit                VC_CMD(DLIMIT, 6, 0)
16321 +
16322 +struct vcmd_ctx_dlimit_base_v0 {
16323 +       const char __user *name;
16324 +       uint32_t flags;
16325 +};
16326 +
16327 +struct vcmd_ctx_dlimit_v0 {
16328 +       const char __user *name;
16329 +       uint32_t space_used;                    /* used space in kbytes */
16330 +       uint32_t space_total;                   /* maximum space in kbytes */
16331 +       uint32_t inodes_used;                   /* used inodes */
16332 +       uint32_t inodes_total;                  /* maximum inodes */
16333 +       uint32_t reserved;                      /* reserved for root in % */
16334 +       uint32_t flags;
16335 +};
16336 +
16337 +#define CDLIM_UNSET            ((uint32_t)0UL)
16338 +#define CDLIM_INFINITY         ((uint32_t)~0UL)
16339 +#define CDLIM_KEEP             ((uint32_t)~1UL)
16340 +
16341 +#ifdef __KERNEL__
16342 +
16343 +#ifdef CONFIG_COMPAT
16344 +
16345 +struct vcmd_ctx_dlimit_base_v0_x32 {
16346 +       compat_uptr_t name_ptr;
16347 +       uint32_t flags;
16348 +};
16349 +
16350 +struct vcmd_ctx_dlimit_v0_x32 {
16351 +       compat_uptr_t name_ptr;
16352 +       uint32_t space_used;                    /* used space in kbytes */
16353 +       uint32_t space_total;                   /* maximum space in kbytes */
16354 +       uint32_t inodes_used;                   /* used inodes */
16355 +       uint32_t inodes_total;                  /* maximum inodes */
16356 +       uint32_t reserved;                      /* reserved for root in % */
16357 +       uint32_t flags;
16358 +};
16359 +
16360 +#endif /* CONFIG_COMPAT */
16361 +
16362 +#include <linux/compiler.h>
16363 +
16364 +extern int vc_add_dlimit(uint32_t, void __user *);
16365 +extern int vc_rem_dlimit(uint32_t, void __user *);
16366 +
16367 +extern int vc_set_dlimit(uint32_t, void __user *);
16368 +extern int vc_get_dlimit(uint32_t, void __user *);
16369 +
16370 +#ifdef CONFIG_COMPAT
16371 +
16372 +extern int vc_add_dlimit_x32(uint32_t, void __user *);
16373 +extern int vc_rem_dlimit_x32(uint32_t, void __user *);
16374 +
16375 +extern int vc_set_dlimit_x32(uint32_t, void __user *);
16376 +extern int vc_get_dlimit_x32(uint32_t, void __user *);
16377 +
16378 +#endif /* CONFIG_COMPAT */
16379 +
16380 +#endif /* __KERNEL__ */
16381 +#endif /* _VX_DLIMIT_CMD_H */
16382 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/global.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/global.h
16383 --- linux-2.6.16.20/include/linux/vserver/global.h      1970-01-01 01:00:00 +0100
16384 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/global.h 2006-04-26 19:07:00 +0200
16385 @@ -0,0 +1,8 @@
16386 +#ifndef _VX_GLOBAL_H
16387 +#define _VX_GLOBAL_H
16388 +
16389 +
16390 +extern atomic_t vx_global_ctotal;
16391 +extern atomic_t vx_global_cactive;
16392 +
16393 +#endif /* _VX_GLOBAL_H */
16394 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/history.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/history.h
16395 --- linux-2.6.16.20/include/linux/vserver/history.h     1970-01-01 01:00:00 +0100
16396 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/history.h        2006-04-26 19:07:00 +0200
16397 @@ -0,0 +1,196 @@
16398 +#ifndef _VX_HISTORY_H
16399 +#define _VX_HISTORY_H
16400 +
16401 +#ifdef CONFIG_VSERVER_HISTORY
16402 +
16403 +extern unsigned volatile int vxh_active;
16404 +
16405 +struct _vxhe_vxi {
16406 +       struct vx_info *ptr;
16407 +       unsigned xid;
16408 +       unsigned usecnt;
16409 +       unsigned tasks;
16410 +};
16411 +
16412 +struct _vxhe_set_clr {
16413 +       void *data;
16414 +};
16415 +
16416 +struct _vxhe_loc_lookup {
16417 +       unsigned arg;
16418 +};
16419 +
16420 +enum {
16421 +       VXH_UNUSED=0,
16422 +       VXH_THROW_OOPS=1,
16423 +
16424 +       VXH_GET_VX_INFO,
16425 +       VXH_PUT_VX_INFO,
16426 +       VXH_INIT_VX_INFO,
16427 +       VXH_SET_VX_INFO,
16428 +       VXH_CLR_VX_INFO,
16429 +       VXH_CLAIM_VX_INFO,
16430 +       VXH_RELEASE_VX_INFO,
16431 +       VXH_ALLOC_VX_INFO,
16432 +       VXH_DEALLOC_VX_INFO,
16433 +       VXH_HASH_VX_INFO,
16434 +       VXH_UNHASH_VX_INFO,
16435 +       VXH_LOC_VX_INFO,
16436 +       VXH_LOOKUP_VX_INFO,
16437 +       VXH_CREATE_VX_INFO,
16438 +};
16439 +
16440 +struct _vx_hist_entry {
16441 +       void *loc;
16442 +       unsigned short seq;
16443 +       unsigned short type;
16444 +       struct _vxhe_vxi vxi;
16445 +       union {
16446 +               struct _vxhe_set_clr sc;
16447 +               struct _vxhe_loc_lookup ll;
16448 +       };
16449 +};
16450 +
16451 +struct _vx_hist_entry *vxh_advance(void *loc);
16452 +
16453 +
16454 +static inline
16455 +void   __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi)
16456 +{
16457 +       entry->vxi.ptr = vxi;
16458 +       if (vxi) {
16459 +               entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt);
16460 +               entry->vxi.tasks = atomic_read(&vxi->vx_tasks);
16461 +               entry->vxi.xid = vxi->vx_id;
16462 +       }
16463 +}
16464 +
16465 +
16466 +#define        __HERE__ current_text_addr()
16467 +
16468 +#define __VXH_BODY(__type, __data, __here)     \
16469 +       struct _vx_hist_entry *entry;           \
16470 +                                               \
16471 +       preempt_disable();                      \
16472 +       entry = vxh_advance(__here);            \
16473 +       __data;                                 \
16474 +       entry->type = __type;                   \
16475 +       preempt_enable();
16476 +
16477 +
16478 +       /* pass vxi only */
16479 +
16480 +#define __VXH_SMPL                             \
16481 +       __vxh_copy_vxi(entry, vxi)
16482 +
16483 +static inline
16484 +void   __vxh_smpl(struct vx_info *vxi, int __type, void *__here)
16485 +{
16486 +       __VXH_BODY(__type, __VXH_SMPL, __here)
16487 +}
16488 +
16489 +       /* pass vxi and data (void *) */
16490 +
16491 +#define __VXH_DATA                             \
16492 +       __vxh_copy_vxi(entry, vxi);             \
16493 +       entry->sc.data = data
16494 +
16495 +static inline
16496 +void   __vxh_data(struct vx_info *vxi, void *data,
16497 +                       int __type, void *__here)
16498 +{
16499 +       __VXH_BODY(__type, __VXH_DATA, __here)
16500 +}
16501 +
16502 +       /* pass vxi and arg (long) */
16503 +
16504 +#define __VXH_LONG                             \
16505 +       __vxh_copy_vxi(entry, vxi);             \
16506 +       entry->ll.arg = arg
16507 +
16508 +static inline
16509 +void   __vxh_long(struct vx_info *vxi, long arg,
16510 +                       int __type, void *__here)
16511 +{
16512 +       __VXH_BODY(__type, __VXH_LONG, __here)
16513 +}
16514 +
16515 +
16516 +static inline
16517 +void   __vxh_throw_oops(void *__here)
16518 +{
16519 +       __VXH_BODY(VXH_THROW_OOPS, {}, __here);
16520 +       /* prevent further acquisition */
16521 +       vxh_active = 0;
16522 +}
16523 +
16524 +/* history hooks: plain call expressions; the caller supplies the ';' */
16525 +#define vxh_throw_oops()       __vxh_throw_oops(__HERE__)
16526 +
16527 +#define __vxh_get_vx_info(v,h) __vxh_smpl(v, VXH_GET_VX_INFO, h)
16528 +#define __vxh_put_vx_info(v,h) __vxh_smpl(v, VXH_PUT_VX_INFO, h)
16529 +
16530 +#define __vxh_init_vx_info(v,d,h) \
16531 +       __vxh_data(v,d, VXH_INIT_VX_INFO, h)
16532 +#define __vxh_set_vx_info(v,d,h) \
16533 +       __vxh_data(v,d, VXH_SET_VX_INFO, h)
16534 +#define __vxh_clr_vx_info(v,d,h) \
16535 +       __vxh_data(v,d, VXH_CLR_VX_INFO, h)
16536 +
16537 +#define __vxh_claim_vx_info(v,d,h) \
16538 +       __vxh_data(v,d, VXH_CLAIM_VX_INFO, h)
16539 +#define __vxh_release_vx_info(v,d,h) \
16540 +       __vxh_data(v,d, VXH_RELEASE_VX_INFO, h)
16541 +
16542 +#define vxh_alloc_vx_info(v) \
16543 +       __vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__)
16544 +#define vxh_dealloc_vx_info(v) \
16545 +       __vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__)
16546 +
16547 +#define vxh_hash_vx_info(v) \
16548 +       __vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__)
16549 +#define vxh_unhash_vx_info(v) \
16550 +       __vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__)
16551 +
16552 +#define vxh_loc_vx_info(v,l) \
16553 +       __vxh_long(v,l, VXH_LOC_VX_INFO, __HERE__)
16554 +#define vxh_lookup_vx_info(v,l) \
16555 +       __vxh_long(v,l, VXH_LOOKUP_VX_INFO, __HERE__)
16556 +#define vxh_create_vx_info(v,l) \
16557 +       __vxh_long(v,l, VXH_CREATE_VX_INFO, __HERE__)
16558 +
16559 +extern void vxh_dump_history(void);
16560 +
16561 +
16562 +#else  /* CONFIG_VSERVER_HISTORY */
16563 +
16564 +#define        __HERE__        0
16565 +
16566 +#define vxh_throw_oops()               do { } while (0)
16567 +
16568 +#define __vxh_get_vx_info(v,h)         do { } while (0)
16569 +#define __vxh_put_vx_info(v,h)         do { } while (0)
16570 +
16571 +#define __vxh_init_vx_info(v,d,h)      do { } while (0)
16572 +#define __vxh_set_vx_info(v,d,h)       do { } while (0)
16573 +#define __vxh_clr_vx_info(v,d,h)       do { } while (0)
16574 +
16575 +#define __vxh_claim_vx_info(v,d,h)     do { } while (0)
16576 +#define __vxh_release_vx_info(v,d,h)   do { } while (0)
16577 +
16578 +#define vxh_alloc_vx_info(v)           do { } while (0)
16579 +#define vxh_dealloc_vx_info(v)         do { } while (0)
16580 +
16581 +#define vxh_hash_vx_info(v)            do { } while (0)
16582 +#define vxh_unhash_vx_info(v)          do { } while (0)
16583 +
16584 +#define vxh_loc_vx_info(a,v)           do { } while (0)
16585 +#define vxh_lookup_vx_info(a,v)                do { } while (0)
16586 +#define vxh_create_vx_info(a,v)                do { } while (0)
16587 +
16588 +#define vxh_dump_history()             do { } while (0)
16589 +
16590 +
16591 +#endif /* CONFIG_VSERVER_HISTORY */
16592 +
16593 +#endif /* _VX_HISTORY_H */
16594 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/inode.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/inode.h
16595 --- linux-2.6.16.20/include/linux/vserver/inode.h       1970-01-01 01:00:00 +0100
16596 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/inode.h  2006-04-26 19:07:00 +0200
16597 @@ -0,0 +1,38 @@
16598 +#ifndef _VX_INODE_H
16599 +#define _VX_INODE_H
16600 +
16601 +
16602 +#define IATTR_TAG      0x01000000
16603 +
16604 +#define IATTR_ADMIN    0x00000001
16605 +#define IATTR_WATCH    0x00000002
16606 +#define IATTR_HIDE     0x00000004
16607 +#define IATTR_FLAGS    0x00000007
16608 +
16609 +#define IATTR_BARRIER  0x00010000
16610 +#define IATTR_IUNLINK  0x00020000
16611 +#define IATTR_IMMUTABLE 0x00040000
16612 +
16613 +#ifdef __KERNEL__
16614 +
16615 +
16616 +#ifdef CONFIG_VSERVER_PROC_SECURE
16617 +#define IATTR_PROC_DEFAULT     ( IATTR_ADMIN | IATTR_HIDE )
16618 +#define IATTR_PROC_SYMLINK     ( IATTR_ADMIN )
16619 +#else
16620 +#define IATTR_PROC_DEFAULT     ( IATTR_ADMIN )
16621 +#define IATTR_PROC_SYMLINK     ( IATTR_ADMIN )
16622 +#endif
16623 +
16624 +#define vx_hide_check(c,m)     (((m) & IATTR_HIDE) ? vx_check(c,m) : 1)
16625 +
16626 +#endif /* __KERNEL__ */
16627 +
16628 +/* inode ioctls */
16629 +
16630 +#define FIOC_GETXFLG   _IOR('x', 5, long)
16631 +#define FIOC_SETXFLG   _IOW('x', 6, long)
16632 +
16633 +#else  /* _VX_INODE_H */
16634 +#warning duplicate inclusion
16635 +#endif /* _VX_INODE_H */
16636 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/inode_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/inode_cmd.h
16637 --- linux-2.6.16.20/include/linux/vserver/inode_cmd.h   1970-01-01 01:00:00 +0100
16638 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/inode_cmd.h      2006-04-26 19:07:00 +0200
16639 @@ -0,0 +1,59 @@
16640 +#ifndef _VX_INODE_CMD_H
16641 +#define _VX_INODE_CMD_H
16642 +
16643 +
16644 +/*  inode vserver commands */
16645 +
16646 +#define VCMD_get_iattr_v0      VC_CMD(INODE, 1, 0)
16647 +#define VCMD_set_iattr_v0      VC_CMD(INODE, 2, 0)
16648 +
16649 +#define VCMD_get_iattr         VC_CMD(INODE, 1, 1)
16650 +#define VCMD_set_iattr         VC_CMD(INODE, 2, 1)
16651 +
16652 +struct vcmd_ctx_iattr_v0 {
16653 +       /* device handle in id */
16654 +       uint64_t ino;
16655 +       uint32_t xid;
16656 +       uint32_t flags;
16657 +       uint32_t mask;
16658 +};
16659 +
16660 +struct vcmd_ctx_iattr_v1 {
16661 +       const char __user *name;
16662 +       uint32_t xid;
16663 +       uint32_t flags;
16664 +       uint32_t mask;
16665 +};
16666 +
16667 +
16668 +#ifdef __KERNEL__
16669 +
16670 +
16671 +#ifdef CONFIG_COMPAT
16672 +
16673 +struct vcmd_ctx_iattr_v1_x32 {
16674 +       compat_uptr_t name_ptr;
16675 +       uint32_t xid;
16676 +       uint32_t flags;
16677 +       uint32_t mask;
16678 +};
16679 +
16680 +#endif /* CONFIG_COMPAT */
16681 +
16682 +#include <linux/compiler.h>
16683 +
16684 +extern int vc_get_iattr_v0(uint32_t, void __user *);
16685 +extern int vc_set_iattr_v0(uint32_t, void __user *);
16686 +
16687 +extern int vc_get_iattr(uint32_t, void __user *);
16688 +extern int vc_set_iattr(uint32_t, void __user *);
16689 +
16690 +#ifdef CONFIG_COMPAT
16691 +
16692 +extern int vc_get_iattr_x32(uint32_t, void __user *);
16693 +extern int vc_set_iattr_x32(uint32_t, void __user *);
16694 +
16695 +#endif /* CONFIG_COMPAT */
16696 +
16697 +#endif /* __KERNEL__ */
16698 +#endif /* _VX_INODE_CMD_H */
16699 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/legacy.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/legacy.h
16700 --- linux-2.6.16.20/include/linux/vserver/legacy.h      1970-01-01 01:00:00 +0100
16701 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/legacy.h 2006-04-26 19:07:00 +0200
16702 @@ -0,0 +1,49 @@
16703 +#ifndef _VX_LEGACY_H
16704 +#define _VX_LEGACY_H
16705 +
16706 +#include "switch.h"
16707 +
16708 +
16709 +/*  compatibility vserver commands */
16710 +
16711 +#define VCMD_new_s_context     VC_CMD(COMPAT, 1, 1)
16712 +#define VCMD_set_ipv4root      VC_CMD(COMPAT, 2, 3)
16713 +
16714 +#define VCMD_create_context    VC_CMD(VSETUP, 1, 0)
16715 +
16716 +/*  compatibility vserver arguments */
16717 +
16718 +struct vcmd_new_s_context_v1 {
16719 +       uint32_t remove_cap;
16720 +       uint32_t flags;
16721 +};
16722 +
16723 +struct vcmd_set_ipv4root_v3 {
16724 +       /* number of pairs in id */
16725 +       uint32_t broadcast;
16726 +       struct {
16727 +               uint32_t ip;
16728 +               uint32_t mask;
16729 +       } nx_mask_pair[NB_IPV4ROOT];
16730 +};
16731 +
16732 +
16733 +#define VX_INFO_LOCK           1       /* Can't request a new vx_id */
16734 +#define VX_INFO_NPROC          4       /* Limit number of processes in a context */
16735 +#define VX_INFO_PRIVATE                8       /* No one can join this security context */
16736 +#define VX_INFO_INIT           16      /* This process wants to become the */
16737 +                                       /* logical process 1 of the security */
16738 +                                       /* context */
16739 +#define VX_INFO_HIDEINFO       32      /* Hide some information in /proc */
16740 +#define VX_INFO_ULIMIT         64      /* Use ulimit of the current process */
16741 +                                       /* to become the global limits */
16742 +                                       /* of the context */
16743 +#define VX_INFO_NAMESPACE      128     /* save private namespace */
16744 +
16745 +
16746 +#ifdef __KERNEL__
16747 +extern int vc_new_s_context(uint32_t, void __user *);
16748 +extern int vc_set_ipv4root(uint32_t, void __user *);
16749 +
16750 +#endif /* __KERNEL__ */
16751 +#endif /* _VX_LEGACY_H */
16752 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/limit.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit.h
16753 --- linux-2.6.16.20/include/linux/vserver/limit.h       1970-01-01 01:00:00 +0100
16754 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit.h  2006-05-02 02:06:16 +0200
16755 @@ -0,0 +1,64 @@
16756 +#ifndef _VX_LIMIT_H
16757 +#define _VX_LIMIT_H
16758 +
16759 +
16760 +#define VLIMIT_NSOCK   16
16761 +#define VLIMIT_OPENFD  17
16762 +#define VLIMIT_ANON    18
16763 +#define VLIMIT_SHMEM   19
16764 +#define VLIMIT_SEMARY  20
16765 +#define VLIMIT_NSEMS   21
16766 +#define VLIMIT_DENTRY  22
16767 +
16768 +#ifdef __KERNEL__
16769 +
16770 +/*     keep in sync with CRLIM_INFINITY */
16771 +
16772 +#define        VLIM_INFINITY   (~0ULL)
16773 +
16774 +#ifndef RLIM_INFINITY
16775 +#warning RLIM_INFINITY is undefined
16776 +#endif
16777 +
16778 +#define __rlim_val(l,r,v)      ((l)->res[(r)].v)
16779 +
16780 +#define __rlim_soft(l,r)       __rlim_val(l,r,soft)
16781 +#define __rlim_hard(l,r)       __rlim_val(l,r,hard)
16782 +
16783 +#define __rlim_rcur(l,r)       __rlim_val(l,r,rcur)
16784 +#define __rlim_rmin(l,r)       __rlim_val(l,r,rmin)
16785 +#define __rlim_rmax(l,r)       __rlim_val(l,r,rmax)
16786 +
16787 +#define __rlim_lhit(l,r)       __rlim_val(l,r,lhit)
16788 +#define __rlim_hit(l,r)                atomic_inc(&__rlim_lhit(l,r))
16789 +
16790 +typedef atomic_long_t rlim_atomic_t;
16791 +typedef unsigned long rlim_t;
16792 +
16793 +#define __rlim_get(l,r)                atomic_long_read(&__rlim_rcur(l,r))
16794 +#define __rlim_set(l,r,v)      atomic_long_set(&__rlim_rcur(l,r), v)
16795 +#define __rlim_inc(l,r)                atomic_long_inc(&__rlim_rcur(l,r))
16796 +#define __rlim_dec(l,r)                atomic_long_dec(&__rlim_rcur(l,r))
16797 +#define __rlim_add(l,r,v)      atomic_long_add(v, &__rlim_rcur(l,r))
16798 +#define __rlim_sub(l,r,v)      atomic_long_sub(v, &__rlim_rcur(l,r))
16799 +
16800 +
16801 +#if    (RLIM_INFINITY == VLIM_INFINITY)
16802 +#define        VX_VLIM(r) ((long long)(long)(r))
16803 +#define        VX_RLIM(v) ((rlim_t)(v))
16804 +#else
16805 +#define        VX_VLIM(r) (((r) == RLIM_INFINITY) \
16806 +               ? VLIM_INFINITY : (long long)(r))
16807 +#define        VX_RLIM(v) (((v) == VLIM_INFINITY) \
16808 +               ? RLIM_INFINITY : (rlim_t)(v))
16809 +#endif
16810 +
16811 +struct sysinfo;
16812 +
16813 +void vx_vsi_meminfo(struct sysinfo *);
16814 +void vx_vsi_swapinfo(struct sysinfo *);
16815 +
16816 +#define NUM_LIMITS     24
16817 +
16818 +#endif /* __KERNEL__ */
16819 +#endif /* _VX_LIMIT_H */
16820 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/limit_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit_cmd.h
16821 --- linux-2.6.16.20/include/linux/vserver/limit_cmd.h   1970-01-01 01:00:00 +0100
16822 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit_cmd.h      2006-04-26 19:07:00 +0200
16823 @@ -0,0 +1,55 @@
16824 +#ifndef _VX_LIMIT_CMD_H
16825 +#define _VX_LIMIT_CMD_H
16826 +
16827 +
16828 +/*  rlimit vserver commands */
16829 +
16830 +#define VCMD_get_rlimit                VC_CMD(RLIMIT, 1, 0)
16831 +#define VCMD_set_rlimit                VC_CMD(RLIMIT, 2, 0)
16832 +#define VCMD_get_rlimit_mask   VC_CMD(RLIMIT, 3, 0)
16833 +
16834 +struct vcmd_ctx_rlimit_v0 {
16835 +       uint32_t id;
16836 +       uint64_t minimum;
16837 +       uint64_t softlimit;
16838 +       uint64_t maximum;
16839 +};
16840 +
16841 +struct vcmd_ctx_rlimit_mask_v0 {
16842 +       uint32_t minimum;
16843 +       uint32_t softlimit;
16844 +       uint32_t maximum;
16845 +};
16846 +
16847 +#define CRLIM_UNSET            (0ULL)
16848 +#define CRLIM_INFINITY         (~0ULL)
16849 +#define CRLIM_KEEP             (~1ULL)
16850 +
16851 +#ifdef __KERNEL__
16852 +
16853 +#ifdef CONFIG_IA32_EMULATION
16854 +/* ia32 ABI aligns 64-bit members to 4 bytes; needs packed to shrink */
16855 +struct vcmd_ctx_rlimit_v0_x32 {
16856 +       uint32_t id;
16857 +       uint64_t minimum;
16858 +       uint64_t softlimit;
16859 +       uint64_t maximum;
16860 +} __attribute__ ((packed, aligned (4)));
16861 +
16862 +#endif /* CONFIG_IA32_EMULATION */
16863 +
16864 +#include <linux/compiler.h>
16865 +
16866 +extern int vc_get_rlimit(uint32_t, void __user *);
16867 +extern int vc_set_rlimit(uint32_t, void __user *);
16868 +extern int vc_get_rlimit_mask(uint32_t, void __user *);
16869 +
16870 +#ifdef CONFIG_IA32_EMULATION
16871 +
16872 +extern int vc_get_rlimit_x32(uint32_t, void __user *);
16873 +extern int vc_set_rlimit_x32(uint32_t, void __user *);
16874 +
16875 +#endif /* CONFIG_IA32_EMULATION */
16876 +
16877 +#endif /* __KERNEL__ */
16878 +#endif /* _VX_LIMIT_CMD_H */
16879 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/limit_def.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit_def.h
16880 --- linux-2.6.16.20/include/linux/vserver/limit_def.h   1970-01-01 01:00:00 +0100
16881 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit_def.h      2006-04-26 19:07:00 +0200
16882 @@ -0,0 +1,47 @@
16883 +#ifndef _VX_LIMIT_DEF_H
16884 +#define _VX_LIMIT_DEF_H
16885 +
16886 +#include <asm/atomic.h>
16887 +#include <asm/resource.h>
16888 +
16889 +#include "limit.h"
16890 +
16891 +
16892 +struct _vx_res_limit {
16893 +       rlim_t soft;            /* Context soft limit */
16894 +       rlim_t hard;            /* Context hard limit */
16895 +
16896 +       rlim_atomic_t rcur;     /* Current value */
16897 +       rlim_t rmin;            /* Context minimum */
16898 +       rlim_t rmax;            /* Context maximum */
16899 +
16900 +       atomic_t lhit;          /* Limit hits */
16901 +};
16902 +
16903 +/* context sub struct */
16904 +
16905 +struct _vx_limit {
16906 +       struct _vx_res_limit res[NUM_LIMITS];
16907 +};
16908 +
16909 +#ifdef CONFIG_VSERVER_DEBUG
16910 +
16911 +static inline void __dump_vx_limit(struct _vx_limit *limit)
16912 +{
16913 +       int i;
16914 +
16915 +       printk("\t_vx_limit:");
16916 +       for (i=0; i<NUM_LIMITS; i++) {
16917 +               printk("\t [%2d] = %8lu %8lu/%8lu, %8ld/%8ld, %8d\n",
16918 +                       i, (unsigned long)__rlim_get(limit, i),
16919 +                       (unsigned long)__rlim_rmin(limit, i),
16920 +                       (unsigned long)__rlim_rmax(limit, i),
16921 +                       (long)__rlim_soft(limit, i),
16922 +                       (long)__rlim_hard(limit, i),
16923 +                       atomic_read(&__rlim_lhit(limit, i)));
16924 +       }
16925 +}
16926 +
16927 +#endif
16928 +
16929 +#endif /* _VX_LIMIT_DEF_H */
16930 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/limit_int.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit_int.h
16931 --- linux-2.6.16.20/include/linux/vserver/limit_int.h   1970-01-01 01:00:00 +0100
16932 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/limit_int.h      2006-04-26 19:07:00 +0200
16933 @@ -0,0 +1,83 @@
16934 +#ifndef _VX_LIMIT_INT_H
16935 +#define _VX_LIMIT_INT_H
16936 +
16937 +
16938 +#ifdef __KERNEL__
16939 +
16940 +#define VXD_RCRES_COND(r)      VXD_CBIT(cres, (r))
16941 +#define VXD_RLIMIT_COND(r)     VXD_CBIT(limit, (r))
16942 +
16943 +extern const char *vlimit_name[NUM_LIMITS];
16944 +
16945 +static inline void __vx_acc_cres(struct vx_info *vxi,
16946 +       int res, int dir, void *_data, char *_file, int _line)
16947 +{
16948 +       if (VXD_RCRES_COND(res))
16949 +               vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5ld%s (%p)",
16950 +                       (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
16951 +                       (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
16952 +                       (dir > 0) ? "++" : "--", _data, _file, _line);
16953 +       if (!vxi)
16954 +               return;
16955 +
16956 +       if (dir > 0)
16957 +               __rlim_inc(&vxi->limit, res);
16958 +       else
16959 +               __rlim_dec(&vxi->limit, res);
16960 +}
16961 +
16962 +static inline void __vx_add_cres(struct vx_info *vxi,
16963 +       int res, int amount, void *_data, char *_file, int _line)
16964 +{
16965 +       if (VXD_RCRES_COND(res))
16966 +               vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5ld += %5d (%p)",
16967 +                       (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
16968 +                       (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
16969 +                       amount, _data, _file, _line);
16970 +       if (amount == 0)
16971 +               return;
16972 +       if (!vxi)
16973 +               return;
16974 +       __rlim_add(&vxi->limit, res, amount);
16975 +}
16976 +
16977 +static inline int __vx_cres_avail(struct vx_info *vxi,
16978 +               int res, int num, char *_file, int _line)
16979 +{
16980 +       rlim_t value;
16981 +
16982 +       if (VXD_RLIMIT_COND(res))
16983 +               vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld/%5ld > %5ld + %5d",
16984 +                       (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
16985 +                       (vxi ? (long)__rlim_soft(&vxi->limit, res) : -1),
16986 +                       (vxi ? (long)__rlim_hard(&vxi->limit, res) : -1),
16987 +                       (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
16988 +                       num, _file, _line);
16989 +       if (num == 0)
16990 +               return 1;
16991 +       if (!vxi)
16992 +               return 1;
16993 +
16994 +       value = __rlim_get(&vxi->limit, res);
16995 +
16996 +       if (value > __rlim_rmax(&vxi->limit, res))
16997 +               __rlim_rmax(&vxi->limit, res) = value;
16998 +       else if (value < __rlim_rmin(&vxi->limit, res))
16999 +               __rlim_rmin(&vxi->limit, res) = value;
17000 +
17001 +       if (__rlim_soft(&vxi->limit, res) == RLIM_INFINITY)
17002 +               return -1;
17003 +       if (value + num <= __rlim_soft(&vxi->limit, res))
17004 +               return -1;
17005 +
17006 +       if (__rlim_hard(&vxi->limit, res) == RLIM_INFINITY)
17007 +               return 1;
17008 +       if (value + num <= __rlim_hard(&vxi->limit, res))
17009 +               return 1;
17010 +
17011 +       __rlim_hit(&vxi->limit, res);
17012 +       return 0;
17013 +}
17014 +
17015 +#endif /* __KERNEL__ */
17016 +#endif /* _VX_LIMIT_INT_H */
17017 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/monitor.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/monitor.h
17018 --- linux-2.6.16.20/include/linux/vserver/monitor.h     1970-01-01 01:00:00 +0100
17019 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/monitor.h        2006-04-26 19:07:00 +0200
17020 @@ -0,0 +1,97 @@
17021 +#ifndef _VX_MONITOR_H
17022 +#define _VX_MONITOR_H
17023 +
17024 +#include <linux/config.h>
17025 +
17026 +
17027 +enum {
17028 +       VXM_UNUSED = 0,
17029 +
17030 +       VXM_SYNC = 0x10,
17031 +
17032 +       VXM_UPDATE = 0x20,
17033 +       VXM_UPDATE_1,
17034 +       VXM_UPDATE_2,
17035 +
17036 +       VXM_RQINFO_1 = 0x24,
17037 +       VXM_RQINFO_2,
17038 +
17039 +       VXM_ACTIVATE = 0x40,
17040 +       VXM_DEACTIVATE,
17041 +       VXM_IDLE,
17042 +
17043 +       VXM_HOLD = 0x44,
17044 +       VXM_UNHOLD,
17045 +
17046 +       VXM_MIGRATE = 0x48,
17047 +       VXM_RESCHED,
17048 +
17049 +       /* all other bits are flags */
17050 +       VXM_SCHED = 0x80,
17051 +};
17052 +
17053 +struct _vxm_update_1 {
17054 +       uint32_t tokens_max;
17055 +       uint32_t fill_rate;
17056 +       uint32_t interval;
17057 +};
17058 +
17059 +struct _vxm_update_2 {
17060 +       uint32_t tokens_min;
17061 +       uint32_t fill_rate;
17062 +       uint32_t interval;
17063 +};
17064 +
17065 +struct _vxm_rqinfo_1 {
17066 +       uint16_t running;
17067 +       uint16_t onhold;
17068 +       uint16_t iowait;
17069 +       uint16_t uintr;
17070 +       uint32_t idle_tokens;
17071 +};
17072 +
17073 +struct _vxm_rqinfo_2 {
17074 +       uint32_t norm_time;
17075 +       uint32_t idle_time;
17076 +       uint32_t idle_skip;
17077 +};
17078 +
17079 +struct _vxm_sched {
17080 +       uint32_t tokens;
17081 +       uint32_t norm_time;
17082 +       uint32_t idle_time;
17083 +};
17084 +
17085 +struct _vxm_task {
17086 +       uint16_t pid;
17087 +       uint16_t state;
17088 +};
17089 +
17090 +struct _vxm_event {
17091 +       uint32_t jif;
17092 +       union {
17093 +               uint32_t seq;
17094 +               uint32_t sec;
17095 +       };
17096 +       union {
17097 +               uint32_t tokens;
17098 +               uint32_t nsec;
17099 +               struct _vxm_task tsk;
17100 +       };
17101 +};
17102 +
17103 +struct _vx_mon_entry {
17104 +       uint16_t type;
17105 +       uint16_t xid;
17106 +       union {
17107 +               struct _vxm_event ev;
17108 +               struct _vxm_sched sd;
17109 +               struct _vxm_update_1 u1;
17110 +               struct _vxm_update_2 u2;
17111 +               struct _vxm_rqinfo_1 q1;
17112 +               struct _vxm_rqinfo_2 q2;
17113 +       };
17114 +};
17115 +
17116 +
17117 +#endif /* _VX_MONITOR_H */
17118 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/namespace.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/namespace.h
17119 --- linux-2.6.16.20/include/linux/vserver/namespace.h   1970-01-01 01:00:00 +0100
17120 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/namespace.h      2006-04-26 19:07:00 +0200
17121 @@ -0,0 +1,15 @@
17122 +#ifndef _VX_NAMESPACE_H
17123 +#define _VX_NAMESPACE_H
17124 +
17125 +
17126 +#include <linux/types.h>
17127 +
17128 +struct vx_info;
17129 +struct namespace;
17130 +struct fs_struct;
17131 +
17132 +extern int vx_set_namespace(struct vx_info *, struct namespace *, struct fs_struct *);
17133 +
17134 +#else  /* _VX_NAMESPACE_H */
17135 +#warning duplicate inclusion
17136 +#endif /* _VX_NAMESPACE_H */
17137 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/namespace_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/namespace_cmd.h
17138 --- linux-2.6.16.20/include/linux/vserver/namespace_cmd.h       1970-01-01 01:00:00 +0100
17139 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/namespace_cmd.h  2006-04-26 19:07:00 +0200
17140 @@ -0,0 +1,19 @@
17141 +#ifndef _VX_NAMESPACE_CMD_H
17142 +#define _VX_NAMESPACE_CMD_H
17143 +
17144 +
17145 +#define VCMD_enter_namespace   VC_CMD(PROCALT, 1, 0)
17146 +#define VCMD_cleanup_namespace VC_CMD(PROCALT, 2, 0)
17147 +
17148 +#define VCMD_set_namespace_v0  VC_CMD(PROCALT, 3, 0)
17149 +#define VCMD_set_namespace     VC_CMD(PROCALT, 3, 1)
17150 +
17151 +
17152 +#ifdef __KERNEL__
17153 +
17154 +extern int vc_enter_namespace(uint32_t, void __user *);
17155 +extern int vc_cleanup_namespace(uint32_t, void __user *);
17156 +extern int vc_set_namespace(uint32_t, void __user *);
17157 +
17158 +#endif /* __KERNEL__ */
17159 +#endif /* _VX_NAMESPACE_CMD_H */
17160 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/network.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/network.h
17161 --- linux-2.6.16.20/include/linux/vserver/network.h     1970-01-01 01:00:00 +0100
17162 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/network.h        2006-04-27 20:28:48 +0200
17163 @@ -0,0 +1,139 @@
17164 +#ifndef _VX_NETWORK_H
17165 +#define _VX_NETWORK_H
17166 +
17167 +#include <linux/types.h>
17168 +
17169 +
17170 +#define MAX_N_CONTEXT  65535   /* Arbitrary limit */
17171 +
17172 +#define NX_DYNAMIC_ID  ((uint32_t)-1)          /* id for dynamic context */
17173 +
17174 +#define NB_IPV4ROOT    16
17175 +
17176 +
17177 +/* network flags */
17178 +
17179 +#define NXF_STATE_SETUP                (1ULL<<32)
17180 +
17181 +#define NXF_SC_HELPER          (1ULL<<36)
17182 +#define NXF_PERSISTENT         (1ULL<<38)
17183 +
17184 +#define NXF_ONE_TIME           (0x0001ULL<<32)
17185 +
17186 +#define NXF_INIT_SET           (0)
17187 +
17188 +
17189 +/* address types */
17190 +
17191 +#define NXA_TYPE_IPV4          1
17192 +#define NXA_TYPE_IPV6          2
17193 +
17194 +#define NXA_MOD_BCAST          (1<<8)
17195 +
17196 +#define NXA_TYPE_ANY           (~0)
17197 +
17198 +
17199 +#ifdef __KERNEL__
17200 +
17201 +#include <linux/list.h>
17202 +#include <linux/spinlock.h>
17203 +#include <linux/rcupdate.h>
17204 +#include <asm/atomic.h>
17205 +
17206 +
17207 +struct nx_info {
17208 +       struct hlist_node nx_hlist;     /* linked list of nxinfos */
17209 +       nid_t nx_id;                    /* vnet id */
17210 +       atomic_t nx_usecnt;             /* usage count */
17211 +       atomic_t nx_tasks;              /* tasks count */
17212 +       int nx_state;                   /* context state */
17213 +
17214 +       uint64_t nx_flags;              /* network flag word */
17215 +       uint64_t nx_ncaps;              /* network capabilities */
17216 +
17217 +       int nbipv4;
17218 +       __u32 ipv4[NB_IPV4ROOT];        /* Process can only bind to these IPs */
17219 +                                       /* The first one is used to connect */
17220 +                                       /* and for bind any service */
17221 +                                       /* The others must be used explicitly */
17222 +       __u32 mask[NB_IPV4ROOT];        /* Netmask for each ipv4 */
17223 +                                       /* Used to select the proper source */
17224 +                                       /* address for sockets */
17225 +       __u32 v4_bcast;                 /* Broadcast address to receive UDP  */
17226 +
17227 +       char nx_name[65];               /* network context name */
17228 +};
17229 +
17230 +
17231 +/* status flags */
17232 +
17233 +#define NXS_HASHED      0x0001
17234 +#define NXS_SHUTDOWN    0x0100
17235 +#define NXS_RELEASED    0x8000
17236 +
17237 +/* check conditions */
17238 +
17239 +#define NX_ADMIN       0x0001
17240 +#define NX_WATCH       0x0002
17241 +#define NX_BLEND       0x0004
17242 +#define NX_HOSTID      0x0008
17243 +
17244 +#define NX_IDENT       0x0010
17245 +#define NX_EQUIV       0x0020
17246 +#define NX_PARENT      0x0040
17247 +#define NX_CHILD       0x0080
17248 +
17249 +#define NX_ARG_MASK    0x00F0
17250 +
17251 +#define NX_DYNAMIC     0x0100
17252 +#define NX_STATIC      0x0200
17253 +
17254 +#define NX_ATR_MASK    0x0F00
17255 +
17256 +
17257 +extern struct nx_info *lookup_nx_info(int);
17258 +
17259 +extern int get_nid_list(int, unsigned int *, int);
17260 +extern int nid_is_hashed(nid_t);
17261 +
17262 +extern int nx_migrate_task(struct task_struct *, struct nx_info *);
17263 +
17264 +extern long vs_net_change(struct nx_info *, unsigned int);
17265 +
17266 +struct in_ifaddr;
17267 +struct net_device;
17268 +
17269 +#ifdef CONFIG_INET
17270 +int ifa_in_nx_info(struct in_ifaddr *, struct nx_info *);
17271 +int dev_in_nx_info(struct net_device *, struct nx_info *);
17272 +
17273 +#else /* CONFIG_INET */
17274 +static inline
17275 +int ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
17276 +{
17277 +       return 1;
17278 +}
17279 +
17280 +static inline
17281 +int dev_in_nx_info(struct net_device *d, struct nx_info *n)
17282 +{
17283 +       return 1;
17284 +}
17285 +#endif /* CONFIG_INET */
17286 +
17287 +struct sock;
17288 +
17289 +#ifdef CONFIG_INET
17290 +int nx_addr_conflict(struct nx_info *, uint32_t, struct sock *);
17291 +#else /* CONFIG_INET */
17292 +static inline
17293 +int nx_addr_conflict(struct nx_info *n, uint32_t a, struct sock *s)
17294 +{
17295 +       return 1;
17296 +}
17297 +#endif /* CONFIG_INET */
17298 +
17299 +#endif /* __KERNEL__ */
17300 +#else  /* _VX_NETWORK_H */
17301 +#warning duplicate inclusion
17302 +#endif /* _VX_NETWORK_H */
17303 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/network_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/network_cmd.h
17304 --- linux-2.6.16.20/include/linux/vserver/network_cmd.h 1970-01-01 01:00:00 +0100
17305 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/network_cmd.h    2006-04-26 19:07:00 +0200
17306 @@ -0,0 +1,89 @@
17307 +#ifndef _VX_NETWORK_CMD_H
17308 +#define _VX_NETWORK_CMD_H
17309 +
17310 +
17311 +/* vinfo commands */
17312 +
17313 +#define VCMD_task_nid          VC_CMD(VINFO, 2, 0)
17314 +
17315 +#ifdef __KERNEL__
17316 +extern int vc_task_nid(uint32_t, void __user *);
17317 +
17318 +#endif /* __KERNEL__ */
17319 +
17320 +#define VCMD_nx_info           VC_CMD(VINFO, 6, 0)
17321 +
17322 +struct vcmd_nx_info_v0 {
17323 +       uint32_t nid;
17324 +       /* more to come */
17325 +};
17326 +
17327 +#ifdef __KERNEL__
17328 +extern int vc_nx_info(uint32_t, void __user *);
17329 +
17330 +#endif /* __KERNEL__ */
17331 +
17332 +#define VCMD_net_create_v0     VC_CMD(VNET, 1, 0)
17333 +#define VCMD_net_create                VC_CMD(VNET, 1, 1)
17334 +
17335 +struct  vcmd_net_create {
17336 +       uint64_t flagword;
17337 +};
17338 +
17339 +#define VCMD_net_migrate       VC_CMD(NETMIG, 1, 0)
17340 +
17341 +#define VCMD_net_add           VC_CMD(NETALT, 1, 0)
17342 +#define VCMD_net_remove                VC_CMD(NETALT, 2, 0)
17343 +
17344 +struct vcmd_net_addr_v0 {
17345 +       uint16_t type;
17346 +       uint16_t count;
17347 +       uint32_t ip[4];
17348 +       uint32_t mask[4];
17349 +       /* more to come */
17350 +};
17351 +
17352 +
17353 +#ifdef __KERNEL__
17354 +extern int vc_net_create(uint32_t, void __user *);
17355 +extern int vc_net_migrate(uint32_t, void __user *);
17356 +
17357 +extern int vc_net_add(uint32_t, void __user *);
17358 +extern int vc_net_remove(uint32_t, void __user *);
17359 +
17360 +#endif /* __KERNEL__ */
17361 +
17362 +
17363 +/* flag commands */
17364 +
17365 +#define VCMD_get_nflags                VC_CMD(FLAGS, 5, 0)
17366 +#define VCMD_set_nflags                VC_CMD(FLAGS, 6, 0)
17367 +
17368 +struct vcmd_net_flags_v0 {
17369 +       uint64_t flagword;
17370 +       uint64_t mask;
17371 +};
17372 +
17373 +#ifdef __KERNEL__
17374 +extern int vc_get_nflags(uint32_t, void __user *);
17375 +extern int vc_set_nflags(uint32_t, void __user *);
17376 +
17377 +#endif /* __KERNEL__ */
17378 +
17379 +
17380 +/* network caps commands */
17381 +
17382 +#define VCMD_get_ncaps         VC_CMD(FLAGS, 7, 0)
17383 +#define VCMD_set_ncaps         VC_CMD(FLAGS, 8, 0)
17384 +
17385 +struct vcmd_net_caps_v0 {
17386 +       uint64_t ncaps;
17387 +       uint64_t cmask;
17388 +};
17389 +
17390 +#ifdef __KERNEL__
17391 +extern int vc_get_ncaps(uint32_t, void __user *);
17392 +extern int vc_set_ncaps(uint32_t, void __user *);
17393 +
17394 +#endif /* __KERNEL__ */
17395 +#endif /* _VX_NETWORK_CMD_H */
17396 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/sched.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/sched.h
17397 --- linux-2.6.16.20/include/linux/vserver/sched.h       1970-01-01 01:00:00 +0100
17398 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/sched.h  2006-04-26 19:07:00 +0200
17399 @@ -0,0 +1,26 @@
17400 +#ifndef _VX_SCHED_H
17401 +#define _VX_SCHED_H
17402 +
17403 +
17404 +#ifdef __KERNEL__
17405 +
17406 +struct timespec;
17407 +
17408 +void vx_vsi_uptime(struct timespec *, struct timespec *);
17409 +
17410 +
17411 +struct vx_info;
17412 +
17413 +void vx_update_load(struct vx_info *);
17414 +
17415 +
17416 +int vx_tokens_recalc(struct _vx_sched_pc *,
17417 +       unsigned long *, unsigned long *, int [2]);
17418 +
17419 +void vx_update_sched_param(struct _vx_sched *sched,
17420 +       struct _vx_sched_pc *sched_pc);
17421 +
17422 +#endif /* __KERNEL__ */
17423 +#else  /* _VX_SCHED_H */
17424 +#warning duplicate inclusion
17425 +#endif /* _VX_SCHED_H */
17426 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/sched_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/sched_cmd.h
17427 --- linux-2.6.16.20/include/linux/vserver/sched_cmd.h   1970-01-01 01:00:00 +0100
17428 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/sched_cmd.h      2006-04-26 19:07:00 +0200
17429 @@ -0,0 +1,72 @@
17430 +#ifndef _VX_SCHED_CMD_H
17431 +#define _VX_SCHED_CMD_H
17432 +
17433 +
17434 +/*  sched vserver commands */
17435 +
17436 +#define VCMD_set_sched_v2      VC_CMD(SCHED, 1, 2)
17437 +#define VCMD_set_sched_v3      VC_CMD(SCHED, 1, 3)
17438 +#define VCMD_set_sched         VC_CMD(SCHED, 1, 4)
17439 +
17440 +struct vcmd_set_sched_v2 {
17441 +       int32_t fill_rate;
17442 +       int32_t interval;
17443 +       int32_t tokens;
17444 +       int32_t tokens_min;
17445 +       int32_t tokens_max;
17446 +       uint64_t cpu_mask;
17447 +};
17448 +
17449 +struct vcmd_set_sched_v3 {
17450 +       uint32_t set_mask;
17451 +       int32_t fill_rate;
17452 +       int32_t interval;
17453 +       int32_t tokens;
17454 +       int32_t tokens_min;
17455 +       int32_t tokens_max;
17456 +       int32_t priority_bias;
17457 +};
17458 +
17459 +struct vcmd_set_sched_v4 {
17460 +       uint32_t set_mask;
17461 +       int32_t fill_rate;
17462 +       int32_t interval;
17463 +       int32_t tokens;
17464 +       int32_t tokens_min;
17465 +       int32_t tokens_max;
17466 +       int32_t prio_bias;
17467 +       int32_t cpu_id;
17468 +       int32_t bucket_id;
17469 +};
17470 +
17471 +
17472 +#define VXSM_FILL_RATE         0x0001
17473 +#define VXSM_INTERVAL          0x0002
17474 +#define VXSM_FILL_RATE2                0x0004
17475 +#define VXSM_INTERVAL2         0x0008
17476 +#define VXSM_TOKENS            0x0010
17477 +#define VXSM_TOKENS_MIN                0x0020
17478 +#define VXSM_TOKENS_MAX                0x0040
17479 +#define VXSM_PRIO_BIAS         0x0100
17480 +
17481 +#define VXSM_IDLE_TIME         0x0200
17482 +#define VXSM_FORCE             0x0400
17483 +
17484 +#define        VXSM_V3_MASK            0x0173
17485 +#define        VXSM_SET_MASK           0x01FF
17486 +
17487 +#define VXSM_CPU_ID            0x1000
17488 +#define VXSM_BUCKET_ID         0x2000
17489 +
17490 +#define SCHED_KEEP             (-2)    /* only for v2 */
17491 +
17492 +#ifdef __KERNEL__
17493 +
17494 +#include <linux/compiler.h>
17495 +
17496 +extern int vc_set_sched_v2(uint32_t, void __user *);
17497 +extern int vc_set_sched_v3(uint32_t, void __user *);
17498 +extern int vc_set_sched(uint32_t, void __user *);
17499 +
17500 +#endif /* __KERNEL__ */
17501 +#endif /* _VX_SCHED_CMD_H */
17502 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/sched_def.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/sched_def.h
17503 --- linux-2.6.16.20/include/linux/vserver/sched_def.h   1970-01-01 01:00:00 +0100
17504 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/sched_def.h      2006-04-26 19:07:00 +0200
17505 @@ -0,0 +1,67 @@
17506 +#ifndef _VX_SCHED_DEF_H
17507 +#define _VX_SCHED_DEF_H
17508 +
17509 +#include <linux/spinlock.h>
17510 +#include <linux/jiffies.h>
17511 +#include <linux/cpumask.h>
17512 +#include <asm/atomic.h>
17513 +#include <asm/param.h>
17514 +
17515 +
17516 +/* context sub struct */
17517 +
17518 +struct _vx_sched {
17519 +       spinlock_t tokens_lock;         /* lock for token bucket */
17520 +
17521 +       int tokens;                     /* number of CPU tokens */
17522 +       int fill_rate[2];               /* Fill rate: add X tokens... */
17523 +       int interval[2];                /* Divisor:   per Y jiffies   */
17524 +       int tokens_min;                 /* Limit:     minimum for unhold */
17525 +       int tokens_max;                 /* Limit:     no more than N tokens */
17526 +
17527 +       unsigned update_mask;           /* which features should be updated */
17528 +       cpumask_t update;               /* CPUs which should update */
17529 +
17530 +       int prio_bias;                  /* bias offset for priority */
17531 +       int vavavoom;                   /* last calculated vavavoom */
17532 +};
17533 +
17534 +struct _vx_sched_pc {
17535 +       int tokens;                     /* number of CPU tokens */
17536 +       int flags;                      /* bucket flags */
17537 +
17538 +       int fill_rate[2];               /* Fill rate: add X tokens... */
17539 +       int interval[2];                /* Divisor:   per Y jiffies   */
17540 +       int tokens_min;                 /* Limit:     minimum for unhold */
17541 +       int tokens_max;                 /* Limit:     no more than N tokens */
17542 +
17543 +       unsigned long norm_time;        /* last time accounted */
17544 +       unsigned long idle_time;        /* non linear time for fair sched */
17545 +       unsigned long token_time;       /* token time for accounting */
17546 +       unsigned long onhold;           /* jiffies when put on hold */
17547 +
17548 +       uint64_t user_ticks;            /* token tick events */
17549 +       uint64_t sys_ticks;             /* token tick events */
17550 +       uint64_t hold_ticks;            /* token ticks paused */
17551 +};
17552 +
17553 +
17554 +#define VXSF_ONHOLD    0x0001
17555 +#define VXSF_IDLE_TIME 0x0100
17556 +
17557 +#ifdef CONFIG_VSERVER_DEBUG
17558 +
17559 +static inline void __dump_vx_sched(struct _vx_sched *sched)
17560 +{
17561 +       printk("\t_vx_sched:\n");
17562 +       printk("\t tokens: %4d/%4d, %4d/%4d, %4d, %4d\n",
17563 +               sched->fill_rate[0], sched->interval[0],
17564 +               sched->fill_rate[1], sched->interval[1],
17565 +               sched->tokens_min, sched->tokens_max);
17566 +       printk("\t priority = %4d, %4d\n",
17567 +               sched->prio_bias, sched->vavavoom);
17568 +}
17569 +
17570 +#endif
17571 +
17572 +#endif /* _VX_SCHED_DEF_H */
17573 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/signal.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/signal.h
17574 --- linux-2.6.16.20/include/linux/vserver/signal.h      1970-01-01 01:00:00 +0100
17575 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/signal.h 2006-04-26 19:07:00 +0200
17576 @@ -0,0 +1,14 @@
17577 +#ifndef _VX_SIGNAL_H
17578 +#define _VX_SIGNAL_H
17579 +
17580 +
17581 +#ifdef __KERNEL__
17582 +
17583 +struct vx_info;
17584 +
17585 +int vx_info_kill(struct vx_info *, int, int);
17586 +
17587 +#endif /* __KERNEL__ */
17588 +#else  /* _VX_SIGNAL_H */
17589 +#warning duplicate inclusion
17590 +#endif /* _VX_SIGNAL_H */
17591 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/signal_cmd.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/signal_cmd.h
17592 --- linux-2.6.16.20/include/linux/vserver/signal_cmd.h  1970-01-01 01:00:00 +0100
17593 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/signal_cmd.h     2006-04-26 19:07:00 +0200
17594 @@ -0,0 +1,26 @@
17595 +#ifndef _VX_SIGNAL_CMD_H
17596 +#define _VX_SIGNAL_CMD_H
17597 +
17598 +
17599 +/*  signalling vserver commands */
17600 +
17601 +#define VCMD_ctx_kill          VC_CMD(PROCTRL, 1, 0)
17602 +#define VCMD_wait_exit         VC_CMD(EVENT, 99, 0)
17603 +
17604 +struct vcmd_ctx_kill_v0 {
17605 +       int32_t pid;
17606 +       int32_t sig;
17607 +};
17608 +
17609 +struct vcmd_wait_exit_v0 {
17610 +       int32_t reboot_cmd;
17611 +       int32_t exit_code;
17612 +};
17613 +
17614 +#ifdef __KERNEL__
17615 +
17616 +extern int vc_ctx_kill(uint32_t, void __user *);
17617 +extern int vc_wait_exit(uint32_t, void __user *);
17618 +
17619 +#endif /* __KERNEL__ */
17620 +#endif /* _VX_SIGNAL_CMD_H */
17621 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/switch.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/switch.h
17622 --- linux-2.6.16.20/include/linux/vserver/switch.h      1970-01-01 01:00:00 +0100
17623 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/switch.h 2006-04-26 19:07:00 +0200
17624 @@ -0,0 +1,99 @@
17625 +#ifndef _VX_SWITCH_H
17626 +#define _VX_SWITCH_H
17627 +
17628 +#include <linux/types.h>
17629 +
17630 +
17631 +#define VC_CATEGORY(c)         (((c) >> 24) & 0x3F)
17632 +#define VC_COMMAND(c)          (((c) >> 16) & 0xFF)
17633 +#define VC_VERSION(c)          ((c) & 0xFFF)
17634 +
17635 +#define VC_CMD(c,i,v)          ((((VC_CAT_ ## c) & 0x3F) << 24) \
17636 +                               | (((i) & 0xFF) << 16) | ((v) & 0xFFF))
17637 +
17638 +/*
17639 +
17640 +  Syscall Matrix V2.8
17641 +
17642 +        |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
17643 +        |STATS  |DESTROY|ALTER  |CHANGE |LIMIT  |TEST   | |       |       |
17644 +        |INFO   |SETUP  |       |MOVE   |       |       | |       |       |
17645 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17646 +  SYSTEM |VERSION|VSETUP |VHOST  |       |       |       | |DEVICES|       |
17647 +  HOST   |     00|     01|     02|     03|     04|     05| |     06|     07|
17648 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17649 +  CPU    |       |VPROC  |PROCALT|PROCMIG|PROCTRL|       | |SCHED. |       |
17650 +  PROCESS|     08|     09|     10|     11|     12|     13| |     14|     15|
17651 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17652 +  MEMORY |       |       |       |       |       |       | |SWAP   |       |
17653 +        |     16|     17|     18|     19|     20|     21| |     22|     23|
17654 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17655 +  NETWORK|       |VNET   |NETALT |NETMIG |NETCTRL|       | |SERIAL |       |
17656 +        |     24|     25|     26|     27|     28|     29| |     30|     31|
17657 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17658 +  DISK   |       |       |       |       |DLIMIT |       | |INODE  |       |
17659 +  VFS    |     32|     33|     34|     35|     36|     37| |     38|     39|
17660 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17661 +  OTHER  |       |       |       |       |       |       | |VINFO  |       |
17662 +        |     40|     41|     42|     43|     44|     45| |     46|     47|
17663 +  =======+=======+=======+=======+=======+=======+=======+ +=======+=======+
17664 +  SPECIAL|EVENT  |       |       |       |FLAGS  |       | |       |       |
17665 +        |     48|     49|     50|     51|     52|     53| |     54|     55|
17666 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17667 +  SPECIAL|DEBUG  |       |       |       |RLIMIT |SYSCALL| |       |COMPAT |
17668 +        |     56|     57|     58|     59|     60|TEST 61| |     62|     63|
17669 +  -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
17670 +
17671 +*/
17672 +
17673 +#define VC_CAT_VERSION         0
17674 +
17675 +#define VC_CAT_VSETUP          1
17676 +#define VC_CAT_VHOST           2
17677 +
17678 +#define VC_CAT_VPROC           9
17679 +#define VC_CAT_PROCALT         10
17680 +#define VC_CAT_PROCMIG         11
17681 +#define VC_CAT_PROCTRL         12
17682 +
17683 +#define VC_CAT_SCHED           14
17684 +
17685 +#define VC_CAT_VNET            25
17686 +#define VC_CAT_NETALT          26
17687 +#define VC_CAT_NETMIG          27
17688 +#define VC_CAT_NETCTRL         28
17689 +
17690 +#define VC_CAT_DLIMIT          36
17691 +#define VC_CAT_INODE           38
17692 +
17693 +#define VC_CAT_VINFO           46
17694 +#define VC_CAT_EVENT           48
17695 +
17696 +#define VC_CAT_FLAGS           52
17697 +#define VC_CAT_DEBUG           56
17698 +#define VC_CAT_RLIMIT          60
17699 +
17700 +#define VC_CAT_SYSTEST         61
17701 +#define VC_CAT_COMPAT          63
17702 +
17703 +/*  interface version */
17704 +
17705 +#define VCI_VERSION            0x00020101
17706 +#define VCI_LEGACY_VERSION     0x000100FF
17707 +
17708 +/*  query version */
17709 +
17710 +#define VCMD_get_version       VC_CMD(VERSION, 0, 0)
17711 +#define VCMD_get_vci           VC_CMD(VERSION, 1, 0)
17712 +
17713 +
17714 +#ifdef __KERNEL__
17715 +
17716 +#include <linux/errno.h>
17717 +
17718 +
17719 +#else  /* __KERNEL__ */
17720 +#define __user
17721 +#endif /* __KERNEL__ */
17722 +
17723 +#endif /* _VX_SWITCH_H */
17724 diff -NurpP --minimal linux-2.6.16.20/include/linux/vserver/tag.h linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/tag.h
17725 --- linux-2.6.16.20/include/linux/vserver/tag.h 1970-01-01 01:00:00 +0100
17726 +++ linux-2.6.16.20-vs2.1.1-rc22/include/linux/vserver/tag.h    2006-04-26 19:07:00 +0200
17727 @@ -0,0 +1,153 @@
17728 +#ifndef _DX_TAG_H
17729 +#define _DX_TAG_H
17730 +
17731 +
17732 +#define DX_TAG(in)     (IS_TAGGED(in))
17733 +
17734 +
17735 +#ifdef CONFIG_DX_TAG_NFSD
17736 +#define DX_TAG_NFSD    1
17737 +#else
17738 +#define DX_TAG_NFSD    0
17739 +#endif
17740 +
17741 +
17742 +#ifdef CONFIG_TAGGING_NONE
17743 +
17744 +#define MAX_UID                0xFFFFFFFF
17745 +#define MAX_GID                0xFFFFFFFF
17746 +
17747 +#define INOTAG_TAG(cond, uid, gid, tag)        (0)
17748 +
17749 +#define TAGINO_UID(cond, uid, tag)     (uid)
17750 +#define TAGINO_GID(cond, gid, tag)     (gid)
17751 +
17752 +#endif
17753 +
17754 +
17755 +#ifdef CONFIG_TAGGING_GID16
17756 +
17757 +#define MAX_UID                0xFFFFFFFF
17758 +#define MAX_GID                0x0000FFFF
17759 +
17760 +#define INOTAG_TAG(cond, uid, gid, tag)        \
17761 +       ((cond) ? (((gid) >> 16) & 0xFFFF) : 0)
17762 +
17763 +#define TAGINO_UID(cond, uid, tag)     (uid)
17764 +#define TAGINO_GID(cond, gid, tag)     \
17765 +       ((cond) ? (((gid) & 0xFFFF) | ((tag) << 16)) : (gid))
17766 +
17767 +#endif
17768 +
17769 +
17770 +#ifdef CONFIG_TAGGING_ID24
17771 +
17772 +#define MAX_UID                0x00FFFFFF
17773 +#define MAX_GID                0x00FFFFFF
17774 +
17775 +#define INOTAG_TAG(cond, uid, gid, tag)        \
17776 +       ((cond) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0)
17777 +
17778 +#define TAGINO_UID(cond, uid, tag)     \
17779 +       ((cond) ? (((uid) & 0xFFFFFF) | (((tag) & 0xFF00) << 16)) : (uid))
17780 +#define TAGINO_GID(cond, gid, tag)     \
17781 +       ((cond) ? (((gid) & 0xFFFFFF) | (((tag) & 0x00FF) << 24)) : (gid))
17782 +
17783 +#endif
17784 +
17785 +
17786 +#ifdef CONFIG_TAGGING_UID16
17787 +
17788 +#define MAX_UID                0x0000FFFF
17789 +#define MAX_GID                0xFFFFFFFF
17790 +
17791 +#define INOTAG_TAG(cond, uid, gid, tag)        \
17792 +       ((cond) ? (((uid) >> 16) & 0xFFFF) : 0)
17793 +
17794 +#define TAGINO_UID(cond, uid, tag)     \
17795 +       ((cond) ? (((uid) & 0xFFFF) | ((tag) << 16)) : (uid))
17796 +#define TAGINO_GID(cond, gid, tag)     (gid)
17797 +
17798 +#endif
17799 +
17800 +
17801 +#ifdef CONFIG_TAGGING_INTERN
17802 +
17803 +#define MAX_UID                0xFFFFFFFF
17804 +#define MAX_GID                0xFFFFFFFF
17805 +
17806 +#define INOTAG_TAG(cond, uid, gid, tag)        \
17807 +       ((cond) ? (tag) : 0)
17808 +
17809 +#define TAGINO_UID(cond, uid, tag)     (uid)
17810 +#define TAGINO_GID(cond, gid, tag)     (gid)
17811 +
17812 +#endif
17813 +
17814 +
17815 +#ifdef CONFIG_TAGGING_RUNTIME
17816 +
17817 +#define MAX_UID                0xFFFFFFFF
17818 +#define MAX_GID                0xFFFFFFFF
17819 +
17820 +#define INOTAG_TAG(cond, uid, gid, tag)        (0)
17821 +
17822 +#define TAGINO_UID(cond, uid, tag)     (uid)
17823 +#define TAGINO_GID(cond, gid, tag)     (gid)
17824 +
17825 +#endif
17826 +
17827 +
17828 +#ifndef CONFIG_TAGGING_NONE
17829 +#define dx_current_fstag(sb)   \
17830 +       ((sb)->s_flags & MS_TAGGED ? dx_current_tag(): 0)
17831 +#else
17832 +#define dx_current_fstag(sb)   (0)
17833 +#endif
17834 +
17835 +#ifndef CONFIG_TAGGING_INTERN
17836 +#define TAGINO_TAG(cond, tag)  (0)
17837 +#else
17838 +#define TAGINO_TAG(cond, tag)  ((cond) ? (tag) : 0)
17839 +#endif
17840 +
17841 +#define INOTAG_UID(cond, uid, gid)     \
17842 +       ((cond) ? ((uid) & MAX_UID) : (uid))
17843 +#define INOTAG_GID(cond, uid, gid)     \
17844 +       ((cond) ? ((gid) & MAX_GID) : (gid))
17845 +
17846 +
17847 +static inline uid_t dx_map_uid(uid_t uid)
17848 +{
17849 +       if ((uid > MAX_UID) && (uid != -1))
17850 +               uid = -2;
17851 +       return (uid & MAX_UID);
17852 +}
17853 +
17854 +static inline gid_t dx_map_gid(gid_t gid)
17855 +{
17856 +       if ((gid > MAX_GID) && (gid != -1))
17857 +               gid = -2;
17858 +       return (gid & MAX_GID);
17859 +}
17860 +
17861 +
17862 +#ifdef CONFIG_VSERVER_LEGACY
17863 +#define FIOC_GETTAG    _IOR('x', 1, long)
17864 +#define FIOC_SETTAG    _IOW('x', 2, long)
17865 +#define FIOC_SETTAGJ   _IOW('x', 3, long)
17866 +#endif
17867 +
17868 +#ifdef CONFIG_PROPAGATE
17869 +
17870 +int dx_parse_tag(char *string, tag_t *tag, int remove);
17871 +
17872 +void __dx_propagate_tag(struct nameidata *nd, struct inode *inode);
17873 +
17874 +#define dx_propagate_tag(n,i)  __dx_propagate_tag(n,i)
17875 +
17876 +#else
17877 +#define dx_propagate_tag(n,i)  do { } while (0)
17878 +#endif
17879 +
17880 +#endif /* _DX_TAG_H */
17881 diff -NurpP --minimal linux-2.6.16.20/include/net/af_unix.h linux-2.6.16.20-vs2.1.1-rc22/include/net/af_unix.h
17882 --- linux-2.6.16.20/include/net/af_unix.h       2006-02-18 14:40:36 +0100
17883 +++ linux-2.6.16.20-vs2.1.1-rc22/include/net/af_unix.h  2006-04-26 19:07:00 +0200
17884 @@ -17,9 +17,9 @@ extern spinlock_t unix_table_lock;
17885  
17886  extern atomic_t unix_tot_inflight;
17887  
17888 -static inline struct sock *first_unix_socket(int *i)
17889 +static inline struct sock *next_unix_socket_table(int *i)
17890  {
17891 -       for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
17892 +       for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
17893                 if (!hlist_empty(&unix_socket_table[*i]))
17894                         return __sk_head(&unix_socket_table[*i]);
17895         }
17896 @@ -28,16 +28,19 @@ static inline struct sock *first_unix_so
17897  
17898  static inline struct sock *next_unix_socket(int *i, struct sock *s)
17899  {
17900 -       struct sock *next = sk_next(s);
17901 -       /* More in this chain? */
17902 -       if (next)
17903 -               return next;
17904 -       /* Look for next non-empty chain. */
17905 -       for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
17906 -               if (!hlist_empty(&unix_socket_table[*i]))
17907 -                       return __sk_head(&unix_socket_table[*i]);
17908 -       }
17909 -       return NULL;
17910 +       do {
17911 +               if (s)
17912 +                       s = sk_next(s);
17913 +               if (!s)
17914 +                       s = next_unix_socket_table(i);
17915 +       } while (s && !vx_check(s->sk_xid, VX_IDENT|VX_WATCH));
17916 +       return s;
17917 +}
17918 +
17919 +static inline struct sock *first_unix_socket(int *i)
17920 +{
17921 +       *i = -1;        /* table scan pre-increments, so this starts at bucket 0 */
17922 +       return next_unix_socket(i, NULL);
17923  }
17924  
17925  #define forall_unix_sockets(i, s) \
17926 diff -NurpP --minimal linux-2.6.16.20/include/net/inet_hashtables.h linux-2.6.16.20-vs2.1.1-rc22/include/net/inet_hashtables.h
17927 --- linux-2.6.16.20/include/net/inet_hashtables.h       2006-04-09 13:49:58 +0200
17928 +++ linux-2.6.16.20-vs2.1.1-rc22/include/net/inet_hashtables.h  2006-04-26 19:07:00 +0200
17929 @@ -272,6 +272,25 @@ static inline int inet_iif(const struct 
17930         return ((struct rtable *)skb->dst)->rt_iif;
17931  }
17932  
17933 +/*
17934 + *      Check if a given address matches for an inet socket
17935 + *
17936 + *      nxi:   the socket's nx_info if any
17937 + *      addr:  to be verified address
17938 + *      saddr: socket addresses
17939 + */
17940 +static inline int inet_addr_match (
17941 +       struct nx_info *nxi,
17942 +       uint32_t addr,
17943 +       uint32_t saddr)
17944 +{
17945 +       if (addr && (saddr == addr))
17946 +               return 1;
17947 +       if (!saddr)
17948 +               return addr_in_nx_info(nxi, addr);
17949 +       return 0;
17950 +}
17951 +
17952  extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
17953                                            const u32 daddr,
17954                                            const unsigned short hnum,
17955 @@ -292,7 +311,7 @@ static inline struct sock *
17956                 const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
17957  
17958                 if (inet->num == hnum && !sk->sk_node.next &&
17959 -                   (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
17960 +                   inet_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) &&
17961                     (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
17962                     !sk->sk_bound_dev_if)
17963                         goto sherry_cache;
17964 diff -NurpP --minimal linux-2.6.16.20/include/net/inet_sock.h linux-2.6.16.20-vs2.1.1-rc22/include/net/inet_sock.h
17965 --- linux-2.6.16.20/include/net/inet_sock.h     2006-04-09 13:49:58 +0200
17966 +++ linux-2.6.16.20-vs2.1.1-rc22/include/net/inet_sock.h        2006-04-26 19:07:00 +0200
17967 @@ -115,6 +115,7 @@ struct inet_sock {
17968         /* Socket demultiplex comparisons on incoming packets. */
17969         __u32                   daddr;
17970         __u32                   rcv_saddr;
17971 +       __u32                   rcv_saddr2;     /* Second bound ipv4 addr, for ipv4root */
17972         __u16                   dport;
17973         __u16                   num;
17974         __u32                   saddr;
17975 diff -NurpP --minimal linux-2.6.16.20/include/net/inet_timewait_sock.h linux-2.6.16.20-vs2.1.1-rc22/include/net/inet_timewait_sock.h
17976 --- linux-2.6.16.20/include/net/inet_timewait_sock.h    2006-04-09 13:49:58 +0200
17977 +++ linux-2.6.16.20-vs2.1.1-rc22/include/net/inet_timewait_sock.h       2006-04-26 19:07:00 +0200
17978 @@ -116,6 +116,10 @@ struct inet_timewait_sock {
17979  #define tw_refcnt              __tw_common.skc_refcnt
17980  #define tw_hash                        __tw_common.skc_hash
17981  #define tw_prot                        __tw_common.skc_prot
17982 +#define tw_xid         __tw_common.skc_xid
17983 +#define tw_vx_info             __tw_common.skc_vx_info
17984 +#define tw_nid         __tw_common.skc_nid
17985 +#define tw_nx_info             __tw_common.skc_nx_info
17986         volatile unsigned char  tw_substate;
17987         /* 3 bits hole, try to pack */
17988         unsigned char           tw_rcv_wscale;
17989 diff -NurpP --minimal linux-2.6.16.20/include/net/route.h linux-2.6.16.20-vs2.1.1-rc22/include/net/route.h
17990 --- linux-2.6.16.20/include/net/route.h 2006-02-18 14:40:36 +0100
17991 +++ linux-2.6.16.20-vs2.1.1-rc22/include/net/route.h    2006-05-21 23:37:08 +0200
17992 @@ -28,11 +28,14 @@
17993  #include <net/dst.h>
17994  #include <net/inetpeer.h>
17995  #include <net/flow.h>
17996 +#include <net/inet_sock.h>
17997  #include <linux/in_route.h>
17998  #include <linux/rtnetlink.h>
17999  #include <linux/route.h>
18000  #include <linux/ip.h>
18001  #include <linux/cache.h>
18002 +#include <linux/vs_network.h>
18003 +#include <linux/in.h>
18004  
18005  #ifndef __KERNEL__
18006  #warning This file is not supposed to be used outside of kernel.
18007 @@ -144,6 +147,59 @@ static inline char rt_tos2priority(u8 to
18008         return ip_tos2prio[IPTOS_TOS(tos)>>1];
18009  }
18010  
18011 +#define IPI_LOOPBACK   htonl(INADDR_LOOPBACK)
18012 +
18013 +static inline int ip_find_src(struct nx_info *nxi, struct rtable **rp, struct flowi *fl)
18014 +{      /* choose (or validate) fl->fl4_src from the addresses listed in nxi */
18015 +       int err;
18016 +       int i, n = nxi->nbipv4;
18017 +       u32 ipv4root = nxi->ipv4[0];
18018 +
18019 +       if (ipv4root == 0)      /* nxi->ipv4[0] is unset: leave the flow untouched */
18020 +               return 0;
18021 +
18022 +       if (fl->fl4_src == 0) { /* no source requested: pick one */
18023 +               if (n > 1) {
18024 +                       u32 foundsrc;
18025 +
18026 +                       err = __ip_route_output_key(rp, fl);
18027 +                       if (err) {      /* retry once with ipv4root as the source */
18028 +                               fl->fl4_src = ipv4root;
18029 +                               err = __ip_route_output_key(rp, fl);
18030 +                       }
18031 +                       if (err)
18032 +                               return err;
18033 +
18034 +                       foundsrc = (*rp)->rt_src;
18035 +                       ip_rt_put(*rp); /* the route was only looked up to read rt_src */
18036 +
18037 +                       for (i=0; i<n; i++){
18038 +                               u32 mask = nxi->mask[i];
18039 +                               u32 ipv4 = nxi->ipv4[i];
18040 +                               u32 net4 = ipv4 & mask;
18041 +
18042 +                               if (foundsrc == ipv4) { /* exact match wins, stop searching */
18043 +                                       fl->fl4_src = ipv4;
18044 +                                       break;
18045 +                               }
18046 +                               if (!fl->fl4_src && (foundsrc & mask) == net4)  /* subnet match, taken only while fl4_src is still 0 */
18047 +                                       fl->fl4_src = ipv4;
18048 +                       }
18049 +               }
18050 +               if (fl->fl4_src == 0)   /* nothing chosen above: loopback for a loopback destination, else ipv4root */
18051 +                       fl->fl4_src = (fl->fl4_dst == IPI_LOOPBACK)
18052 +                               ? IPI_LOOPBACK : ipv4root;
18053 +       } else {        /* explicit source: it must be one of nxi->ipv4[0..n-1] */
18054 +               for (i=0; i<n; i++) {
18055 +                       if (nxi->ipv4[i] == fl->fl4_src)
18056 +                               break;
18057 +               }
18058 +               if (i == n)     /* loop ran to the end: source not listed */
18059 +                       return -EPERM;
18060 +       }
18061 +       return 0;
18062 +}
18063 +
18064  static inline int ip_route_connect(struct rtable **rp, u32 dst,
18065                                    u32 src, u32 tos, int oif, u8 protocol,
18066                                    u16 sport, u16 dport, struct sock *sk)
18067 @@ -158,7 +214,27 @@ static inline int ip_route_connect(struc
18068                                          .dport = dport } } };
18069  
18070         int err;
18071 -       if (!dst || !src) {
18072 +       struct nx_info *nx_info = current->nx_info;     /* default: the calling task's network context */
18073 +
18074 +       if (sk)         /* a socket, when given, supplies the context instead */
18075 +               nx_info = sk->sk_nx_info;
18076 +       vxdprintk(VXD_CBIT(net, 4),     /* sk may be NULL here, so guard every dereference */
18077 +               "ip_route_connect(%p) %p,%p;%lx",
18078 +               sk, nx_info, sk ? sk->sk_socket : NULL,
18079 +               (sk && sk->sk_socket) ? sk->sk_socket->flags : 0);
18080 +
18081 +       if (nx_info) {
18082 +               err = ip_find_src(nx_info, rp, &fl);
18083 +               if (err)
18084 +                       return err;
18085 +               if (fl.fl4_dst == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
18086 +                       fl.fl4_dst = nx_info->ipv4[0];
18087 +#ifdef CONFIG_VSERVER_REMAP_SADDR
18088 +               if (fl.fl4_src == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
18089 +                       fl.fl4_src = nx_info->ipv4[0];
18090 +#endif
18091 +       }
18092 +       if (!fl.fl4_dst || !fl.fl4_src) {
18093                 err = __ip_route_output_key(rp, &fl);
18094                 if (err)
18095                         return err;
18096 diff -NurpP --minimal linux-2.6.16.20/include/net/sock.h linux-2.6.16.20-vs2.1.1-rc22/include/net/sock.h
18097 --- linux-2.6.16.20/include/net/sock.h  2006-04-09 13:49:58 +0200
18098 +++ linux-2.6.16.20-vs2.1.1-rc22/include/net/sock.h     2006-04-26 19:07:00 +0200
18099 @@ -115,6 +115,10 @@ struct sock_common {
18100         atomic_t                skc_refcnt;
18101         unsigned int            skc_hash;
18102         struct proto            *skc_prot;
18103 +       xid_t                   skc_xid;
18104 +       struct vx_info  *skc_vx_info;
18105 +       nid_t                   skc_nid;
18106 +       struct nx_info  *skc_nx_info;
18107  };
18108  
18109  /**
18110 @@ -189,6 +193,10 @@ struct sock {
18111  #define sk_refcnt              __sk_common.skc_refcnt
18112  #define sk_hash                        __sk_common.skc_hash
18113  #define sk_prot                        __sk_common.skc_prot
18114 +#define sk_xid                 __sk_common.skc_xid
18115 +#define sk_vx_info             __sk_common.skc_vx_info
18116 +#define sk_nid                 __sk_common.skc_nid
18117 +#define sk_nx_info             __sk_common.skc_nx_info
18118         unsigned char           sk_shutdown : 2,
18119                                 sk_no_check : 2,
18120                                 sk_userlocks : 4;
18121 diff -NurpP --minimal linux-2.6.16.20/init/version.c linux-2.6.16.20-vs2.1.1-rc22/init/version.c
18122 --- linux-2.6.16.20/init/version.c      2005-03-02 12:39:08 +0100
18123 +++ linux-2.6.16.20-vs2.1.1-rc22/init/version.c 2006-04-26 19:07:00 +0200
18124 @@ -31,3 +31,8 @@ EXPORT_SYMBOL(system_utsname);
18125  const char linux_banner[] =
18126         "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
18127         LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
18128 +
18129 +const char vx_linux_banner[] =  /* format string: the two %s stand where linux_banner has UTS_RELEASE and UTS_VERSION */
18130 +       "Linux version %s (" LINUX_COMPILE_BY "@"
18131 +       LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") %s\n";
18132 +
18133 diff -NurpP --minimal linux-2.6.16.20/ipc/mqueue.c linux-2.6.16.20-vs2.1.1-rc22/ipc/mqueue.c
18134 --- linux-2.6.16.20/ipc/mqueue.c        2006-02-18 14:40:37 +0100
18135 +++ linux-2.6.16.20-vs2.1.1-rc22/ipc/mqueue.c   2006-04-26 19:07:00 +0200
18136 @@ -25,6 +25,8 @@
18137  #include <linux/netlink.h>
18138  #include <linux/syscalls.h>
18139  #include <linux/signal.h>
18140 +#include <linux/vs_context.h>
18141 +#include <linux/vs_limit.h>
18142  #include <net/sock.h>
18143  #include "util.h"
18144  
18145 @@ -148,17 +150,20 @@ static struct inode *mqueue_get_inode(st
18146                         spin_lock(&mq_lock);
18147                         if (u->mq_bytes + mq_bytes < u->mq_bytes ||
18148                             u->mq_bytes + mq_bytes >
18149 -                           p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) {
18150 +                           p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur ||
18151 +                           !vx_ipcmsg_avail(p->vx_info, mq_bytes)) {
18152                                 spin_unlock(&mq_lock);
18153                                 goto out_inode;
18154                         }
18155                         u->mq_bytes += mq_bytes;
18156 +                       vx_ipcmsg_add(p->vx_info, u, mq_bytes);
18157                         spin_unlock(&mq_lock);
18158  
18159                         info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
18160                         if (!info->messages) {
18161                                 spin_lock(&mq_lock);
18162                                 u->mq_bytes -= mq_bytes;
18163 +                               vx_ipcmsg_sub(p->vx_info, u, mq_bytes);
18164                                 spin_unlock(&mq_lock);
18165                                 goto out_inode;
18166                         }
18167 @@ -256,10 +261,14 @@ static void mqueue_delete_inode(struct i
18168                    (info->attr.mq_maxmsg * info->attr.mq_msgsize));
18169         user = info->user;
18170         if (user) {
18171 +               struct vx_info *vxi = lookup_vx_info(user->xid);
18172 +
18173                 spin_lock(&mq_lock);
18174                 user->mq_bytes -= mq_bytes;
18175 +               vx_ipcmsg_sub(vxi, user, mq_bytes);
18176                 queues_count--;
18177                 spin_unlock(&mq_lock);
18178 +               put_vx_info(vxi);
18179                 free_uid(user);
18180         }
18181  }
18182 @@ -738,7 +747,7 @@ asmlinkage long sys_mq_unlink(const char
18183         if (inode)
18184                 atomic_inc(&inode->i_count);
18185  
18186 -       err = vfs_unlink(dentry->d_parent->d_inode, dentry);
18187 +       err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL);
18188  out_err:
18189         dput(dentry);
18190  
18191 diff -NurpP --minimal linux-2.6.16.20/ipc/msg.c linux-2.6.16.20-vs2.1.1-rc22/ipc/msg.c
18192 --- linux-2.6.16.20/ipc/msg.c   2006-02-18 14:40:37 +0100
18193 +++ linux-2.6.16.20-vs2.1.1-rc22/ipc/msg.c      2006-04-26 19:07:00 +0200
18194 @@ -100,6 +100,7 @@ static int newque (key_t key, int msgflg
18195  
18196         msq->q_perm.mode = (msgflg & S_IRWXUGO);
18197         msq->q_perm.key = key;
18198 +       msq->q_perm.xid = vx_current_xid();
18199  
18200         msq->q_perm.security = NULL;
18201         retval = security_msg_queue_alloc(msq);
18202 @@ -815,6 +816,9 @@ static int sysvipc_msg_proc_show(struct 
18203  {
18204         struct msg_queue *msq = it;
18205  
18206 +       if (!vx_check(msq->q_perm.xid, VX_IDENT))
18207 +               return 0;
18208 +
18209         return seq_printf(s,
18210                           "%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
18211                           msq->q_perm.key,
18212 diff -NurpP --minimal linux-2.6.16.20/ipc/sem.c linux-2.6.16.20-vs2.1.1-rc22/ipc/sem.c
18213 --- linux-2.6.16.20/ipc/sem.c   2006-04-09 13:49:58 +0200
18214 +++ linux-2.6.16.20-vs2.1.1-rc22/ipc/sem.c      2006-04-26 19:07:00 +0200
18215 @@ -75,6 +75,7 @@
18216  #include <linux/audit.h>
18217  #include <linux/capability.h>
18218  #include <linux/seq_file.h>
18219 +#include <linux/vs_limit.h>
18220  #include <asm/uaccess.h>
18221  #include "util.h"
18222  
18223 @@ -179,6 +180,7 @@ static int newary (key_t key, int nsems,
18224  
18225         sma->sem_perm.mode = (semflg & S_IRWXUGO);
18226         sma->sem_perm.key = key;
18227 +       sma->sem_perm.xid = vx_current_xid();
18228  
18229         sma->sem_perm.security = NULL;
18230         retval = security_sem_alloc(sma);
18231 @@ -194,6 +196,8 @@ static int newary (key_t key, int nsems,
18232                 return -ENOSPC;
18233         }
18234         used_sems += nsems;
18235 +       vx_semary_inc(sma);
18236 +       vx_nsems_add(sma, nsems);
18237  
18238         sma->sem_id = sem_buildid(id, sma->sem_perm.seq);
18239         sma->sem_base = (struct sem *) &sma[1];
18240 @@ -473,6 +477,8 @@ static void freeary (struct sem_array *s
18241         sem_unlock(sma);
18242  
18243         used_sems -= sma->sem_nsems;
18244 +       vx_nsems_sub(sma, sma->sem_nsems);
18245 +       vx_semary_dec(sma);
18246         size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
18247         security_sem_free(sma);
18248         ipc_rcu_putref(sma);
18249 @@ -1337,6 +1343,9 @@ static int sysvipc_sem_proc_show(struct 
18250  {
18251         struct sem_array *sma = it;
18252  
18253 +       if (!vx_check(sma->sem_perm.xid, VX_IDENT))
18254 +               return 0;
18255 +
18256         return seq_printf(s,
18257                           "%10d %10d  %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
18258                           sma->sem_perm.key,
18259 diff -NurpP --minimal linux-2.6.16.20/ipc/shm.c linux-2.6.16.20-vs2.1.1-rc22/ipc/shm.c
18260 --- linux-2.6.16.20/ipc/shm.c   2006-05-11 21:25:36 +0200
18261 +++ linux-2.6.16.20-vs2.1.1-rc22/ipc/shm.c      2006-04-26 19:07:00 +0200
18262 @@ -30,6 +30,8 @@
18263  #include <linux/capability.h>
18264  #include <linux/ptrace.h>
18265  #include <linux/seq_file.h>
18266 +#include <linux/vs_context.h>
18267 +#include <linux/vs_limit.h>
18268  
18269  #include <asm/uaccess.h>
18270  
18271 @@ -114,7 +116,12 @@ static void shm_open (struct vm_area_str
18272   */
18273  static void shm_destroy (struct shmid_kernel *shp)
18274  {
18275 -       shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
18276 +       struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid);
18277 +       int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
18278 +
18279 +       vx_ipcshm_sub(vxi, shp, numpages);
18280 +       shm_tot -= numpages;
18281 +
18282         shm_rmid (shp->id);
18283         shm_unlock(shp);
18284         if (!is_file_hugepages(shp->shm_file))
18285 @@ -124,6 +131,7 @@ static void shm_destroy (struct shmid_ke
18286                                                 shp->mlock_user);
18287         fput (shp->shm_file);
18288         security_shm_free(shp);
18289 +       put_vx_info(vxi);
18290         ipc_rcu_putref(shp);
18291  }
18292  
18293 @@ -200,12 +208,15 @@ static int newseg (key_t key, int shmflg
18294  
18295         if (shm_tot + numpages >= shm_ctlall)
18296                 return -ENOSPC;
18297 +       if (!vx_ipcshm_avail(current->vx_info, numpages))
18298 +               return -ENOSPC;
18299  
18300         shp = ipc_rcu_alloc(sizeof(*shp));
18301         if (!shp)
18302                 return -ENOMEM;
18303  
18304         shp->shm_perm.key = key;
18305 +       shp->shm_perm.xid = vx_current_xid();
18306         shp->shm_perm.mode = (shmflg & S_IRWXUGO);
18307         shp->mlock_user = NULL;
18308  
18309 @@ -256,6 +267,7 @@ static int newseg (key_t key, int shmflg
18310                 file->f_op = &shm_file_operations;
18311  
18312         shm_tot += numpages;
18313 +       vx_ipcshm_add(current->vx_info, key, numpages);
18314         shm_unlock(shp);
18315         return shp->id;
18316  
18317 @@ -897,6 +909,9 @@ static int sysvipc_shm_proc_show(struct 
18318  #define SMALL_STRING "%10d %10d  %4o %10u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
18319  #define BIG_STRING   "%10d %10d  %4o %21u %5u %5u  %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
18320  
18321 +       if (!vx_check(shp->shm_perm.xid, VX_IDENT))
18322 +               return 0;
18323 +
18324         if (sizeof(size_t) <= sizeof(int))
18325                 format = SMALL_STRING;
18326         else
18327 diff -NurpP --minimal linux-2.6.16.20/ipc/util.c linux-2.6.16.20-vs2.1.1-rc22/ipc/util.c
18328 --- linux-2.6.16.20/ipc/util.c  2006-05-11 21:25:36 +0200
18329 +++ linux-2.6.16.20-vs2.1.1-rc22/ipc/util.c     2006-04-26 19:07:00 +0200
18330 @@ -154,7 +154,9 @@ int ipc_findkey(struct ipc_ids* ids, key
18331          */
18332         for (id = 0; id <= max_id; id++) {
18333                 p = ids->entries->p[id];
18334 -               if(p==NULL)
18335 +               if (p==NULL)
18336 +                       continue;
18337 +               if (!vx_check(p->xid, VX_IDENT))
18338                         continue;
18339                 if (key == p->key)
18340                         return id;
18341 @@ -467,6 +469,8 @@ int ipcperms (struct kern_ipc_perm *ipcp
18342  {      /* flag will most probably be 0 or S_...UGO from <linux/stat.h> */
18343         int requested_mode, granted_mode;
18344  
18345 +       if (!vx_check(ipcp->xid, VX_ADMIN|VX_IDENT)) /* maybe just VX_IDENT? */
18346 +               return -1;
18347         requested_mode = (flag >> 6) | (flag >> 3) | flag;
18348         granted_mode = ipcp->mode;
18349         if (current->euid == ipcp->cuid || current->euid == ipcp->uid)
18350 diff -NurpP --minimal linux-2.6.16.20/kernel/Makefile linux-2.6.16.20-vs2.1.1-rc22/kernel/Makefile
18351 --- linux-2.6.16.20/kernel/Makefile     2006-02-18 14:40:37 +0100
18352 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/Makefile        2006-04-26 19:07:00 +0200
18353 @@ -10,6 +10,8 @@ obj-y     = sched.o fork.o exec_domain.o
18354             kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
18355             hrtimer.o
18356  
18357 +obj-y    += vserver/
18358 +
18359  obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
18360  obj-$(CONFIG_FUTEX) += futex.o
18361  obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
18362 diff -NurpP --minimal linux-2.6.16.20/kernel/capability.c linux-2.6.16.20-vs2.1.1-rc22/kernel/capability.c
18363 --- linux-2.6.16.20/kernel/capability.c 2006-02-18 14:40:37 +0100
18364 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/capability.c    2006-04-26 19:07:00 +0200
18365 @@ -12,6 +12,7 @@
18366  #include <linux/module.h>
18367  #include <linux/security.h>
18368  #include <linux/syscalls.h>
18369 +#include <linux/vs_pid.h>
18370  #include <asm/uaccess.h>
18371  
18372  unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
18373 diff -NurpP --minimal linux-2.6.16.20/kernel/compat.c linux-2.6.16.20-vs2.1.1-rc22/kernel/compat.c
18374 --- linux-2.6.16.20/kernel/compat.c     2006-02-18 14:40:37 +0100
18375 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/compat.c        2006-05-29 16:58:56 +0200
18376 @@ -841,7 +841,7 @@ asmlinkage long compat_sys_time(compat_t
18377         compat_time_t i;
18378         struct timeval tv;
18379  
18380 -       do_gettimeofday(&tv);
18381 +       vx_gettimeofday(&tv);
18382         i = tv.tv_sec;
18383  
18384         if (tloc) {
18385 @@ -865,7 +865,7 @@ asmlinkage long compat_sys_stime(compat_
18386         if (err)
18387                 return err;
18388  
18389 -       do_settimeofday(&tv);
18390 +       vx_settimeofday(&tv);
18391         return 0;
18392  }
18393  
18394 diff -NurpP --minimal linux-2.6.16.20/kernel/cpuset.c linux-2.6.16.20-vs2.1.1-rc22/kernel/cpuset.c
18395 --- linux-2.6.16.20/kernel/cpuset.c     2006-02-18 14:40:37 +0100
18396 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/cpuset.c        2006-04-26 19:07:00 +0200
18397 @@ -50,6 +50,7 @@
18398  #include <linux/time.h>
18399  #include <linux/backing-dev.h>
18400  #include <linux/sort.h>
18401 +#include <linux/vs_pid.h>
18402  
18403  #include <asm/uaccess.h>
18404  #include <asm/atomic.h>
18405 diff -NurpP --minimal linux-2.6.16.20/kernel/exit.c linux-2.6.16.20-vs2.1.1-rc22/kernel/exit.c
18406 --- linux-2.6.16.20/kernel/exit.c       2006-04-09 13:49:58 +0200
18407 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/exit.c  2006-05-02 03:05:29 +0200
18408 @@ -31,6 +31,10 @@
18409  #include <linux/signal.h>
18410  #include <linux/cn_proc.h>
18411  #include <linux/mutex.h>
18412 +#include <linux/vs_limit.h>
18413 +#include <linux/vs_context.h>
18414 +#include <linux/vs_network.h>
18415 +#include <linux/vs_pid.h>
18416  
18417  #include <asm/uaccess.h>
18418  #include <asm/unistd.h>
18419 @@ -396,6 +400,7 @@ static void close_files(struct files_str
18420                                 struct file * file = xchg(&fdt->fd[i], NULL);
18421                                 if (file)
18422                                         filp_close(file, files);
18423 +                               vx_openfd_dec(i);
18424                         }
18425                         i++;
18426                         set >>= 1;
18427 @@ -536,8 +541,13 @@ static void exit_mm(struct task_struct *
18428         mmput(mm);
18429  }
18430  
18431 -static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
18432 +static inline void choose_new_parent(task_t *p, task_t *reaper)
18433  {
18434 +       /* check for reaper context */
18435 +       vxwprintk((p->xid != reaper->xid) && (reaper != child_reaper),
18436 +               "rogue reaper: %p[%d,#%u] <> %p[%d,#%u]",
18437 +               p, p->pid, p->xid, reaper, reaper->pid, reaper->xid);
18438 +
18439         /*
18440          * Make sure we're not reparenting to ourselves and that
18441          * the parent is not a zombie.
18442 @@ -619,7 +629,7 @@ static void forget_original_parent(struc
18443         do {
18444                 reaper = next_thread(reaper);
18445                 if (reaper == father) {
18446 -                       reaper = child_reaper;
18447 +                       reaper = vx_child_reaper(father);
18448                         break;
18449                 }
18450         } while (reaper->exit_state);
18451 @@ -643,7 +653,7 @@ static void forget_original_parent(struc
18452  
18453                 if (father == p->real_parent) {
18454                         /* reparent with a reaper, real father it's us */
18455 -                       choose_new_parent(p, reaper, child_reaper);
18456 +                       choose_new_parent(p, vx_child_reaper(p));
18457                         reparent_thread(p, father, 0);
18458                 } else {
18459                         /* reparent ptraced task to its real parent */
18460 @@ -664,7 +674,8 @@ static void forget_original_parent(struc
18461         }
18462         list_for_each_safe(_p, _n, &father->ptrace_children) {
18463                 p = list_entry(_p,struct task_struct,ptrace_list);
18464 -               choose_new_parent(p, reaper, child_reaper);
18465 +
18466 +               choose_new_parent(p, reaper);
18467                 reparent_thread(p, father, 1);
18468         }
18469  }
18470 @@ -858,6 +869,8 @@ fastcall NORET_TYPE void do_exit(long co
18471         __exit_files(tsk);
18472         __exit_fs(tsk);
18473         exit_namespace(tsk);
18474 +       exit_vx_info(tsk, code);
18475 +       exit_nx_info(tsk);
18476         exit_thread();
18477         cpuset_exit(tsk);
18478         exit_keys(tsk);
18479 diff -NurpP --minimal linux-2.6.16.20/kernel/fork.c linux-2.6.16.20-vs2.1.1-rc22/kernel/fork.c
18480 --- linux-2.6.16.20/kernel/fork.c       2006-05-11 21:25:36 +0200
18481 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/fork.c  2006-04-26 19:07:00 +0200
18482 @@ -44,6 +44,10 @@
18483  #include <linux/rmap.h>
18484  #include <linux/acct.h>
18485  #include <linux/cn_proc.h>
18486 +#include <linux/vs_context.h>
18487 +#include <linux/vs_network.h>
18488 +#include <linux/vs_limit.h>
18489 +#include <linux/vs_memory.h>
18490  
18491  #include <asm/pgtable.h>
18492  #include <asm/pgalloc.h>
18493 @@ -104,6 +108,8 @@ static kmem_cache_t *mm_cachep;
18494  void free_task(struct task_struct *tsk)
18495  {
18496         free_thread_info(tsk->thread_info);
18497 +       clr_vx_info(&tsk->vx_info);
18498 +       clr_nx_info(&tsk->nx_info);
18499         free_task_struct(tsk);
18500  }
18501  EXPORT_SYMBOL(free_task);
18502 @@ -203,6 +209,8 @@ static inline int dup_mmap(struct mm_str
18503         mm->free_area_cache = oldmm->mmap_base;
18504         mm->cached_hole_size = ~0UL;
18505         mm->map_count = 0;
18506 +       __set_mm_counter(mm, file_rss, 0);
18507 +       __set_mm_counter(mm, anon_rss, 0);
18508         cpus_clear(mm->cpu_vm_mask);
18509         mm->mm_rb = RB_ROOT;
18510         rb_link = &mm->mm_rb.rb_node;
18511 @@ -214,7 +222,7 @@ static inline int dup_mmap(struct mm_str
18512  
18513                 if (mpnt->vm_flags & VM_DONTCOPY) {
18514                         long pages = vma_pages(mpnt);
18515 -                       mm->total_vm -= pages;
18516 +                       vx_vmpages_sub(mm, pages);
18517                         vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
18518                                                                 -pages);
18519                         continue;
18520 @@ -321,8 +329,6 @@ static struct mm_struct * mm_init(struct
18521         INIT_LIST_HEAD(&mm->mmlist);
18522         mm->core_waiters = 0;
18523         mm->nr_ptes = 0;
18524 -       set_mm_counter(mm, file_rss, 0);
18525 -       set_mm_counter(mm, anon_rss, 0);
18526         spin_lock_init(&mm->page_table_lock);
18527         rwlock_init(&mm->ioctx_list_lock);
18528         mm->ioctx_list = NULL;
18529 @@ -331,6 +337,7 @@ static struct mm_struct * mm_init(struct
18530  
18531         if (likely(!mm_alloc_pgd(mm))) {
18532                 mm->def_flags = 0;
18533 +               set_vx_info(&mm->mm_vx_info, current->vx_info);
18534                 return mm;
18535         }
18536         free_mm(mm);
18537 @@ -362,6 +369,7 @@ void fastcall __mmdrop(struct mm_struct 
18538         BUG_ON(mm == &init_mm);
18539         mm_free_pgd(mm);
18540         destroy_context(mm);
18541 +       clr_vx_info(&mm->mm_vx_info);
18542         free_mm(mm);
18543  }
18544  
18545 @@ -465,6 +473,7 @@ static struct mm_struct *dup_mm(struct t
18546                 goto fail_nomem;
18547  
18548         memcpy(mm, oldmm, sizeof(*mm));
18549 +       mm->mm_vx_info = NULL;
18550  
18551         if (!mm_init(mm))
18552                 goto fail_nomem;
18553 @@ -492,6 +501,7 @@ fail_nocontext:
18554          * If init_new_context() failed, we cannot use mmput() to free the mm
18555          * because it calls destroy_context()
18556          */
18557 +       clr_vx_info(&mm->mm_vx_info);
18558         mm_free_pgd(mm);
18559         free_mm(mm);
18560         return NULL;
18561 @@ -685,6 +695,8 @@ static struct files_struct *dup_fd(struc
18562                 struct file *f = *old_fds++;
18563                 if (f) {
18564                         get_file(f);
18565 +                       /* FIXME: sum it first for check and performance */
18566 +                       vx_openfd_inc(open_files - i);
18567                 } else {
18568                         /*
18569                          * The fd may be claimed in the fd bitmap but not yet
18570 @@ -917,6 +929,8 @@ static task_t *copy_process(unsigned lon
18571  {
18572         int retval;
18573         struct task_struct *p = NULL;
18574 +       struct vx_info *vxi;
18575 +       struct nx_info *nxi;
18576  
18577         if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
18578                 return ERR_PTR(-EINVAL);
18579 @@ -945,12 +959,30 @@ static task_t *copy_process(unsigned lon
18580         if (!p)
18581                 goto fork_out;
18582  
18583 +       init_vx_info(&p->vx_info, current->vx_info);
18584 +       init_nx_info(&p->nx_info, current->nx_info);
18585 +
18586 +       /* check vserver memory */
18587 +       if (p->mm && !(clone_flags & CLONE_VM)) {
18588 +               if (vx_vmpages_avail(p->mm, p->mm->total_vm))
18589 +                       vx_pages_add(p->vx_info, RLIMIT_AS, p->mm->total_vm);
18590 +               else
18591 +                       goto bad_fork_free;
18592 +       }
18593 +       if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
18594 +               if (!vx_rsspages_avail(p->mm, get_mm_counter(p->mm, file_rss)))
18595 +                       goto bad_fork_cleanup_vm;
18596 +       }
18597 +
18598         retval = -EAGAIN;
18599 +       if (!vx_nproc_avail(1))
18600 +               goto bad_fork_cleanup_vm;
18601 +
18602         if (atomic_read(&p->user->processes) >=
18603                         p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
18604                 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
18605                                 p->user != &root_user)
18606 -                       goto bad_fork_free;
18607 +                       goto bad_fork_cleanup_vm;
18608         }
18609  
18610         atomic_inc(&p->user->__count);
18611 @@ -1196,6 +1228,18 @@ static task_t *copy_process(unsigned lon
18612         nr_threads++;
18613         total_forks++;
18614         spin_unlock(&current->sighand->siglock);
18615 +
18616 +       /* p is copy of current */
18617 +       vxi = p->vx_info;
18618 +       if (vxi) {
18619 +               claim_vx_info(vxi, p);
18620 +               atomic_inc(&vxi->cvirt.nr_threads);
18621 +               atomic_inc(&vxi->cvirt.total_forks);
18622 +               vx_nproc_inc(p);
18623 +       }
18624 +       nxi = p->nx_info;
18625 +       if (nxi)
18626 +               claim_nx_info(nxi, p);
18627         write_unlock_irq(&tasklist_lock);
18628         proc_fork_connector(p);
18629         return p;
18630 @@ -1236,6 +1280,9 @@ bad_fork_cleanup_count:
18631         put_group_info(p->group_info);
18632         atomic_dec(&p->user->processes);
18633         free_uid(p->user);
18634 +bad_fork_cleanup_vm:
18635 +       if (p->mm && !(clone_flags & CLONE_VM))
18636 +               vx_pages_sub(p->vx_info, RLIMIT_AS, p->mm->total_vm);
18637  bad_fork_free:
18638         free_task(p);
18639  fork_out:
18640 @@ -1296,6 +1343,15 @@ long do_fork(unsigned long clone_flags,
18641  
18642         if (pid < 0)
18643                 return -EAGAIN;
18644 +
18645 +       /* kernel threads are host only */
18646 +       if ((clone_flags & CLONE_KTHREAD) && !vx_check(0, VX_ADMIN)) {
18647 +               vxwprintk(1, "xid=%d tried to spawn a kernel thread.",
18648 +                       vx_current_xid());
18649 +               free_pidmap(pid);
18650 +               return -EPERM;
18651 +       }
18652 +
18653         if (unlikely(current->ptrace)) {
18654                 trace = fork_traceflag (clone_flags);
18655                 if (trace)
18656 diff -NurpP --minimal linux-2.6.16.20/kernel/kthread.c linux-2.6.16.20-vs2.1.1-rc22/kernel/kthread.c
18657 --- linux-2.6.16.20/kernel/kthread.c    2006-01-03 17:30:12 +0100
18658 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/kthread.c       2006-04-26 19:07:00 +0200
18659 @@ -114,7 +114,7 @@ static void keventd_create_kthread(void 
18660                 create->result = ERR_PTR(pid);
18661         } else {
18662                 wait_for_completion(&create->started);
18663 -               create->result = find_task_by_pid(pid);
18664 +               create->result = find_task_by_real_pid(pid);
18665         }
18666         complete(&create->done);
18667  }
18668 diff -NurpP --minimal linux-2.6.16.20/kernel/posix-cpu-timers.c linux-2.6.16.20-vs2.1.1-rc22/kernel/posix-cpu-timers.c
18669 --- linux-2.6.16.20/kernel/posix-cpu-timers.c   2006-02-18 14:40:37 +0100
18670 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/posix-cpu-timers.c      2006-04-26 19:07:00 +0200
18671 @@ -6,6 +6,7 @@
18672  #include <linux/posix-timers.h>
18673  #include <asm/uaccess.h>
18674  #include <linux/errno.h>
18675 +#include <linux/vs_pid.h>
18676  
18677  static int check_clock(const clockid_t which_clock)
18678  {
18679 diff -NurpP --minimal linux-2.6.16.20/kernel/posix-timers.c linux-2.6.16.20-vs2.1.1-rc22/kernel/posix-timers.c
18680 --- linux-2.6.16.20/kernel/posix-timers.c       2006-04-09 13:49:58 +0200
18681 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/posix-timers.c  2006-04-28 05:07:10 +0200
18682 @@ -47,6 +47,7 @@
18683  #include <linux/wait.h>
18684  #include <linux/workqueue.h>
18685  #include <linux/module.h>
18686 +#include <linux/vs_context.h>
18687  
18688  /*
18689   * Management arrays for POSIX timers.  Timers are kept in slab memory
18690 @@ -294,6 +295,10 @@ void do_schedule_next_timer(struct sigin
18691  
18692  int posix_timer_event(struct k_itimer *timr,int si_private)
18693  {
18694 +       struct vx_info_save vxis;
18695 +       int ret;
18696 +
18697 +       enter_vx_info(task_get_vx_info(timr->it_process), &vxis);
18698         memset(&timr->sigq->info, 0, sizeof(siginfo_t));
18699         timr->sigq->info.si_sys_private = si_private;
18700         /* Send signal to the process that owns this timer.*/
18701 @@ -306,11 +311,11 @@ int posix_timer_event(struct k_itimer *t
18702  
18703         if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
18704                 struct task_struct *leader;
18705 -               int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
18706 -                                       timr->it_process);
18707  
18708 +               ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
18709 +                                   timr->it_process);
18710                 if (likely(ret >= 0))
18711 -                       return ret;
18712 +                       goto out;
18713  
18714                 timr->it_sigev_notify = SIGEV_SIGNAL;
18715                 leader = timr->it_process->group_leader;
18716 @@ -318,8 +323,12 @@ int posix_timer_event(struct k_itimer *t
18717                 timr->it_process = leader;
18718         }
18719  
18720 -       return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
18721 -                                  timr->it_process);
18722 +       ret = send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
18723 +                                 timr->it_process);
18724 +out:
18725 +       leave_vx_info(&vxis);
18726 +       put_vx_info(vxis.vxi);
18727 +       return ret;
18728  }
18729  EXPORT_SYMBOL_GPL(posix_timer_event);
18730  
18731 @@ -366,7 +375,7 @@ static struct task_struct * good_sigeven
18732         struct task_struct *rtn = current->group_leader;
18733  
18734         if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
18735 -               (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
18736 +               (!(rtn = find_task_by_real_pid(event->sigev_notify_thread_id)) ||
18737                  rtn->tgid != current->tgid ||
18738                  (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
18739                 return NULL;
18740 diff -NurpP --minimal linux-2.6.16.20/kernel/printk.c linux-2.6.16.20-vs2.1.1-rc22/kernel/printk.c
18741 --- linux-2.6.16.20/kernel/printk.c     2006-02-18 14:40:37 +0100
18742 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/printk.c        2006-04-26 19:07:00 +0200
18743 @@ -31,6 +31,8 @@
18744  #include <linux/security.h>
18745  #include <linux/bootmem.h>
18746  #include <linux/syscalls.h>
18747 +#include <linux/vs_context.h>
18748 +#include <linux/vserver/cvirt.h>
18749  
18750  #include <asm/uaccess.h>
18751  
18752 @@ -221,18 +223,13 @@ int do_syslog(int type, char __user *buf
18753         unsigned long i, j, limit, count;
18754         int do_clear = 0;
18755         char c;
18756 -       int error = 0;
18757 +       int error;
18758  
18759         error = security_syslog(type);
18760         if (error)
18761                 return error;
18762  
18763 -       switch (type) {
18764 -       case 0:         /* Close log */
18765 -               break;
18766 -       case 1:         /* Open log */
18767 -               break;
18768 -       case 2:         /* Read from log */
18769 +       if ((type >= 2) && (type <= 4)) {
18770                 error = -EINVAL;
18771                 if (!buf || len < 0)
18772                         goto out;
18773 @@ -243,6 +240,16 @@ int do_syslog(int type, char __user *buf
18774                         error = -EFAULT;
18775                         goto out;
18776                 }
18777 +       }
18778 +       if (!vx_check(0, VX_ADMIN|VX_WATCH))
18779 +               return vx_do_syslog(type, buf, len);
18780 +
18781 +       switch (type) {
18782 +       case 0:         /* Close log */
18783 +               break;
18784 +       case 1:         /* Open log */
18785 +               break;
18786 +       case 2:         /* Read from log */
18787                 error = wait_event_interruptible(log_wait,
18788                                                         (log_start - log_end));
18789                 if (error)
18790 @@ -267,16 +274,6 @@ int do_syslog(int type, char __user *buf
18791                 do_clear = 1;
18792                 /* FALL THRU */
18793         case 3:         /* Read last kernel messages */
18794 -               error = -EINVAL;
18795 -               if (!buf || len < 0)
18796 -                       goto out;
18797 -               error = 0;
18798 -               if (!len)
18799 -                       goto out;
18800 -               if (!access_ok(VERIFY_WRITE, buf, len)) {
18801 -                       error = -EFAULT;
18802 -                       goto out;
18803 -               }
18804                 count = len;
18805                 if (count > log_buf_len)
18806                         count = log_buf_len;
18807 @@ -513,11 +510,14 @@ __attribute__((weak)) unsigned long long
18808  
18809  asmlinkage int printk(const char *fmt, ...)
18810  {
18811 +       struct vx_info_save vxis;
18812         va_list args;
18813         int r;
18814  
18815         va_start(args, fmt);
18816 +       __enter_vx_admin(&vxis);
18817         r = vprintk(fmt, args);
18818 +       __leave_vx_admin(&vxis);
18819         va_end(args);
18820  
18821         return r;
18822 diff -NurpP --minimal linux-2.6.16.20/kernel/ptrace.c linux-2.6.16.20-vs2.1.1-rc22/kernel/ptrace.c
18823 --- linux-2.6.16.20/kernel/ptrace.c     2006-05-22 16:25:40 +0200
18824 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/ptrace.c        2006-05-22 06:19:46 +0200
18825 @@ -18,6 +18,7 @@
18826  #include <linux/ptrace.h>
18827  #include <linux/security.h>
18828  #include <linux/signal.h>
18829 +#include <linux/vs_pid.h>
18830  
18831  #include <asm/pgtable.h>
18832  #include <asm/uaccess.h>
18833 @@ -507,6 +508,10 @@ asmlinkage long sys_ptrace(long request,
18834                 goto out;
18835         }
18836  
18837 +       ret = -EPERM;
18838 +       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
18839 +               goto out_put_task_struct;
18840 +
18841         if (request == PTRACE_ATTACH) {
18842                 ret = ptrace_attach(child);
18843                 goto out_put_task_struct;
18844 diff -NurpP --minimal linux-2.6.16.20/kernel/sched.c linux-2.6.16.20-vs2.1.1-rc22/kernel/sched.c
18845 --- linux-2.6.16.20/kernel/sched.c      2006-05-11 21:25:36 +0200
18846 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/sched.c 2006-04-26 19:07:00 +0200
18847 @@ -52,6 +52,10 @@
18848  #include <asm/tlb.h>
18849  
18850  #include <asm/unistd.h>
18851 +#include <linux/vs_context.h>
18852 +#include <linux/vs_cvirt.h>
18853 +#include <linux/vs_pid.h>
18854 +#include <linux/vs_sched.h>
18855  
18856  /*
18857   * Convert user-nice values [ -20 ... 0 ... 19 ]
18858 @@ -239,6 +243,16 @@ struct runqueue {
18859         struct list_head migration_queue;
18860         int cpu;
18861  #endif
18862 +       unsigned long norm_time;
18863 +       unsigned long idle_time;
18864 +#ifdef CONFIG_VSERVER_IDLETIME
18865 +       int idle_skip;
18866 +#endif
18867 +#ifdef CONFIG_VSERVER_HARDCPU
18868 +       struct list_head hold_queue;
18869 +       unsigned long nr_onhold;
18870 +       int idle_tokens;
18871 +#endif
18872  
18873  #ifdef CONFIG_SCHEDSTATS
18874         /* latency stats */
18875 @@ -599,6 +613,7 @@ static inline void sched_info_switch(tas
18876   */
18877  static void dequeue_task(struct task_struct *p, prio_array_t *array)
18878  {
18879 +       BUG_ON(p->state & TASK_ONHOLD);
18880         array->nr_active--;
18881         list_del(&p->run_list);
18882         if (list_empty(array->queue + p->prio))
18883 @@ -607,6 +622,7 @@ static void dequeue_task(struct task_str
18884  
18885  static void enqueue_task(struct task_struct *p, prio_array_t *array)
18886  {
18887 +       BUG_ON(p->state & TASK_ONHOLD);
18888         sched_info_queued(p);
18889         list_add_tail(&p->run_list, array->queue + p->prio);
18890         __set_bit(p->prio, array->bitmap);
18891 @@ -620,11 +636,13 @@ static void enqueue_task(struct task_str
18892   */
18893  static void requeue_task(struct task_struct *p, prio_array_t *array)
18894  {
18895 +       BUG_ON(p->state & TASK_ONHOLD);
18896         list_move_tail(&p->run_list, array->queue + p->prio);
18897  }
18898  
18899  static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
18900  {
18901 +       BUG_ON(p->state & TASK_ONHOLD);
18902         list_add(&p->run_list, array->queue + p->prio);
18903         __set_bit(p->prio, array->bitmap);
18904         array->nr_active++;
18905 @@ -655,6 +673,10 @@ static int effective_prio(task_t *p)
18906         bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
18907  
18908         prio = p->static_prio - bonus;
18909 +
18910 +       /* adjust effective priority */
18911 +       prio = vx_adjust_prio(p, prio, MAX_USER_PRIO);
18912 +
18913         if (prio < MAX_RT_PRIO)
18914                 prio = MAX_RT_PRIO;
18915         if (prio > MAX_PRIO-1)
18916 @@ -662,11 +684,15 @@ static int effective_prio(task_t *p)
18917         return prio;
18918  }
18919  
18920 +#include "sched_mon.h"
18921 +
18922 +
18923  /*
18924   * __activate_task - move a task to the runqueue.
18925   */
18926  static inline void __activate_task(task_t *p, runqueue_t *rq)
18927  {
18928 +       vxm_activate_task(p, rq);
18929         enqueue_task(p, rq->active);
18930         rq->nr_running++;
18931  }
18932 @@ -676,6 +702,7 @@ static inline void __activate_task(task_
18933   */
18934  static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
18935  {
18936 +       vxm_activate_idle(p, rq);
18937         enqueue_task_head(p, rq->active);
18938         rq->nr_running++;
18939  }
18940 @@ -793,19 +820,31 @@ static void activate_task(task_t *p, run
18941         }
18942         p->timestamp = now;
18943  
18944 +       vx_activate_task(p);
18945         __activate_task(p, rq);
18946  }
18947  
18948  /*
18949   * deactivate_task - remove a task from the runqueue.
18950   */
18951 -static void deactivate_task(struct task_struct *p, runqueue_t *rq)
18952 +static void __deactivate_task(struct task_struct *p, runqueue_t *rq)
18953  {
18954         rq->nr_running--;
18955         dequeue_task(p, p->array);
18956 +       vxm_deactivate_task(p, rq);
18957         p->array = NULL;
18958  }
18959  
18960 +static inline
18961 +void deactivate_task(struct task_struct *p, runqueue_t *rq)
18962 +{
18963 +       vx_deactivate_task(p); /* context hook (defined elsewhere) runs first */
18964 +       __deactivate_task(p, rq); /* nr_running--, dequeue, p->array = NULL */
18965 +}
18966 +
18967 +
18968 +#include "sched_hard.h"
18969 +
18970  /*
18971   * resched_task - mark a task 'to be rescheduled now'.
18972   *
18973 @@ -869,6 +908,7 @@ static int migrate_task(task_t *p, int d
18974  {
18975         runqueue_t *rq = task_rq(p);
18976  
18977 +       vxm_migrate_task(p, rq, dest_cpu);
18978         /*
18979          * If the task is not on a runqueue (and not running), then
18980          * it is sufficient to simply update the task's cpu field.
18981 @@ -1169,6 +1209,12 @@ static int try_to_wake_up(task_t *p, uns
18982  
18983         rq = task_rq_lock(p, &flags);
18984         old_state = p->state;
18985 +
18986 +       /* we need to unhold suspended tasks */
18987 +       if (old_state & TASK_ONHOLD) {
18988 +               vx_unhold_task(p, rq);
18989 +               old_state = p->state;
18990 +       }
18991         if (!(old_state & state))
18992                 goto out;
18993  
18994 @@ -1285,10 +1331,16 @@ out_activate:
18995          * sleep is handled in a priority-neutral manner, no priority
18996          * boost and no penalty.)
18997          */
18998 -       if (old_state & TASK_NONINTERACTIVE)
18999 +       if (old_state & TASK_NONINTERACTIVE) {
19000 +               vx_activate_task(p);
19001                 __activate_task(p, rq);
19002 -       else
19003 +       } else
19004                 activate_task(p, rq, cpu == this_cpu);
19005 +
19006 +       /* this is to get the accounting behind the load update */
19007 +       if (old_state & TASK_UNINTERRUPTIBLE)
19008 +               vx_uninterruptible_dec(p);
19009 +
19010         /*
19011          * Sync wakeups (i.e. those types of wakeups where the waker
19012          * has indicated that it will leave the CPU in short order)
19013 @@ -1412,6 +1464,7 @@ void fastcall wake_up_new_task(task_t *p
19014  
19015         p->prio = effective_prio(p);
19016  
19017 +       vx_activate_task(p);
19018         if (likely(cpu == this_cpu)) {
19019                 if (!(clone_flags & CLONE_VM)) {
19020                         /*
19021 @@ -1423,6 +1476,7 @@ void fastcall wake_up_new_task(task_t *p
19022                                 __activate_task(p, rq);
19023                         else {
19024                                 p->prio = current->prio;
19025 +                               BUG_ON(p->state & TASK_ONHOLD);
19026                                 list_add_tail(&p->run_list, &current->run_list);
19027                                 p->array = current->array;
19028                                 p->array->nr_active++;
19029 @@ -2489,13 +2543,16 @@ unsigned long long current_sched_time(co
19030  void account_user_time(struct task_struct *p, cputime_t cputime)
19031  {
19032         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
19033 +       struct vx_info *vxi = p->vx_info;  /* p is _always_ current */
19034         cputime64_t tmp;
19035 +       int nice = (TASK_NICE(p) > 0);
19036  
19037         p->utime = cputime_add(p->utime, cputime);
19038 +       vx_account_user(vxi, cputime, nice);
19039  
19040         /* Add user time to cpustat. */
19041         tmp = cputime_to_cputime64(cputime);
19042 -       if (TASK_NICE(p) > 0)
19043 +       if (nice)
19044                 cpustat->nice = cputime64_add(cpustat->nice, tmp);
19045         else
19046                 cpustat->user = cputime64_add(cpustat->user, tmp);
19047 @@ -2511,10 +2568,12 @@ void account_system_time(struct task_str
19048                          cputime_t cputime)
19049  {
19050         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
19051 +       struct vx_info *vxi = p->vx_info;  /* p is _always_ current */
19052         runqueue_t *rq = this_rq();
19053         cputime64_t tmp;
19054  
19055         p->stime = cputime_add(p->stime, cputime);
19056 +       vx_account_system(vxi, cputime, (p == rq->idle));
19057  
19058         /* Add system time to cpustat. */
19059         tmp = cputime_to_cputime64(cputime);
19060 @@ -2568,12 +2627,14 @@ void scheduler_tick(void)
19061         unsigned long long now = sched_clock();
19062  
19063         update_cpu_clock(p, rq, now);
19064 +       vxm_sync(now, cpu);
19065  
19066         rq->timestamp_last_tick = now;
19067  
19068         if (p == rq->idle) {
19069                 if (wake_priority_sleeper(rq))
19070                         goto out;
19071 +               vx_idle_resched(rq);
19072                 rebalance_tick(cpu, rq, SCHED_IDLE);
19073                 return;
19074         }
19075 @@ -2606,7 +2667,7 @@ void scheduler_tick(void)
19076                 }
19077                 goto out_unlock;
19078         }
19079 -       if (!--p->time_slice) {
19080 +       if (vx_need_resched(p, --p->time_slice, cpu)) {
19081                 dequeue_task(p, rq->active);
19082                 set_tsk_need_resched(p);
19083                 p->prio = effective_prio(p);
19084 @@ -2930,15 +2991,26 @@ need_resched_nonpreemptible:
19085                                 unlikely(signal_pending(prev))))
19086                         prev->state = TASK_RUNNING;
19087                 else {
19088 -                       if (prev->state == TASK_UNINTERRUPTIBLE)
19089 +                       if (prev->state == TASK_UNINTERRUPTIBLE) {
19090                                 rq->nr_uninterruptible++;
19091 +                               vx_uninterruptible_inc(prev);
19092 +                       }
19093                         deactivate_task(prev, rq);
19094                 }
19095         }
19096  
19097         cpu = smp_processor_id();
19098 +       vx_set_rq_time(rq, jiffies);
19099 +try_unhold:
19100 +       vx_try_unhold(rq, cpu);
19101 +pick_next:
19102 +
19103         if (unlikely(!rq->nr_running)) {
19104  go_idle:
19105 +               /* can we skip idle time? */
19106 +               if (vx_try_skip(rq, cpu))
19107 +                       goto try_unhold;
19108 +
19109                 idle_balance(cpu, rq);
19110                 if (!rq->nr_running) {
19111                         next = rq->idle;
19112 @@ -2983,6 +3055,10 @@ go_idle:
19113         queue = array->queue + idx;
19114         next = list_entry(queue->next, task_t, run_list);
19115  
19116 +       /* check before we schedule this context */
19117 +       if (!vx_schedule(next, rq, cpu))
19118 +               goto pick_next;
19119 +
19120         if (!rt_task(next) && next->activated > 0) {
19121                 unsigned long long delta = now - next->timestamp;
19122                 if (unlikely((long long)(now - next->timestamp) < 0))
19123 @@ -3538,7 +3614,7 @@ asmlinkage long sys_nice(int increment)
19124                 nice = 19;
19125  
19126         if (increment < 0 && !can_nice(current, nice))
19127 -               return -EPERM;
19128 +               return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM;
19129  
19130         retval = security_task_setnice(current, nice);
19131         if (retval)
19132 @@ -3698,6 +3774,7 @@ recheck:
19133         oldprio = p->prio;
19134         __setscheduler(p, policy, param->sched_priority);
19135         if (array) {
19136 +               vx_activate_task(p);
19137                 __activate_task(p, rq);
19138                 /*
19139                  * Reschedule if we are currently running on this runqueue and
19140 @@ -6042,7 +6119,10 @@ void __init sched_init(void)
19141                 rq->cpu = i;
19142  #endif
19143                 atomic_set(&rq->nr_iowait, 0);
19144 -
19145 +#ifdef CONFIG_VSERVER_HARDCPU
19146 +               INIT_LIST_HEAD(&rq->hold_queue);
19147 +               rq->nr_onhold = 0;
19148 +#endif
19149                 for (j = 0; j < 2; j++) {
19150                         array = rq->arrays + j;
19151                         for (k = 0; k < MAX_PRIO; k++) {
19152 @@ -6111,6 +6191,7 @@ void normalize_rt_tasks(void)
19153                         deactivate_task(p, task_rq(p));
19154                 __setscheduler(p, SCHED_NORMAL, 0);
19155                 if (array) {
19156 +                       vx_activate_task(p);
19157                         __activate_task(p, task_rq(p));
19158                         resched_task(rq->curr);
19159                 }
19160 diff -NurpP --minimal linux-2.6.16.20/kernel/sched_hard.h linux-2.6.16.20-vs2.1.1-rc22/kernel/sched_hard.h
19161 --- linux-2.6.16.20/kernel/sched_hard.h 1970-01-01 01:00:00 +0100
19162 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/sched_hard.h    2006-04-26 19:07:00 +0200
19163 @@ -0,0 +1,324 @@
19164 +
19165 +#ifdef CONFIG_VSERVER_IDLELIMIT
19166 +
19167 +/* vx_idle_resched - reschedule after maxidle: uses up one rq->idle_tokens
19168 + * and calls set_need_resched() once that reaches zero while rq->hold_queue
19169 + * is non-empty.  NOTE(review): both fields are HARDCPU-only - confirm dep. */
19170 +static inline
19171 +void vx_idle_resched(runqueue_t *rq)
19172 +{
19173 +       /* maybe have a better criterion for paused */
19174 +       if (!--rq->idle_tokens && !list_empty(&rq->hold_queue))
19175 +               set_need_resched();
19176 +}
19177 +
19178 +#else /* !CONFIG_VSERVER_IDLELIMIT */
19179 +/* without CONFIG_VSERVER_IDLELIMIT the call expands to nothing */
19180 +#define vx_idle_resched(rq)
19181 +
19182 +#endif /* CONFIG_VSERVER_IDLELIMIT */
19183 +
19184 +
19185 +
19186 +#ifdef CONFIG_VSERVER_IDLETIME
19187 +/* record @min as the runqueue's idle_skip */
19188 +#define vx_set_rq_min_skip(rq, min)            \
19189 +       (rq)->idle_skip = (min)
19190 +/* macro front end, so the !IDLETIME variant can drop the call entirely */
19191 +#define vx_save_min_skip(ret, min, val)                \
19192 +       __vx_save_min_skip(ret, min, val)
19193 +/* for ret <= -2 only: lower *min to @val, treating *min == 0 as unset */
19194 +static inline
19195 +void __vx_save_min_skip(int ret, int *min, int val)
19196 +{
19197 +       if (ret > -2)
19198 +               return;
19199 +       if ((*min > val) || !*min)
19200 +               *min = val;
19201 +}
19202 +/* returns 1 after adding a positive rq->idle_skip to rq->idle_time, else 0 */
19203 +static inline
19204 +int vx_try_skip(runqueue_t *rq, int cpu)
19205 +{
19206 +       /* artificially advance time */
19207 +       if (rq->idle_skip > 0) {
19208 +               vxdprintk(list_empty(&rq->hold_queue),
19209 +                       "hold queue empty on cpu %d", cpu);
19210 +               rq->idle_time += rq->idle_skip;
19211 +               vxm_idle_skip(rq, cpu);
19212 +               return 1;
19213 +       }
19214 +       return 0;
19215 +}
19216 +
19217 +#else /* !CONFIG_VSERVER_IDLETIME */
19218 +/* nothing to store here: evaluate @min once and yield its value */
19219 +#define vx_set_rq_min_skip(rq, min)            \
19220 +       ({ int dummy = (min); dummy; })
19221 +/* compiles away without CONFIG_VSERVER_IDLETIME */
19222 +#define vx_save_min_skip(ret, min, val)
19223 +/* idle time skipping is compiled out: never skip */
19224 +static inline
19225 +int vx_try_skip(runqueue_t *rq, int cpu)
19226 +{
19227 +       return 0;
19228 +}
19229 +
19230 +#endif /* CONFIG_VSERVER_IDLETIME */
19231 +
19232 +
19233 +
19234 +#ifdef CONFIG_VSERVER_HARDCPU
19235 +/* record @max as the runqueue's idle_tokens */
19236 +#define vx_set_rq_max_idle(rq, max)            \
19237 +       (rq)->idle_tokens = (max)
19238 +/* macro front end for __vx_save_max_idle() */
19239 +#define vx_save_max_idle(ret, min, val)                \
19240 +       __vx_save_max_idle(ret, min, val)
19241 +/* lower *min to @val if @val is smaller; @ret is not looked at */
19242 +static inline
19243 +void __vx_save_max_idle(int ret, int *min, int val)
19244 +{
19245 +       if (*min > val)
19246 +               *min = val;
19247 +}
19248 +
19249 +
19250 +/* vx_hold_task - put a task on the hold queue
19251 + * Dequeue first: dequeue_task() BUG()s on TASK_ONHOLD, so the flag is
19252 + * only set afterwards; p->run_list is then reused for rq->hold_queue. */
19253 +static inline
19254 +void vx_hold_task(struct task_struct *p, runqueue_t *rq)
19255 +{
19256 +       __deactivate_task(p, rq);
19257 +       p->state |= TASK_ONHOLD;
19258 +       /* a new one on hold */
19259 +       rq->nr_onhold++;
19260 +       vxm_hold_task(p, rq);
19261 +       list_add_tail(&p->run_list, &rq->hold_queue);
19262 +}
19263 +
19264 +/* vx_unhold_task - put a task back to the runqueue
19265 + * Unlinks @p from the hold queue and clears TASK_ONHOLD before calling
19266 + * enqueue_task() (which BUG()s on that flag); @p goes to rq->expired. */
19267 +static inline
19268 +void vx_unhold_task(struct task_struct *p, runqueue_t *rq)
19269 +{
19270 +       list_del(&p->run_list);
19271 +       /* one less waiting */
19272 +       rq->nr_onhold--;
19273 +       p->state &= ~TASK_ONHOLD;
19274 +       enqueue_task(p, rq->expired);
19275 +       rq->nr_running++;
19276 +       vxm_unhold_task(p, rq);
19277 +       /* @p was queued on expired: keep rq->best_expired_prio in step */
19278 +       if (p->static_prio < rq->best_expired_prio)
19279 +               rq->best_expired_prio = p->static_prio;
19280 +}
19281 +/* nr_onhold - add up rq->nr_onhold across all online CPUs */
19282 +unsigned long nr_onhold(void)
19283 +{
19284 +       unsigned long i, sum = 0;
19285 +
19286 +       for_each_online_cpu(i)
19287 +               sum += cpu_rq(i)->nr_onhold;
19288 +
19289 +       return sum;
19290 +}
19291 +
19292 +
19293 +/* number of tokens currently held in @sched_pc */
19294 +static inline
19295 +int __vx_tokens_avail(struct _vx_sched_pc *sched_pc)
19296 +{
19297 +       return sched_pc->tokens;
19298 +}
19299 +/* take one token from @sched_pc; no lower bound is checked here */
19300 +static inline
19301 +void __vx_consume_token(struct _vx_sched_pc *sched_pc)
19302 +{
19303 +       sched_pc->tokens--;
19304 +}
19305 +/* vx_need_resched - 1 if @slice is 0 or the context ran short of tokens */
19306 +static inline
19307 +int vx_need_resched(struct task_struct *p, int slice, int cpu)
19308 +{
19309 +       struct vx_info *vxi = p->vx_info;
19310 +
19311 +       if (vx_info_flags(vxi, VXF_SCHED_HARD|VXF_SCHED_PRIO, 0)) {
19312 +               struct _vx_sched_pc *sched_pc =
19313 +                       &vx_per_cpu(vxi, sched_pc, cpu);
19314 +               int tokens;
19315 +
19316 +               /* maybe we can simplify that to decrement
19317 +                  the token counter unconditionally? */
19318 +
19319 +               if ((tokens = __vx_tokens_avail(sched_pc)) > 0)
19320 +                       __vx_consume_token(sched_pc);
19321 +
19322 +               /* tokens is the count before the decrement: < 2 means none left */
19323 +               if (tokens < 2)
19324 +                       slice = 0;
19325 +       }
19326 +       vxm_need_resched(p, slice, cpu);
19327 +       return (slice == 0);
19328 +}
19329 +
19330 +/* vx_set_rq_time - store @time in the runqueue's norm_time */
19331 +#define vx_set_rq_time(rq, time) do {  \
19332 +       (rq)->norm_time = (time);       \
19333 +} while (0)
19334 +/* vx_try_unhold - scan rq->hold_queue, unhold the first task whose context
19335 + * gets ret > 0 from vx_tokens_recalc(), then store maxidle/minskip in @rq */
19336 +static inline
19337 +void vx_try_unhold(runqueue_t *rq, int cpu)
19338 +{
19339 +       struct vx_info *vxi = NULL;
19340 +       struct list_head *l, *n;
19341 +       int maxidle = HZ;
19342 +       int minskip = 0;
19343 +
19344 +       /* nothing to do? what about pause? */
19345 +       if (list_empty(&rq->hold_queue))
19346 +               return;
19347 +
19348 +       list_for_each_safe(l, n, &rq->hold_queue) {
19349 +               int ret, delta_min[2];
19350 +               struct _vx_sched_pc *sched_pc;
19351 +               struct task_struct *p;
19352 +
19353 +               p = list_entry(l, task_t, run_list);
19354 +               /* don't bother with same context as the previous entry */
19355 +               if (vxi == p->vx_info)
19356 +                       continue;
19357 +
19358 +               vxi = p->vx_info;
19359 +               /* ignore paused contexts */
19360 +               if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
19361 +                       continue;
19362 +
19363 +               sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
19364 +
19365 +               /* recalc tokens */
19366 +               vxm_sched_info(sched_pc, vxi, cpu);
19367 +               ret = vx_tokens_recalc(sched_pc,
19368 +                       &rq->norm_time, &rq->idle_time, delta_min);
19369 +               vxm_tokens_recalc(sched_pc, rq, vxi, cpu);
19370 +
19371 +               if (ret > 0) {
19372 +                       /* we found a runnable context */
19373 +                       vx_unhold_task(p, rq);
19374 +                       break;
19375 +               }
19376 +               vx_save_max_idle(ret, &maxidle, delta_min[0]);
19377 +               vx_save_min_skip(ret, &minskip, delta_min[1]);
19378 +       }
19379 +       vx_set_rq_max_idle(rq, maxidle);
19380 +       vx_set_rq_min_skip(rq, minskip);
19381 +       vxm_rq_max_min(rq, cpu);
19382 +}
19383 +/* vx_schedule - returns 1 if @next may run now, 0 after moving it to the hold
19384 + * queue (VXF_SCHED_PAUSE, or VXF_SCHED_HARD with vx_tokens_recalc() < 0) */
19385 +static inline
19386 +int vx_schedule(struct task_struct *next, runqueue_t *rq, int cpu)
19387 +{
19388 +       struct vx_info *vxi = next->vx_info;
19389 +       struct _vx_sched_pc *sched_pc;
19390 +       int delta_min[2];
19391 +       int flags, ret;
19392 +       /* tasks without a vx_info are never held */
19393 +       if (!vxi)
19394 +               return 1;
19395 +
19396 +       flags = vxi->vx_flags;
19397 +       /* a paused context skips the token math and jumps into the hold branch */
19398 +       if (unlikely(vx_check_flags(flags , VXF_SCHED_PAUSE, 0)))
19399 +               goto put_on_hold;
19400 +       if (!vx_check_flags(flags , VXF_SCHED_HARD|VXF_SCHED_PRIO, 0))
19401 +               return 1;
19402 +
19403 +       sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
19404 +#ifdef CONFIG_SMP
19405 +       /* update scheduler params */
19406 +       if (cpu_isset(cpu, vxi->sched.update)) {
19407 +               vx_update_sched_param(&vxi->sched, sched_pc);
19408 +               vxm_update_sched(sched_pc, vxi, cpu);
19409 +               cpu_clear(cpu, vxi->sched.update);
19410 +       }
19411 +#endif
19412 +       vxm_sched_info(sched_pc, vxi, cpu);
19413 +       ret  = vx_tokens_recalc(sched_pc,
19414 +               &rq->norm_time, &rq->idle_time, delta_min);
19415 +       vxm_tokens_recalc(sched_pc, rq, vxi, cpu);
19416 +       /* only past the VXF_SCHED_HARD check below can ret put @next on hold */
19417 +       if (!vx_check_flags(flags , VXF_SCHED_HARD, 0))
19418 +               return 1;
19419 +
19420 +       if (unlikely(ret < 0)) {
19421 +               vx_save_max_idle(ret, &rq->idle_tokens, delta_min[0]);
19422 +               vx_save_min_skip(ret, &rq->idle_skip, delta_min[1]);
19423 +               vxm_rq_max_min(rq, cpu);
19424 +       put_on_hold:
19425 +               vx_hold_task(next, rq);
19426 +               return 0;
19427 +       }
19428 +       return 1;
19429 +}
19430 +
19431 +
19432 +#else /* CONFIG_VSERVER_HARDCPU */
19433 +/* no hold queue without CONFIG_VSERVER_HARDCPU: nothing to do */
19434 +static inline
19435 +void vx_hold_task(struct task_struct *p, runqueue_t *rq)
19436 +{
19437 +       return;
19438 +}
19439 +/* no hold queue without CONFIG_VSERVER_HARDCPU: nothing to do */
19440 +static inline
19441 +void vx_unhold_task(struct task_struct *p, runqueue_t *rq)
19442 +{
19443 +       return;
19444 +}
19445 +/* nr_onhold - always 0 when CONFIG_VSERVER_HARDCPU is off */
19446 +unsigned long nr_onhold(void)
19447 +{
19448 +       return 0;
19449 +}
19450 +
19451 +/* vx_need_resched - no token accounting here: 1 exactly when @slice is 0 */
19452 +static inline
19453 +int vx_need_resched(struct task_struct *p, int slice, int cpu)
19454 +{
19455 +       return (slice == 0);
19456 +}
19457 +
19458 +/* expands to nothing when CONFIG_VSERVER_HARDCPU is off */
19459 +#define vx_set_rq_time(rq, time)
19460 +/* no hold queue without CONFIG_VSERVER_HARDCPU: nothing to release */
19461 +static inline
19462 +void vx_try_unhold(runqueue_t *rq, int cpu)
19463 +{
19464 +       return;
19465 +}
19466 +/* vx_schedule - without hard limits @next always runs (returns 1) */
19467 +static inline
19468 +int vx_schedule(struct task_struct *next, runqueue_t *rq, int cpu)
19469 +{
19470 +       struct vx_info *vxi = next->vx_info;
19471 +       struct _vx_sched_pc *sched_pc;
19472 +       int delta_min[2];
19473 +       int ret;
19474 +
19475 +       if (!vx_info_flags(vxi, VXF_SCHED_PRIO, 0))
19476 +               return 1;
19477 +       /* tokens are recalculated anyway; ret is ignored in this variant */
19478 +       sched_pc = &vx_per_cpu(vxi, sched_pc, cpu);
19479 +       vxm_sched_info(sched_pc, vxi, cpu);
19480 +       ret  = vx_tokens_recalc(sched_pc,
19481 +               &rq->norm_time, &rq->idle_time, delta_min);
19482 +       vxm_tokens_recalc(sched_pc, rq, vxi, cpu);
19483 +       return 1;
19484 +}
19485 +
19486 +#endif /* CONFIG_VSERVER_HARDCPU */
19487 +
19488 diff -NurpP --minimal linux-2.6.16.20/kernel/sched_mon.h linux-2.6.16.20-vs2.1.1-rc22/kernel/sched_mon.h
19489 --- linux-2.6.16.20/kernel/sched_mon.h  1970-01-01 01:00:00 +0100
19490 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/sched_mon.h     2006-04-26 19:07:00 +0200
19491 @@ -0,0 +1,188 @@
19492 +
19493 +#include <linux/vserver/monitor.h>
19494 +
19495 +#ifdef  CONFIG_VSERVER_MONITOR
19496 +/* defined elsewhere; presumably hands out the next entry slot for @cpu */
19497 +struct _vx_mon_entry *vxm_advance(int cpu);
19498 +
19499 +/* set the type and xid of a monitor entry */
19500 +static inline
19501 +void   __vxm_basic(struct _vx_mon_entry *entry, xid_t xid, int type)
19502 +{
19503 +       entry->type = type;
19504 +       entry->xid = xid;
19505 +}
19506 +/* record a VXM_SYNC entry with xid 0 and the current xtime (sec, nsec) */
19507 +static inline
19508 +void   __vxm_sync(int cpu)
19509 +{
19510 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
19511 +
19512 +       __vxm_basic(entry, 0, VXM_SYNC);
19513 +       entry->ev.sec = xtime.tv_sec;
19514 +       entry->ev.nsec = xtime.tv_nsec;
19515 +}
19516 +/* record a @type entry for @p: xid, pid and state, slot from task_cpu(p) */
19517 +static inline
19518 +void   __vxm_task(struct task_struct *p, int type)
19519 +{
19520 +       struct _vx_mon_entry *entry = vxm_advance(task_cpu(p));
19521 +
19522 +       __vxm_basic(entry, p->xid, type);
19523 +       entry->ev.tsk.pid = p->pid;
19524 +       entry->ev.tsk.state = p->state;
19525 +}
19526 +/* record tokens, norm_time, idle_time of @s; type = VXM_SCHED | s->flags */
19527 +static inline
19528 +void   __vxm_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
19529 +{
19530 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
19531 +
19532 +       __vxm_basic(entry, vxi->vx_id, (VXM_SCHED | s->flags));
19533 +       entry->sd.tokens = s->tokens;
19534 +       entry->sd.norm_time = s->norm_time;
19535 +       entry->sd.idle_time = s->idle_time;
19536 +}
19537 +
19538 +static inline /* log rq counters; xid carries bits 16-31 of the rq address */
19539 +void   __vxm_rqinfo1(runqueue_t *q, int cpu)
19540 +{
19541 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
19542 +
19543 +       entry->type = VXM_RQINFO_1;
19544 +       entry->xid = ((unsigned long)q >> 16) & 0xffff;
19545 +       entry->q1.running = q->nr_running;
19546 +       entry->q1.onhold = q->nr_onhold;
19547 +       entry->q1.iowait = atomic_read(&q->nr_iowait);
19548 +       entry->q1.uintr = q->nr_uninterruptible;
19549 +       entry->q1.idle_tokens = q->idle_tokens;
19550 +}
19551 +
19552 +static inline /* log rq times; xid carries low 16 bits of the rq address */
19553 +void   __vxm_rqinfo2(runqueue_t *q, int cpu)
19554 +{
19555 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
19556 +
19557 +       entry->type = VXM_RQINFO_2;
19558 +       entry->xid = (unsigned long)q & 0xffff;
19559 +       entry->q2.norm_time = q->norm_time;
19560 +       entry->q2.idle_time = q->idle_time;
19561 +       entry->q2.idle_skip = q->idle_skip;
19562 +}
19563 +
19564 +static inline /* log a VXM_UPDATE entry carrying s->tokens */
19565 +void   __vxm_update(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
19566 +{
19567 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
19568 +
19569 +       __vxm_basic(entry, vxi->vx_id, VXM_UPDATE);
19570 +       entry->ev.tokens = s->tokens;
19571 +}
19572 +
19573 +static inline /* log tokens_max plus fill_rate[0] and interval[0] */
19574 +void   __vxm_update1(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
19575 +{
19576 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
19577 +
19578 +       __vxm_basic(entry, vxi->vx_id, VXM_UPDATE_1);
19579 +       entry->u1.tokens_max = s->tokens_max;
19580 +       entry->u1.fill_rate = s->fill_rate[0];
19581 +       entry->u1.interval = s->interval[0];
19582 +}
19583 +
19584 +static inline /* log tokens_min plus fill_rate[1] and interval[1] */
19585 +void   __vxm_update2(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
19586 +{
19587 +       struct _vx_mon_entry *entry = vxm_advance(cpu);
19588 +
19589 +       __vxm_basic(entry, vxi->vx_id, VXM_UPDATE_2);
19590 +       entry->u2.tokens_min = s->tokens_min;
19591 +       entry->u2.fill_rate = s->fill_rate[1];
19592 +       entry->u2.interval = s->interval[1];
19593 +}
19594 +
19595 +/* Task event hooks; the runqueue argument q is accepted but unused. */
19596 +#define        vxm_activate_task(p,q)          __vxm_task(p, VXM_ACTIVATE)
19597 +#define        vxm_activate_idle(p,q)          __vxm_task(p, VXM_IDLE)
19598 +#define        vxm_deactivate_task(p,q)        __vxm_task(p, VXM_DEACTIVATE)
19599 +#define        vxm_hold_task(p,q)              __vxm_task(p, VXM_HOLD)
19600 +#define        vxm_unhold_task(p,q)            __vxm_task(p, VXM_UNHOLD)
19601 +
19602 +static inline /* log VXM_MIGRATE plus both rqinfo entries; 'dest' is unused */
19603 +void   vxm_migrate_task(struct task_struct *p, runqueue_t *rq, int dest)
19604 +{
19605 +       __vxm_task(p, VXM_MIGRATE);
19606 +       __vxm_rqinfo1(rq, task_cpu(p));
19607 +       __vxm_rqinfo2(rq, task_cpu(p));
19608 +}
19609 +
19610 +static inline /* log both rqinfo entries for rq on the given cpu */
19611 +void   vxm_idle_skip(runqueue_t *rq, int cpu)
19612 +{
19613 +       __vxm_rqinfo1(rq, cpu);
19614 +       __vxm_rqinfo2(rq, cpu);
19615 +}
19616 +
19617 +static inline /* log VXM_RESCHED only when slice is zero; 'cpu' is unused */
19618 +void   vxm_need_resched(struct task_struct *p, int slice, int cpu)
19619 +{
19620 +       if (slice)
19621 +               return;
19622 +
19623 +       __vxm_task(p, VXM_RESCHED);
19624 +}
19625 +
19626 +static inline /* sync entry only if the SYNC config is nonzero and divides now */
19627 +void   vxm_sync(unsigned long now, int cpu)
19628 +{
19629 +       if (!CONFIG_VSERVER_MONITOR_SYNC ||
19630 +               (now % CONFIG_VSERVER_MONITOR_SYNC))
19631 +               return;
19632 +
19633 +       __vxm_sync(cpu);
19634 +}
19635 +
19636 +#define        vxm_sched_info(s,v,c)           __vxm_sched(s,v,c)
19637 +
19638 +static inline /* log the sched entry followed by the rqinfo2 entry */
19639 +void   vxm_tokens_recalc(struct _vx_sched_pc *s, runqueue_t *rq,
19640 +       struct vx_info *vxi, int cpu)
19641 +{
19642 +       __vxm_sched(s, vxi, cpu);
19643 +       __vxm_rqinfo2(rq, cpu);
19644 +}
19645 +
19646 +static inline /* log the sched entry, then update, update1 and update2 */
19647 +void   vxm_update_sched(struct _vx_sched_pc *s, struct vx_info *vxi, int cpu)
19648 +{
19649 +       __vxm_sched(s, vxi, cpu);
19650 +       __vxm_update(s, vxi, cpu);
19651 +       __vxm_update1(s, vxi, cpu);
19652 +       __vxm_update2(s, vxi, cpu);
19653 +}
19654 +
19655 +static inline /* log both rqinfo entries (same calls as vxm_idle_skip) */
19656 +void   vxm_rq_max_min(runqueue_t *rq, int cpu)
19657 +{
19658 +       __vxm_rqinfo1(rq, cpu);
19659 +       __vxm_rqinfo2(rq, cpu);
19660 +}
19661 +
19662 +#else  /* CONFIG_VSERVER_MONITOR */
19663 +/* Monitor compiled out: every hook expands to an empty statement. */
19664 +#define        vxm_activate_task(t,q)          do { } while (0)
19665 +#define        vxm_activate_idle(t,q)          do { } while (0)
19666 +#define        vxm_deactivate_task(t,q)        do { } while (0)
19667 +#define        vxm_hold_task(t,q)              do { } while (0)
19668 +#define        vxm_unhold_task(t,q)            do { } while (0)
19669 +#define        vxm_migrate_task(t,q,d)         do { } while (0)
19670 +#define        vxm_idle_skip(q,c)              do { } while (0)
19671 +#define        vxm_need_resched(t,s,c)         do { } while (0)
19672 +#define        vxm_sync(s,c)                   do { } while (0)
19673 +#define        vxm_sched_info(s,v,c)           do { } while (0)
19674 +#define        vxm_tokens_recalc(s,q,v,c)      do { } while (0)
19675 +#define        vxm_update_sched(s,v,c)         do { } while (0)
19676 +#define        vxm_rq_max_min(q,c)             do { } while (0)
19677 +
19678 +#endif /* CONFIG_VSERVER_MONITOR */
19679 +
19680 diff -NurpP --minimal linux-2.6.16.20/kernel/signal.c linux-2.6.16.20-vs2.1.1-rc22/kernel/signal.c
19681 --- linux-2.6.16.20/kernel/signal.c     2006-05-11 21:25:36 +0200
19682 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/signal.c        2006-04-26 19:07:00 +0200
19683 @@ -26,6 +26,7 @@
19684  #include <linux/signal.h>
19685  #include <linux/audit.h>
19686  #include <linux/capability.h>
19687 +#include <linux/vs_pid.h>
19688  #include <asm/param.h>
19689  #include <asm/uaccess.h>
19690  #include <asm/unistd.h>
19691 @@ -676,18 +677,27 @@ static int rm_from_queue(unsigned long m
19692  static int check_kill_permission(int sig, struct siginfo *info,
19693                                  struct task_struct *t)
19694  {
19695 +       int user;
19696         int error = -EINVAL;
19697 +
19698         if (!valid_signal(sig))
19699                 return error;
19700 +
19701 +       user = ((info == SEND_SIG_NOINFO) ||
19702 +               (!is_si_special(info) && SI_FROMUSER(info)));
19703 +
19704         error = -EPERM;
19705 -       if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)))
19706 -           && ((sig != SIGCONT) ||
19707 +       if (user && ((sig != SIGCONT) ||
19708                 (current->signal->session != t->signal->session))
19709             && (current->euid ^ t->suid) && (current->euid ^ t->uid)
19710             && (current->uid ^ t->suid) && (current->uid ^ t->uid)
19711             && !capable(CAP_KILL))
19712                 return error;
19713  
19714 +       error = -ESRCH;
19715 +       if (user && !vx_check(vx_task_xid(t), VX_ADMIN|VX_IDENT))
19716 +               return error;
19717 +
19718         error = security_task_kill(t, info, sig);
19719         if (!error)
19720                 audit_signal_info(sig, t); /* Let audit system see the signal */
19721 @@ -1991,6 +2001,11 @@ relock:
19722                 if (current->pid == 1)
19723                         continue;
19724  
19725 +               /* virtual init is protected against user signals */
19726 +               if ((info->si_code == SI_USER) &&
19727 +                       vx_current_initpid(current->pid))
19728 +                       continue;
19729 +
19730                 if (sig_kernel_stop(signr)) {
19731                         /*
19732                          * The default action is to stop all threads in
19733 diff -NurpP --minimal linux-2.6.16.20/kernel/softirq.c linux-2.6.16.20-vs2.1.1-rc22/kernel/softirq.c
19734 --- linux-2.6.16.20/kernel/softirq.c    2006-01-03 17:30:12 +0100
19735 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/softirq.c       2006-04-26 19:07:00 +0200
19736 @@ -16,6 +16,7 @@
19737  #include <linux/cpu.h>
19738  #include <linux/kthread.h>
19739  #include <linux/rcupdate.h>
19740 +#include <linux/vs_context.h>
19741  
19742  #include <asm/irq.h>
19743  /*
19744 @@ -73,6 +74,7 @@ static inline void wakeup_softirqd(void)
19745  
19746  asmlinkage void __do_softirq(void)
19747  {
19748 +       struct vx_info_save vxis;
19749         struct softirq_action *h;
19750         __u32 pending;
19751         int max_restart = MAX_SOFTIRQ_RESTART;
19752 @@ -81,6 +83,7 @@ asmlinkage void __do_softirq(void)
19753         pending = local_softirq_pending();
19754  
19755         local_bh_disable();
19756 +       __enter_vx_admin(&vxis);
19757         cpu = smp_processor_id();
19758  restart:
19759         /* Reset the pending bitmask before enabling irqs */
19760 @@ -108,6 +111,7 @@ restart:
19761         if (pending)
19762                 wakeup_softirqd();
19763  
19764 +       __leave_vx_admin(&vxis);
19765         __local_bh_enable();
19766  }
19767  
19768 diff -NurpP --minimal linux-2.6.16.20/kernel/sys.c linux-2.6.16.20-vs2.1.1-rc22/kernel/sys.c
19769 --- linux-2.6.16.20/kernel/sys.c        2006-05-11 21:25:36 +0200
19770 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/sys.c   2006-04-29 02:58:07 +0200
19771 @@ -11,6 +11,7 @@
19772  #include <linux/mman.h>
19773  #include <linux/smp_lock.h>
19774  #include <linux/notifier.h>
19775 +#include <linux/kmod.h>
19776  #include <linux/reboot.h>
19777  #include <linux/prctl.h>
19778  #include <linux/init.h>
19779 @@ -30,6 +31,8 @@
19780  #include <linux/tty.h>
19781  #include <linux/signal.h>
19782  #include <linux/cn_proc.h>
19783 +#include <linux/vs_cvirt.h>
19784 +#include <linux/vs_pid.h>
19785  
19786  #include <linux/compat.h>
19787  #include <linux/syscalls.h>
19788 @@ -227,7 +230,9 @@ EXPORT_SYMBOL(unregister_reboot_notifier
19789  #ifndef CONFIG_SECURITY
19790  int capable(int cap)
19791  {
19792 -        if (cap_raised(current->cap_effective, cap)) {
19793 +       if (vx_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap))
19794 +               return 0;
19795 +       if (vx_cap_raised(current->vx_info, current->cap_effective, cap)) {
19796                current->flags |= PF_SUPERPRIV;
19797                return 1;
19798          }
19799 @@ -246,7 +251,10 @@ static int set_one_prio(struct task_stru
19800                 goto out;
19801         }
19802         if (niceval < task_nice(p) && !can_nice(p, niceval)) {
19803 -               error = -EACCES;
19804 +               if (vx_flags(VXF_IGNEG_NICE, 0))
19805 +                       error = 0;
19806 +               else
19807 +                       error = -EACCES;
19808                 goto out;
19809         }
19810         no_nice = security_task_setnice(p, niceval);
19811 @@ -298,7 +306,8 @@ asmlinkage long sys_setpriority(int whic
19812                         if (!who)
19813                                 who = current->uid;
19814                         else
19815 -                               if ((who != current->uid) && !(user = find_user(who)))
19816 +                               if ((who != current->uid) &&
19817 +                                       !(user = find_user(vx_current_xid(), who)))
19818                                         goto out_unlock;        /* No processes for this user */
19819  
19820                         do_each_thread(g, p)
19821 @@ -356,7 +365,8 @@ asmlinkage long sys_getpriority(int whic
19822                         if (!who)
19823                                 who = current->uid;
19824                         else
19825 -                               if ((who != current->uid) && !(user = find_user(who)))
19826 +                               if ((who != current->uid) &&
19827 +                                       !(user = find_user(vx_current_xid(), who)))
19828                                         goto out_unlock;        /* No processes for this user */
19829  
19830                         do_each_thread(g, p)
19831 @@ -473,6 +483,9 @@ void kernel_power_off(void)
19832         machine_power_off();
19833  }
19834  EXPORT_SYMBOL_GPL(kernel_power_off);
19835 +
19836 +long vs_reboot(unsigned int, void __user *);
19837 +
19838  /*
19839   * Reboot system call: for obvious reasons only root may call it,
19840   * and even root needs to set up some magic numbers in the registers
19841 @@ -503,6 +516,9 @@ asmlinkage long sys_reboot(int magic1, i
19842         if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
19843                 cmd = LINUX_REBOOT_CMD_HALT;
19844  
19845 +       if (!vx_check(0, VX_ADMIN|VX_WATCH))
19846 +               return vs_reboot(cmd, arg);
19847 +
19848         lock_kernel();
19849         switch (cmd) {
19850         case LINUX_REBOOT_CMD_RESTART:
19851 @@ -690,7 +706,7 @@ static int set_user(uid_t new_ruid, int 
19852  {
19853         struct user_struct *new_user;
19854  
19855 -       new_user = alloc_uid(new_ruid);
19856 +       new_user = alloc_uid(vx_current_xid(), new_ruid);
19857         if (!new_user)
19858                 return -EAGAIN;
19859  
19860 @@ -1099,15 +1115,18 @@ asmlinkage long sys_setpgid(pid_t pid, p
19861  {
19862         struct task_struct *p;
19863         struct task_struct *group_leader = current->group_leader;
19864 +       pid_t rpgid;
19865         int err = -EINVAL;
19866  
19867         if (!pid)
19868 -               pid = group_leader->pid;
19869 +               pid = vx_map_pid(group_leader->pid);
19870         if (!pgid)
19871                 pgid = pid;
19872         if (pgid < 0)
19873                 return -EINVAL;
19874  
19875 +       rpgid = vx_rmap_pid(pgid);
19876 +
19877         /* From this point forward we keep holding onto the tasklist lock
19878          * so that our parent does not change from under us. -DaveM
19879          */
19880 @@ -1142,22 +1161,22 @@ asmlinkage long sys_setpgid(pid_t pid, p
19881         if (pgid != pid) {
19882                 struct task_struct *p;
19883  
19884 -               do_each_task_pid(pgid, PIDTYPE_PGID, p) {
19885 +               do_each_task_pid(rpgid, PIDTYPE_PGID, p) {
19886                         if (p->signal->session == group_leader->signal->session)
19887                                 goto ok_pgid;
19888 -               } while_each_task_pid(pgid, PIDTYPE_PGID, p);
19889 +               } while_each_task_pid(rpgid, PIDTYPE_PGID, p);
19890                 goto out;
19891         }
19892  
19893  ok_pgid:
19894 -       err = security_task_setpgid(p, pgid);
19895 +       err = security_task_setpgid(p, rpgid);
19896         if (err)
19897                 goto out;
19898  
19899 -       if (process_group(p) != pgid) {
19900 +       if (process_group(p) != rpgid) {
19901                 detach_pid(p, PIDTYPE_PGID);
19902 -               p->signal->pgrp = pgid;
19903 -               attach_pid(p, PIDTYPE_PGID, pgid);
19904 +               p->signal->pgrp = rpgid;
19905 +               attach_pid(p, PIDTYPE_PGID, rpgid);
19906         }
19907  
19908         err = 0;
19909 @@ -1170,7 +1189,7 @@ out:
19910  asmlinkage long sys_getpgid(pid_t pid)
19911  {
19912         if (!pid) {
19913 -               return process_group(current);
19914 +               return vx_rmap_pid(process_group(current));
19915         } else {
19916                 int retval;
19917                 struct task_struct *p;
19918 @@ -1182,7 +1201,7 @@ asmlinkage long sys_getpgid(pid_t pid)
19919                 if (p) {
19920                         retval = security_task_getpgid(p);
19921                         if (!retval)
19922 -                               retval = process_group(p);
19923 +                               retval = vx_rmap_pid(process_group(p));
19924                 }
19925                 read_unlock(&tasklist_lock);
19926                 return retval;
19927 @@ -1518,7 +1537,7 @@ asmlinkage long sys_newuname(struct new_
19928         int errno = 0;
19929  
19930         down_read(&uts_sem);
19931 -       if (copy_to_user(name,&system_utsname,sizeof *name))
19932 +       if (copy_to_user(name, vx_new_utsname(), sizeof *name))
19933                 errno = -EFAULT;
19934         up_read(&uts_sem);
19935         return errno;
19936 @@ -1529,15 +1548,17 @@ asmlinkage long sys_sethostname(char __u
19937         int errno;
19938         char tmp[__NEW_UTS_LEN];
19939  
19940 -       if (!capable(CAP_SYS_ADMIN))
19941 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME))
19942                 return -EPERM;
19943         if (len < 0 || len > __NEW_UTS_LEN)
19944                 return -EINVAL;
19945         down_write(&uts_sem);
19946         errno = -EFAULT;
19947         if (!copy_from_user(tmp, name, len)) {
19948 -               memcpy(system_utsname.nodename, tmp, len);
19949 -               system_utsname.nodename[len] = 0;
19950 +               char *ptr = vx_new_uts(nodename);
19951 +
19952 +               memcpy(ptr, tmp, len);
19953 +               ptr[len] = 0;
19954                 errno = 0;
19955         }
19956         up_write(&uts_sem);
19957 @@ -1549,15 +1570,17 @@ asmlinkage long sys_sethostname(char __u
19958  asmlinkage long sys_gethostname(char __user *name, int len)
19959  {
19960         int i, errno;
19961 +       char *ptr;
19962  
19963         if (len < 0)
19964                 return -EINVAL;
19965         down_read(&uts_sem);
19966 -       i = 1 + strlen(system_utsname.nodename);
19967 +       ptr = vx_new_uts(nodename);
19968 +       i = 1 + strlen(ptr);
19969         if (i > len)
19970                 i = len;
19971         errno = 0;
19972 -       if (copy_to_user(name, system_utsname.nodename, i))
19973 +       if (copy_to_user(name, ptr, i))
19974                 errno = -EFAULT;
19975         up_read(&uts_sem);
19976         return errno;
19977 @@ -1574,7 +1597,7 @@ asmlinkage long sys_setdomainname(char _
19978         int errno;
19979         char tmp[__NEW_UTS_LEN];
19980  
19981 -       if (!capable(CAP_SYS_ADMIN))
19982 +       if (!vx_capable(CAP_SYS_ADMIN, VXC_SET_UTSNAME))
19983                 return -EPERM;
19984         if (len < 0 || len > __NEW_UTS_LEN)
19985                 return -EINVAL;
19986 @@ -1582,8 +1605,10 @@ asmlinkage long sys_setdomainname(char _
19987         down_write(&uts_sem);
19988         errno = -EFAULT;
19989         if (!copy_from_user(tmp, name, len)) {
19990 -               memcpy(system_utsname.domainname, tmp, len);
19991 -               system_utsname.domainname[len] = 0;
19992 +               char *ptr = vx_new_uts(domainname);
19993 +
19994 +               memcpy(ptr, tmp, len);
19995 +               ptr[len] = 0;
19996                 errno = 0;
19997         }
19998         up_write(&uts_sem);
19999 @@ -1640,7 +1665,7 @@ asmlinkage long sys_setrlimit(unsigned i
20000                 return -EINVAL;
20001         old_rlim = current->signal->rlim + resource;
20002         if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
20003 -           !capable(CAP_SYS_RESOURCE))
20004 +           !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
20005                 return -EPERM;
20006         if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
20007                         return -EPERM;
20008 diff -NurpP --minimal linux-2.6.16.20/kernel/sysctl.c linux-2.6.16.20-vs2.1.1-rc22/kernel/sysctl.c
20009 --- linux-2.6.16.20/kernel/sysctl.c     2006-04-09 13:49:58 +0200
20010 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/sysctl.c        2006-04-26 19:07:00 +0200
20011 @@ -46,6 +46,7 @@
20012  #include <linux/syscalls.h>
20013  #include <linux/nfs_fs.h>
20014  #include <linux/acpi.h>
20015 +#include <linux/vserver/cvirt.h>
20016  
20017  #include <asm/uaccess.h>
20018  #include <asm/processor.h>
20019 @@ -89,6 +90,7 @@ static int ngroups_max = NGROUPS_MAX;
20020  #ifdef CONFIG_KMOD
20021  extern char modprobe_path[];
20022  #endif
20023 +extern char vshelper_path[];
20024  #ifdef CONFIG_CHR_DEV_SG
20025  extern int sg_big_buff;
20026  #endif
20027 @@ -237,6 +239,7 @@ static ctl_table kern_table[] = {
20028                 .maxlen         = sizeof(system_utsname.sysname),
20029                 .mode           = 0444,
20030                 .proc_handler   = &proc_doutsstring,
20031 +               .virt_handler   = &vx_uts_virt_handler,
20032                 .strategy       = &sysctl_string,
20033         },
20034         {
20035 @@ -246,6 +249,7 @@ static ctl_table kern_table[] = {
20036                 .maxlen         = sizeof(system_utsname.release),
20037                 .mode           = 0444,
20038                 .proc_handler   = &proc_doutsstring,
20039 +               .virt_handler   = &vx_uts_virt_handler,
20040                 .strategy       = &sysctl_string,
20041         },
20042         {
20043 @@ -255,6 +259,7 @@ static ctl_table kern_table[] = {
20044                 .maxlen         = sizeof(system_utsname.version),
20045                 .mode           = 0444,
20046                 .proc_handler   = &proc_doutsstring,
20047 +               .virt_handler   = &vx_uts_virt_handler,
20048                 .strategy       = &sysctl_string,
20049         },
20050         {
20051 @@ -264,6 +269,7 @@ static ctl_table kern_table[] = {
20052                 .maxlen         = sizeof(system_utsname.nodename),
20053                 .mode           = 0644,
20054                 .proc_handler   = &proc_doutsstring,
20055 +               .virt_handler   = &vx_uts_virt_handler,
20056                 .strategy       = &sysctl_string,
20057         },
20058         {
20059 @@ -273,6 +279,7 @@ static ctl_table kern_table[] = {
20060                 .maxlen         = sizeof(system_utsname.domainname),
20061                 .mode           = 0644,
20062                 .proc_handler   = &proc_doutsstring,
20063 +               .virt_handler   = &vx_uts_virt_handler,
20064                 .strategy       = &sysctl_string,
20065         },
20066         {
20067 @@ -409,6 +416,15 @@ static ctl_table kern_table[] = {
20068                 .strategy       = &sysctl_string,
20069         },
20070  #endif
20071 +       {
20072 +               .ctl_name       = KERN_VSHELPER,
20073 +               .procname       = "vshelper",
20074 +               .data           = &vshelper_path,
20075 +               .maxlen         = 256,
20076 +               .mode           = 0644,
20077 +               .proc_handler   = &proc_dostring,
20078 +               .strategy       = &sysctl_string,
20079 +       },
20080  #ifdef CONFIG_CHR_DEV_SG
20081         {
20082                 .ctl_name       = KERN_SG_BIG_BUFF,
20083 @@ -1564,16 +1580,20 @@ static ssize_t proc_writesys(struct file
20084  int proc_dostring(ctl_table *table, int write, struct file *filp,
20085                   void __user *buffer, size_t *lenp, loff_t *ppos)
20086  {
20087 -       size_t len;
20088 +       size_t len, maxlen;
20089         char __user *p;
20090         char c;
20091 +       void *data;
20092 +
20093 +       data = table->data;
20094 +       maxlen = table->maxlen;
20095 +
20096 +       if (!data || !maxlen || !*lenp || (*ppos && !write))
20097 +               return (*lenp = 0);
20098         
20099 -       if (!table->data || !table->maxlen || !*lenp ||
20100 -           (*ppos && !write)) {
20101 -               *lenp = 0;
20102 -               return 0;
20103 -       }
20104 -       
20105 +       if (table->virt_handler)
20106 +               table->virt_handler(table, write, filp->f_xid, &data, &maxlen);
20107 +
20108         if (write) {
20109                 len = 0;
20110                 p = buffer;
20111 @@ -1584,20 +1604,20 @@ int proc_dostring(ctl_table *table, int 
20112                                 break;
20113                         len++;
20114                 }
20115 -               if (len >= table->maxlen)
20116 -                       len = table->maxlen-1;
20117 -               if(copy_from_user(table->data, buffer, len))
20118 +               if (len >= maxlen)
20119 +                       len = maxlen-1;
20120 +               if(copy_from_user(data, buffer, len))
20121                         return -EFAULT;
20122 -               ((char *) table->data)[len] = 0;
20123 +               ((char *) data)[len] = 0;
20124                 *ppos += *lenp;
20125         } else {
20126 -               len = strlen(table->data);
20127 -               if (len > table->maxlen)
20128 -                       len = table->maxlen;
20129 +               len = strlen(data);
20130 +               if (len > maxlen)
20131 +                       len = maxlen;
20132                 if (len > *lenp)
20133                         len = *lenp;
20134                 if (len)
20135 -                       if(copy_to_user(buffer, table->data, len))
20136 +                       if(copy_to_user(buffer, data, len))
20137                                 return -EFAULT;
20138                 if (len < *lenp) {
20139                         if(put_user('\n', ((char __user *) buffer) + len))
20140 diff -NurpP --minimal linux-2.6.16.20/kernel/time.c linux-2.6.16.20-vs2.1.1-rc22/kernel/time.c
20141 --- linux-2.6.16.20/kernel/time.c       2006-02-18 14:40:38 +0100
20142 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/time.c  2006-05-29 16:59:20 +0200
20143 @@ -61,7 +61,7 @@ asmlinkage long sys_time(time_t __user *
20144         time_t i;
20145         struct timeval tv;
20146  
20147 -       do_gettimeofday(&tv);
20148 +       vx_gettimeofday(&tv);
20149         i = tv.tv_sec;
20150  
20151         if (tloc) {
20152 @@ -92,7 +92,7 @@ asmlinkage long sys_stime(time_t __user 
20153         if (err)
20154                 return err;
20155  
20156 -       do_settimeofday(&tv);
20157 +       vx_settimeofday(&tv);
20158         return 0;
20159  }
20160  
20161 @@ -102,7 +102,7 @@ asmlinkage long sys_gettimeofday(struct 
20162  {
20163         if (likely(tv != NULL)) {
20164                 struct timeval ktv;
20165 -               do_gettimeofday(&ktv);
20166 +               vx_gettimeofday(&ktv);
20167                 if (copy_to_user(tv, &ktv, sizeof(ktv)))
20168                         return -EFAULT;
20169         }
20170 @@ -176,7 +176,7 @@ int do_sys_settimeofday(struct timespec 
20171                 /* SMP safe, again the code in arch/foo/time.c should
20172                  * globally block out interrupts when it runs.
20173                  */
20174 -               return do_settimeofday(tv);
20175 +               return vx_settimeofday(tv);
20176         }
20177         return 0;
20178  }
20179 @@ -558,7 +558,7 @@ void getnstimeofday(struct timespec *tv)
20180  {
20181         struct timeval x;
20182  
20183 -       do_gettimeofday(&x);
20184 +       vx_gettimeofday(&x);
20185         tv->tv_sec = x.tv_sec;
20186         tv->tv_nsec = x.tv_usec * NSEC_PER_USEC;
20187  }
20188 diff -NurpP --minimal linux-2.6.16.20/kernel/timer.c linux-2.6.16.20-vs2.1.1-rc22/kernel/timer.c
20189 --- linux-2.6.16.20/kernel/timer.c      2006-04-09 13:49:58 +0200
20190 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/timer.c 2006-04-26 19:07:00 +0200
20191 @@ -34,6 +34,9 @@
20192  #include <linux/time.h>
20193  #include <linux/jiffies.h>
20194  #include <linux/posix-timers.h>
20195 +#include <linux/vs_cvirt.h>
20196 +#include <linux/vs_pid.h>
20197 +#include <linux/vserver/sched.h>
20198  #include <linux/cpu.h>
20199  #include <linux/syscalls.h>
20200  #include <linux/delay.h>
20201 @@ -972,12 +975,6 @@ asmlinkage unsigned long sys_alarm(unsig
20202  
20203  #endif
20204  
20205 -#ifndef __alpha__
20206 -
20207 -/*
20208 - * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
20209 - * should be moved into arch/i386 instead?
20210 - */
20211  
20212  /**
20213   * sys_getpid - return the thread group id of the current process
20214 @@ -990,7 +987,7 @@ asmlinkage unsigned long sys_alarm(unsig
20215   */
20216  asmlinkage long sys_getpid(void)
20217  {
20218 -       return current->tgid;
20219 +       return vx_map_tgid(current->tgid);
20220  }
20221  
20222  /*
20223 @@ -1034,9 +1031,23 @@ asmlinkage long sys_getppid(void)
20224         pid = rcu_dereference(current->real_parent)->tgid;
20225         rcu_read_unlock();
20226  
20227 -       return pid;
20228 +       return vx_map_pid(pid);
20229  }
20230  
20231 +#ifdef __alpha__
20232 +
20233 +/*
20234 + * The Alpha uses getxpid, getxuid, and getxgid instead.
20235 + */
20236 +
20237 +asmlinkage long do_getxpid(long *ppid) /* returns pid; ppid via *ppid */
20238 +{
20239 +       *ppid = sys_getppid();
20240 +       return sys_getpid();
20241 +}
20242 +
20243 +#else /* __alpha__ */
20244 +
20245  asmlinkage long sys_getuid(void)
20246  {
20247         /* Only we change this so SMP safe */
20248 @@ -1197,6 +1208,8 @@ asmlinkage long sys_sysinfo(struct sysin
20249                         tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
20250                         tp.tv_sec++;
20251                 }
20252 +               if (vx_flags(VXF_VIRT_UPTIME, 0))
20253 +                       vx_vsi_uptime(&tp, NULL);
20254                 val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
20255  
20256                 val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
20257 diff -NurpP --minimal linux-2.6.16.20/kernel/user.c linux-2.6.16.20-vs2.1.1-rc22/kernel/user.c
20258 --- linux-2.6.16.20/kernel/user.c       2006-02-18 14:40:38 +0100
20259 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/user.c  2006-04-26 19:07:00 +0200
20260 @@ -23,8 +23,8 @@
20261  #define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
20262  #define UIDHASH_SZ             (1 << UIDHASH_BITS)
20263  #define UIDHASH_MASK           (UIDHASH_SZ - 1)
20264 -#define __uidhashfn(uid)       (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
20265 -#define uidhashentry(uid)      (uidhash_table + __uidhashfn((uid)))
20266 +#define __uidhashfn(xid,uid)   ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK)
20267 +#define uidhashentry(xid,uid)  (uidhash_table + __uidhashfn((xid),(uid)))
20268  
20269  static kmem_cache_t *uid_cachep;
20270  static struct list_head uidhash_table[UIDHASH_SZ];
20271 @@ -66,7 +66,7 @@ static inline void uid_hash_remove(struc
20272         list_del(&up->uidhash_list);
20273  }
20274  
20275 -static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent)
20276 +static inline struct user_struct *uid_hash_find(xid_t xid, uid_t uid, struct list_head *hashent)
20277  {
20278         struct list_head *up;
20279  
20280 @@ -75,7 +75,7 @@ static inline struct user_struct *uid_ha
20281  
20282                 user = list_entry(up, struct user_struct, uidhash_list);
20283  
20284 -               if(user->uid == uid) {
20285 +               if(user->uid == uid && user->xid == xid) {
20286                         atomic_inc(&user->__count);
20287                         return user;
20288                 }
20289 @@ -90,13 +90,13 @@ static inline struct user_struct *uid_ha
20290   *
20291   * If the user_struct could not be found, return NULL.
20292   */
20293 -struct user_struct *find_user(uid_t uid)
20294 +struct user_struct *find_user(xid_t xid, uid_t uid)
20295  {
20296         struct user_struct *ret;
20297         unsigned long flags;
20298  
20299         spin_lock_irqsave(&uidhash_lock, flags);
20300 -       ret = uid_hash_find(uid, uidhashentry(uid));
20301 +       ret = uid_hash_find(xid, uid, uidhashentry(xid, uid));
20302         spin_unlock_irqrestore(&uidhash_lock, flags);
20303         return ret;
20304  }
20305 @@ -116,13 +116,13 @@ void free_uid(struct user_struct *up)
20306         local_irq_restore(flags);
20307  }
20308  
20309 -struct user_struct * alloc_uid(uid_t uid)
20310 +struct user_struct * alloc_uid(xid_t xid, uid_t uid)
20311  {
20312 -       struct list_head *hashent = uidhashentry(uid);
20313 +       struct list_head *hashent = uidhashentry(xid, uid);
20314         struct user_struct *up;
20315  
20316         spin_lock_irq(&uidhash_lock);
20317 -       up = uid_hash_find(uid, hashent);
20318 +       up = uid_hash_find(xid, uid, hashent);
20319         spin_unlock_irq(&uidhash_lock);
20320  
20321         if (!up) {
20322 @@ -132,6 +132,7 @@ struct user_struct * alloc_uid(uid_t uid
20323                 if (!new)
20324                         return NULL;
20325                 new->uid = uid;
20326 +               new->xid = xid;
20327                 atomic_set(&new->__count, 1);
20328                 atomic_set(&new->processes, 0);
20329                 atomic_set(&new->files, 0);
20330 @@ -154,7 +155,7 @@ struct user_struct * alloc_uid(uid_t uid
20331                  * on adding the same user already..
20332                  */
20333                 spin_lock_irq(&uidhash_lock);
20334 -               up = uid_hash_find(uid, hashent);
20335 +               up = uid_hash_find(xid, uid, hashent);
20336                 if (up) {
20337                         key_put(new->uid_keyring);
20338                         key_put(new->session_keyring);
20339 @@ -200,7 +201,7 @@ static int __init uid_cache_init(void)
20340  
20341         /* Insert the root user immediately (init already runs as root) */
20342         spin_lock_irq(&uidhash_lock);
20343 -       uid_hash_insert(&root_user, uidhashentry(0));
20344 +       uid_hash_insert(&root_user, uidhashentry(0,0));
20345         spin_unlock_irq(&uidhash_lock);
20346  
20347         return 0;
20348 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/Kconfig linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/Kconfig
20349 --- linux-2.6.16.20/kernel/vserver/Kconfig      1970-01-01 01:00:00 +0100
20350 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/Kconfig 2006-05-29 18:02:20 +0200
20351 @@ -0,0 +1,265 @@
20352 +#
20353 +# Linux VServer configuration
20354 +#
20355 +
20356 +menu "Linux VServer"
20357 +
20358 +config VSERVER_LEGACY
20359 +       bool    "Enable Legacy Kernel API"
20360 +       default y
20361 +       help
20362 +         This enables the legacy API used in vs1.xx, maintaining
20363 +         compatibility with older vserver tools, and guest images
20364 +         that are configured using the legacy method.  This is
20365 +         probably a good idea for now, for migration purposes.
20366 +
20367 +         Note that some tools have not yet been altered to use
20368 +         this API, so disabling this option may reduce some
20369 +         functionality.
20370 +
20371 +config VSERVER_LEGACY_VERSION
20372 +       bool    "Show a Legacy Version ID"
20373 +       depends on VSERVER_LEGACY
20374 +       default n
20375 +       help
20376 +         This shows a special legacy version to very old tools
20377 +         which do not handle the current version correctly.
20378 +
20379 +         This will probably disable some features of newer tools
20380 +         so better avoid it, unless you really, really need it
20381 +         for backwards compatibility.
20382 +
20383 +config VSERVER_DYNAMIC_IDS
20384 +       bool    "Enable dynamic context IDs"
20385 +       depends on VSERVER_LEGACY
20386 +       default y
20387 +       help
20388 +         This enables support of in kernel dynamic context IDs,
20389 +         which is deprecated and will probably be removed in the
20390 +         next release.
20391 +
20392 +config VSERVER_NGNET
20393 +       bool    "Disable Legacy Networking Kernel API"
20394 +       depends on EXPERIMENTAL
20395 +       default n
20396 +       help
20397 +         This disables the legacy networking API which is required
20398 +         by the chbind tool. Do not disable it unless you know
20399 +         exactly what you are doing.
20400 +
20401 +config VSERVER_REMAP_SADDR
20402 +       bool    "Remap Source IP Address"
20403 +       depends on EXPERIMENTAL && !VSERVER_LEGACY
20404 +       default n
20405 +       help
20406 +         This allows to remap the source IP address of 'local'
20407 +         connections from 127.0.0.1 to the first assigned
20408 +         guest IP.
20409 +
20410 +config VSERVER_COWBL
20411 +       bool    "Enable COW Immutable Link Breaking"
20412 +       depends on EXPERIMENTAL
20413 +       default y
20414 +       help
20415 +         This enables the COW (Copy-On-Write) link break code.
20416 +         It allows you to treat unified files like normal files
20417 +         when writing to them (which will implicitly break the
20418 +         link and create a copy of the unified file)
20419 +
20420 +config VSERVER_VTIME
20421 +       bool    "Enable Virtualized Guest Time"
20422 +       depends on EXPERIMENTAL
20423 +       default n
20424 +       help
20425 +         This enables per guest time offsets to allow for
20426 +         adjusting the system clock individually per guest.
20427 +         This adds some overhead to the time functions and
20428 +         therefore should not be enabled without good reason.
20429 +
20430 +config VSERVER_PROC_SECURE
20431 +       bool    "Enable Proc Security"
20432 +       depends on PROC_FS
20433 +       default y
20434 +       help
20435 +         This configures ProcFS security to initially hide
20436 +         non-process entries for all contexts except the main and
20437 +         spectator context (i.e. for all guests), which is a secure
20438 +         default.
20439 +
20440 +         (note: on 1.2x the entries were visible by default)
20441 +
20442 +config VSERVER_HARDCPU
20443 +       bool    "Enable Hard CPU Limits"
20444 +       depends on EXPERIMENTAL
20445 +       default n
20446 +       help
20447 +         Activate the Hard CPU Limits
20448 +
20449 +         This will compile in code that allows the Token Bucket
20450 +         Scheduler to put processes on hold when a context's
20451 +         tokens are depleted (provided that its per-context
20452 +         sched_hard flag is set).
20453 +
20454 +         Processes belonging to that context will not be able
20455 +         to consume CPU resources again until a per-context
20456 +         configured minimum of tokens has been reached.
20457 +
20458 +config VSERVER_IDLETIME
20459 +       bool    "Avoid idle CPUs by skipping Time"
20460 +       depends on VSERVER_HARDCPU
20461 +       default n
20462 +       help
20463 +         This option allows the scheduler to artificially
20464 +         advance time (per cpu) when otherwise the idle
20465 +         task would be scheduled, thus keeping the cpu
20466 +         busy and sharing the available resources among
20467 +         certain contexts.
20468 +
20469 +config VSERVER_IDLELIMIT
20470 +       bool    "Limit the IDLE task"
20471 +       depends on VSERVER_HARDCPU
20472 +       default n
20473 +       help
20474 +         Limit the idle slices, so that the next context
20475 +         will be scheduled as soon as possible.
20476 +
20477 +         This might improve interactivity and latency, but
20478 +         will also marginally increase scheduling overhead.
20479 +
20480 +choice
20481 +       prompt  "Persistent Inode Tagging"
20482 +       default TAGGING_ID24
20483 +       help
20484 +         This adds persistent context information to filesystems
20485 +         mounted with the tagxid option. Tagging is a requirement
20486 +         for per-context disk limits and per-context quota.
20487 +
20488 +
20489 +config TAGGING_NONE
20490 +       bool    "Disabled"
20491 +       help
20492 +         do not store per-context information in inodes.
20493 +
20494 +config TAGGING_UID16
20495 +       bool    "UID16/GID32"
20496 +       help
20497 +         reduces UID to 16 bit, but leaves GID at 32 bit.
20498 +
20499 +config TAGGING_GID16
20500 +       bool    "UID32/GID16"
20501 +       help
20502 +         reduces GID to 16 bit, but leaves UID at 32 bit.
20503 +
20504 +config TAGGING_ID24
20505 +       bool    "UID24/GID24"
20506 +       help
20507 +         uses the upper 8bit from UID and GID for XID tagging
20508 +         which leaves 24bit for UID/GID each, which should be
20509 +         more than sufficient for normal use.
20510 +
20511 +config TAGGING_INTERN
20512 +       bool    "UID32/GID32"
20513 +       help
20514 +         this uses otherwise reserved inode fields in the on
20515 +         disk representation, which limits the use to a few
20516 +         filesystems (currently ext2 and ext3)
20517 +
20518 +config TAGGING_RUNTIME
20519 +       bool    "Runtime"
20520 +       depends on EXPERIMENTAL
20521 +       help
20522 +         inodes are tagged when first accessed, this doesn't
20523 +         require any persistent information, but might give
20524 +         funny results for mixed access.
20525 +
20526 +endchoice
20527 +
20528 +config TAG_NFSD
20529 +       bool    "Tag NFSD User Auth and Files"
20530 +       default n
20531 +       help
20532 +         Enable this if you do want the in-kernel NFS
20533 +         Server to use the tagging specified above.
20534 +         (will require patched clients too)
20535 +
20536 +config PROPAGATE
20537 +       bool    "Enable Inode Tag Propagation"
20538 +       default n
20539 +       depends on EXPERIMENTAL
20540 +       help
20541 +         This allows for the tagid= mount option to specify
20542 +         a tagid which is to be used for the entire mount
20543 +         tree.
20544 +
20545 +config VSERVER_DEBUG
20546 +       bool    "VServer Debugging Code"
20547 +       default n
20548 +       help
20549 +         Set this to yes if you want to be able to activate
20550 +         debugging output at runtime. It adds a probably small
20551 +         overhead to all vserver related functions and
20552 +         increases the kernel size by about 20k.
20553 +
20554 +config VSERVER_HISTORY
20555 +       bool    "VServer History Tracing"
20556 +       depends on VSERVER_DEBUG
20557 +       default n
20558 +       help
20559 +         Set this to yes if you want to record the history of
20560 +         linux-vserver activities, so they can be replayed in
20561 +         the event of a kernel panic or oops.
20562 +
20563 +config VSERVER_HISTORY_SIZE
20564 +       int "Per-CPU History Size (32-65536)"
20565 +       depends on VSERVER_HISTORY
20566 +       range 32 65536
20567 +       default 64
20568 +       help
20569 +         This allows you to specify the number of entries in
20570 +         the per-CPU history buffer.
20571 +
20572 +config VSERVER_MONITOR
20573 +       bool    "VServer Scheduling Monitor"
20574 +       depends on VSERVER_DEBUG
20575 +       default n
20576 +       help
20577 +         Set this to yes if you want to record the scheduling
20578 +         decisions, so that they can be relayed to userspace
20579 +         for detailed analysis.
20580 +
20581 +config VSERVER_MONITOR_SIZE
20582 +       int "Per-CPU Monitor Queue Size (32-65536)"
20583 +       depends on VSERVER_MONITOR
20584 +       range 32 65536
20585 +       default 1024
20586 +       help
20587 +         This allows you to specify the number of entries in
20588 +         the per-CPU scheduling monitor buffer.
20589 +
20590 +config VSERVER_MONITOR_SYNC
20591 +       int "Per-CPU Monitor Sync Interval (0-65536)"
20592 +       depends on VSERVER_MONITOR
20593 +       range 0 65536
20594 +       default 256
20595 +       help
20596 +         This allows you to specify the interval in ticks
20597 +         when a time sync entry is inserted.
20598 +
20599 +endmenu
20600 +
20601 +
20602 +config VSERVER
20603 +       bool
20604 +       default y
20605 +
20606 +config VSERVER_SECURITY
20607 +       bool
20608 +       depends on SECURITY
20609 +       default y
20610 +       select SECURITY_CAPABILITIES
20611 +
20612 +config VSERVER_LEGACYNET
20613 +       bool
20614 +       depends on !VSERVER_NGNET
20615 +       default y
20616 +
20617 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/Makefile linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/Makefile
20618 --- linux-2.6.16.20/kernel/vserver/Makefile     1970-01-01 01:00:00 +0100
20619 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/Makefile        2006-04-26 19:07:00 +0200
20620 @@ -0,0 +1,17 @@
20621 +#
20622 +# Makefile for the Linux vserver routines.
20623 +#
20624 +
20625 +
20626 +obj-y          += vserver.o
20627 +
20628 +vserver-y      := switch.o context.o namespace.o sched.o network.o inode.o \
20629 +                  limit.o cvirt.o signal.o helper.o init.o dlimit.o
20630 +
20631 +vserver-$(CONFIG_PROC_FS) += proc.o
20632 +vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o
20633 +vserver-$(CONFIG_VSERVER_LEGACY) += legacy.o
20634 +vserver-$(CONFIG_VSERVER_LEGACYNET) += legacynet.o
20635 +vserver-$(CONFIG_VSERVER_HISTORY) += history.o
20636 +vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o
20637 +
20638 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/context.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/context.c
20639 --- linux-2.6.16.20/kernel/vserver/context.c    1970-01-01 01:00:00 +0100
20640 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/context.c       2006-05-02 04:01:31 +0200
20641 @@ -0,0 +1,1051 @@
20642 +/*
20643 + *  linux/kernel/vserver/context.c
20644 + *
20645 + *  Virtual Server: Context Support
20646 + *
20647 + *  Copyright (C) 2003-2005  Herbert Pötzl
20648 + *
20649 + *  V0.01  context helper
20650 + *  V0.02  vx_ctx_kill syscall command
20651 + *  V0.03  replaced context_info calls
20652 + *  V0.04  redesign of struct (de)alloc
20653 + *  V0.05  rlimit basic implementation
20654 + *  V0.06  task_xid and info commands
20655 + *  V0.07  context flags and caps
20656 + *  V0.08  switch to RCU based hash
20657 + *  V0.09  revert to non RCU for now
20658 + *  V0.10  and back to working RCU hash
20659 + *  V0.11  and back to locking again
20660 + *  V0.12  referenced context store
20661 + *  V0.13  separate per cpu data
20662 + *
20663 + */
20664 +
20665 +#include <linux/slab.h>
20666 +#include <linux/types.h>
20667 +#include <linux/namespace.h>
20668 +
20669 +#include <linux/sched.h>
20670 +#include <linux/vserver/network.h>
20671 +#include <linux/vserver/legacy.h>
20672 +#include <linux/vserver/limit.h>
20673 +#include <linux/vserver/debug.h>
20674 +#include <linux/vserver/limit_int.h>
20675 +
20676 +#include <linux/vs_context.h>
20677 +#include <linux/vs_limit.h>
20678 +#include <linux/vserver/context_cmd.h>
20679 +
20680 +#include <linux/err.h>
20681 +#include <asm/errno.h>
20682 +
20683 +#include "cvirt_init.h"
20684 +#include "limit_init.h"
20685 +#include "sched_init.h"
20686 +
20687 +
20688 +atomic_t vx_global_ctotal      = ATOMIC_INIT(0);
20689 +atomic_t vx_global_cactive     = ATOMIC_INIT(0);
20690 +
20691 +
20692 +/*     now inactive context structures */
20693 +
20694 +static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT;
20695 +
20696 +static spinlock_t vx_info_inactive_lock = SPIN_LOCK_UNLOCKED;
20697 +
20698 +
20699 +/*     __alloc_vx_info()
20700 +
20701 +       * allocate an initialized vx_info struct
20702 +       * doesn't make it visible (hash)                        */
20703 +
20704 +static struct vx_info *__alloc_vx_info(xid_t xid)
20705 +{
20706 +       struct vx_info *new = NULL;
20707 +       int cpu;
20708 +
20709 +       vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
20710 +
20711 +       /* would this benefit from a slab cache? */
20712 +       new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
20713 +       if (!new)
20714 +               return 0;
20715 +
20716 +       memset (new, 0, sizeof(struct vx_info));
20717 +#ifdef CONFIG_SMP
20718 +       new->ptr_pc = alloc_percpu(struct _vx_info_pc);
20719 +       if (!new->ptr_pc)
20720 +               goto error;
20721 +#endif
20722 +       new->vx_id = xid;
20723 +       INIT_HLIST_NODE(&new->vx_hlist);
20724 +       atomic_set(&new->vx_usecnt, 0);
20725 +       atomic_set(&new->vx_tasks, 0);
20726 +       new->vx_parent = NULL;
20727 +       new->vx_state = 0;
20728 +       init_waitqueue_head(&new->vx_wait);
20729 +
20730 +       /* prepare reaper */
20731 +       get_task_struct(child_reaper);
20732 +       new->vx_reaper = child_reaper;
20733 +
20734 +       /* rest of init goes here */
20735 +       vx_info_init_limit(&new->limit);
20736 +       vx_info_init_sched(&new->sched);
20737 +       vx_info_init_cvirt(&new->cvirt);
20738 +       vx_info_init_cacct(&new->cacct);
20739 +
20740 +       /* per cpu data structures */
20741 +       for_each_cpu(cpu) {
20742 +               vx_info_init_sched_pc(
20743 +                       &vx_per_cpu(new, sched_pc, cpu), cpu);
20744 +               vx_info_init_cvirt_pc(
20745 +                       &vx_per_cpu(new, cvirt_pc, cpu), cpu);
20746 +       }
20747 +
20748 +       new->vx_flags = VXF_INIT_SET;
20749 +       new->vx_bcaps = CAP_INIT_EFF_SET;
20750 +       new->vx_ccaps = 0;
20751 +       new->vx_cap_bset = cap_bset;
20752 +
20753 +       new->reboot_cmd = 0;
20754 +       new->exit_code = 0;
20755 +
20756 +       vxdprintk(VXD_CBIT(xid, 0),
20757 +               "alloc_vx_info(%d) = %p", xid, new);
20758 +       vxh_alloc_vx_info(new);
20759 +       atomic_inc(&vx_global_ctotal);
20760 +       return new;
20761 +#ifdef CONFIG_SMP
20762 +error:
20763 +       kfree(new);
20764 +       return 0;
20765 +#endif
20766 +}
20767 +
20768 +/*     __dealloc_vx_info()
20769 +
20770 +       * final disposal of vx_info                             */
20771 +
20772 +static void __dealloc_vx_info(struct vx_info *vxi)
20773 +{
20774 +       int cpu;
20775 +
20776 +       vxdprintk(VXD_CBIT(xid, 0),
20777 +               "dealloc_vx_info(%p)", vxi);
20778 +       vxh_dealloc_vx_info(vxi);
20779 +
20780 +       vxi->vx_id = -1;
20781 +
20782 +       vx_info_exit_limit(&vxi->limit);
20783 +       vx_info_exit_sched(&vxi->sched);
20784 +       vx_info_exit_cvirt(&vxi->cvirt);
20785 +       vx_info_exit_cacct(&vxi->cacct);
20786 +
20787 +       for_each_cpu(cpu) {
20788 +               vx_info_exit_sched_pc(
20789 +                       &vx_per_cpu(vxi, sched_pc, cpu), cpu);
20790 +               vx_info_exit_cvirt_pc(
20791 +                       &vx_per_cpu(vxi, cvirt_pc, cpu), cpu);
20792 +       }
20793 +
20794 +       vxi->vx_state |= VXS_RELEASED;
20795 +
20796 +#ifdef CONFIG_SMP
20797 +       free_percpu(vxi->ptr_pc);
20798 +#endif
20799 +       kfree(vxi);
20800 +       atomic_dec(&vx_global_ctotal);
20801 +}
20802 +
20803 +static void __shutdown_vx_info(struct vx_info *vxi)
20804 +{
20805 +       struct namespace *namespace;
20806 +       struct fs_struct *fs;
20807 +
20808 +       might_sleep();
20809 +
20810 +       vxi->vx_state |= VXS_SHUTDOWN;
20811 +       vs_state_change(vxi, VSC_SHUTDOWN);
20812 +
20813 +       namespace = xchg(&vxi->vx_namespace, NULL);
20814 +       if (namespace)
20815 +               put_namespace(namespace);
20816 +
20817 +       fs = xchg(&vxi->vx_fs, NULL);
20818 +       if (fs)
20819 +               put_fs_struct(fs);
20820 +}
20821 +
20822 +/* exported stuff */
20823 +
20824 +void free_vx_info(struct vx_info *vxi)
20825 +{
20826 +       /* context shutdown is mandatory */
20827 +       BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN));
20828 +
20829 +       BUG_ON(atomic_read(&vxi->vx_usecnt));
20830 +       BUG_ON(atomic_read(&vxi->vx_tasks));
20831 +
20832 +       BUG_ON(vx_info_state(vxi, VXS_HASHED));
20833 +
20834 +       BUG_ON(vxi->vx_namespace);
20835 +       BUG_ON(vxi->vx_fs);
20836 +
20837 +       spin_lock(&vx_info_inactive_lock);
20838 +       hlist_del(&vxi->vx_hlist);
20839 +       spin_unlock(&vx_info_inactive_lock);
20840 +
20841 +       __dealloc_vx_info(vxi);
20842 +}
20843 +
20844 +
20845 +/*     hash table for vx_info hash */
20846 +
20847 +#define VX_HASH_SIZE   13
20848 +
20849 +static struct hlist_head vx_info_hash[VX_HASH_SIZE] =
20850 +       { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT };
20851 +
20852 +static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED;
20853 +
20854 +
20855 +static inline unsigned int __hashval(xid_t xid)
20856 +{
20857 +       return (xid % VX_HASH_SIZE);
20858 +}
20859 +
20860 +
20861 +
20862 +/*     __hash_vx_info()
20863 +
20864 +       * add the vxi to the global hash table
20865 +       * requires the hash_lock to be held                     */
20866 +
20867 +static inline void __hash_vx_info(struct vx_info *vxi)
20868 +{
20869 +       struct hlist_head *head;
20870 +
20871 +       vxd_assert_lock(&vx_info_hash_lock);
20872 +       vxdprintk(VXD_CBIT(xid, 4),
20873 +               "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
20874 +       vxh_hash_vx_info(vxi);
20875 +
20876 +       /* context must not be hashed */
20877 +       BUG_ON(vx_info_state(vxi, VXS_HASHED));
20878 +
20879 +       vxi->vx_state |= VXS_HASHED;
20880 +       head = &vx_info_hash[__hashval(vxi->vx_id)];
20881 +       hlist_add_head(&vxi->vx_hlist, head);
20882 +       atomic_inc(&vx_global_cactive);
20883 +}
20884 +
20885 +/*     __unhash_vx_info()
20886 +
20887 +       * remove the vxi from the global hash table
20888 +       * requires the hash_lock to be held                     */
20889 +
20890 +static inline void __unhash_vx_info(struct vx_info *vxi)
20891 +{
20892 +       vxd_assert_lock(&vx_info_hash_lock);
20893 +       vxdprintk(VXD_CBIT(xid, 4),
20894 +               "__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id);
20895 +       vxh_unhash_vx_info(vxi);
20896 +
20897 +       /* context must be hashed */
20898 +       BUG_ON(!vx_info_state(vxi, VXS_HASHED));
20899 +
20900 +       vxi->vx_state &= ~VXS_HASHED;
20901 +       hlist_del_init(&vxi->vx_hlist);
20902 +       spin_lock(&vx_info_inactive_lock);
20903 +       hlist_add_head(&vxi->vx_hlist, &vx_info_inactive);
20904 +       spin_unlock(&vx_info_inactive_lock);
20905 +       atomic_dec(&vx_global_cactive);
20906 +}
20907 +
20908 +
20909 +/*     __lookup_vx_info()
20910 +
20911 +       * requires the hash_lock to be held
20912 +       * doesn't increment the vx_refcnt                       */
20913 +
20914 +static inline struct vx_info *__lookup_vx_info(xid_t xid)
20915 +{
20916 +       struct hlist_head *head = &vx_info_hash[__hashval(xid)];
20917 +       struct hlist_node *pos;
20918 +       struct vx_info *vxi;
20919 +
20920 +       vxd_assert_lock(&vx_info_hash_lock);
20921 +       hlist_for_each(pos, head) {
20922 +               vxi = hlist_entry(pos, struct vx_info, vx_hlist);
20923 +
20924 +               if (vxi->vx_id == xid)
20925 +                       goto found;
20926 +       }
20927 +       vxi = NULL;
20928 +found:
20929 +       vxdprintk(VXD_CBIT(xid, 0),
20930 +               "__lookup_vx_info(#%u): %p[#%u]",
20931 +               xid, vxi, vxi?vxi->vx_id:0);
20932 +       vxh_lookup_vx_info(vxi, xid);
20933 +       return vxi;
20934 +}
20935 +
20936 +
20937 +/*     __vx_dynamic_id()
20938 +
20939 +       * find unused dynamic xid
20940 +       * requires the hash_lock to be held                     */
20941 +
20942 +static inline xid_t __vx_dynamic_id(void)
20943 +{
20944 +       static xid_t seq = MAX_S_CONTEXT;
20945 +       xid_t barrier = seq;
20946 +
20947 +       vxd_assert_lock(&vx_info_hash_lock);
20948 +       do {
20949 +               if (++seq > MAX_S_CONTEXT)
20950 +                       seq = MIN_D_CONTEXT;
20951 +               if (!__lookup_vx_info(seq)) {
20952 +                       vxdprintk(VXD_CBIT(xid, 4),
20953 +                               "__vx_dynamic_id: [#%d]", seq);
20954 +                       return seq;
20955 +               }
20956 +       } while (barrier != seq);
20957 +       return 0;
20958 +}
20959 +
20960 +#ifdef CONFIG_VSERVER_LEGACY
20961 +
20962 +/*     __loc_vx_info()
20963 +
20964 +       * locate or create the requested context
20965 +       * get() it and if new hash it                           */
20966 +
20967 +static struct vx_info * __loc_vx_info(int id, int *err)
20968 +{
20969 +       struct vx_info *new, *vxi = NULL;
20970 +
20971 +       vxdprintk(VXD_CBIT(xid, 1), "loc_vx_info(%d)*", id);
20972 +
20973 +       if (!(new = __alloc_vx_info(id))) {
20974 +               *err = -ENOMEM;
20975 +               return NULL;
20976 +       }
20977 +
20978 +       /* required to make dynamic xids unique */
20979 +       spin_lock(&vx_info_hash_lock);
20980 +
20981 +       /* dynamic context requested */
20982 +       if (id == VX_DYNAMIC_ID) {
20983 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
20984 +               id = __vx_dynamic_id();
20985 +               if (!id) {
20986 +                       printk(KERN_ERR "no dynamic context available.\n");
20987 +                       goto out_unlock;
20988 +               }
20989 +               new->vx_id = id;
20990 +#else
20991 +               printk(KERN_ERR "dynamic contexts disabled.\n");
20992 +               goto out_unlock;
20993 +#endif
20994 +       }
20995 +       /* existing context requested */
20996 +       else if ((vxi = __lookup_vx_info(id))) {
20997 +               /* context in setup is not available */
20998 +               if (vxi->vx_flags & VXF_STATE_SETUP) {
20999 +                       vxdprintk(VXD_CBIT(xid, 0),
21000 +                               "loc_vx_info(%d) = %p (not available)", id, vxi);
21001 +                       vxi = NULL;
21002 +                       *err = -EBUSY;
21003 +               } else {
21004 +                       vxdprintk(VXD_CBIT(xid, 0),
21005 +                               "loc_vx_info(%d) = %p (found)", id, vxi);
21006 +                       get_vx_info(vxi);
21007 +                       *err = 0;
21008 +               }
21009 +               goto out_unlock;
21010 +       }
21011 +
21012 +       /* new context requested */
21013 +       vxdprintk(VXD_CBIT(xid, 0),
21014 +               "loc_vx_info(%d) = %p (new)", id, new);
21015 +       __hash_vx_info(get_vx_info(new));
21016 +       vxi = new, new = NULL;
21017 +       *err = 1;
21018 +
21019 +out_unlock:
21020 +       spin_unlock(&vx_info_hash_lock);
21021 +       vxh_loc_vx_info(vxi, id);
21022 +       if (new)
21023 +               __dealloc_vx_info(new);
21024 +       return vxi;
21025 +}
21026 +
21027 +#endif
21028 +
21029 +/*     __create_vx_info()
21030 +
21031 +       * create the requested context
21032 +       * get() and hash it                                     */
21033 +
21034 +static struct vx_info * __create_vx_info(int id)
21035 +{
21036 +       struct vx_info *new, *vxi = NULL;
21037 +
21038 +       vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id);
21039 +
21040 +       if (!(new = __alloc_vx_info(id)))
21041 +               return ERR_PTR(-ENOMEM);
21042 +
21043 +       /* required to make dynamic xids unique */
21044 +       spin_lock(&vx_info_hash_lock);
21045 +
21046 +       /* dynamic context requested */
21047 +       if (id == VX_DYNAMIC_ID) {
21048 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
21049 +               id = __vx_dynamic_id();
21050 +               if (!id) {
21051 +                       printk(KERN_ERR "no dynamic context available.\n");
21052 +                       vxi = ERR_PTR(-EAGAIN);
21053 +                       goto out_unlock;
21054 +               }
21055 +               new->vx_id = id;
21056 +#else
21057 +               printk(KERN_ERR "dynamic contexts disabled.\n");
21058 +               vxi = ERR_PTR(-EINVAL);
21059 +               goto out_unlock;
21060 +#endif
21061 +       }
21062 +       /* static context requested */
21063 +       else if ((vxi = __lookup_vx_info(id))) {
21064 +               vxdprintk(VXD_CBIT(xid, 0),
21065 +                       "create_vx_info(%d) = %p (already there)", id, vxi);
21066 +               if (vx_info_flags(vxi, VXF_STATE_SETUP, 0))
21067 +                       vxi = ERR_PTR(-EBUSY);
21068 +               else
21069 +                       vxi = ERR_PTR(-EEXIST);
21070 +               goto out_unlock;
21071 +       }
21072 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
21073 +       /* dynamic xid creation blocker */
21074 +       else if (id >= MIN_D_CONTEXT) {
21075 +               vxdprintk(VXD_CBIT(xid, 0),
21076 +                       "create_vx_info(%d) (dynamic rejected)", id);
21077 +               vxi = ERR_PTR(-EINVAL);
21078 +               goto out_unlock;
21079 +       }
21080 +#endif
21081 +
21082 +       /* new context */
21083 +       vxdprintk(VXD_CBIT(xid, 0),
21084 +               "create_vx_info(%d) = %p (new)", id, new);
21085 +       __hash_vx_info(get_vx_info(new));
21086 +       vxi = new, new = NULL;
21087 +
21088 +out_unlock:
21089 +       spin_unlock(&vx_info_hash_lock);
21090 +       vxh_create_vx_info(IS_ERR(vxi)?NULL:vxi, id);
21091 +       if (new)
21092 +               __dealloc_vx_info(new);
21093 +       return vxi;
21094 +}
21095 +
21096 +
21097 +/*     exported stuff                                          */
21098 +
21099 +
21100 +void unhash_vx_info(struct vx_info *vxi)
21101 +{
21102 +       __shutdown_vx_info(vxi);
21103 +       spin_lock(&vx_info_hash_lock);
21104 +       __unhash_vx_info(vxi);
21105 +       spin_unlock(&vx_info_hash_lock);
21106 +       __wakeup_vx_info(vxi);
21107 +}
21108 +
21109 +
21110 +/*     lookup_vx_info()
21111 +
21112 +       * search for a vx_info and get() it
21113 +       * negative id means current                             */
21114 +
21115 +struct vx_info *lookup_vx_info(int id)
21116 +{
21117 +       struct vx_info *vxi = NULL;
21118 +
21119 +       if (id < 0) {
21120 +               vxi = get_vx_info(current->vx_info);
21121 +       } else if (id > 1) {
21122 +               spin_lock(&vx_info_hash_lock);
21123 +               vxi = get_vx_info(__lookup_vx_info(id));
21124 +               spin_unlock(&vx_info_hash_lock);
21125 +       }
21126 +       return vxi;
21127 +}
21128 +
21129 +/*     xid_is_hashed()
21130 +
21131 +       * verify that xid is still hashed                       */
21132 +
21133 +int xid_is_hashed(xid_t xid)
21134 +{
21135 +       int hashed;
21136 +
21137 +       spin_lock(&vx_info_hash_lock);
21138 +       hashed = (__lookup_vx_info(xid) != NULL);
21139 +       spin_unlock(&vx_info_hash_lock);
21140 +       return hashed;
21141 +}
21142 +
21143 +#ifdef CONFIG_VSERVER_LEGACY
21144 +
21145 +struct vx_info *lookup_or_create_vx_info(int id)
21146 +{
21147 +       int err;
21148 +
21149 +       return __loc_vx_info(id, &err);
21150 +}
21151 +
21152 +#endif
21153 +
21154 +#ifdef CONFIG_PROC_FS
21155 +
21156 +/*     get_xid_list()
21157 +
21158 +       * get a subset of hashed xids for proc
21159 +       * assumes size is at least one                          */
21160 +
21161 +int get_xid_list(int index, unsigned int *xids, int size)
21162 +{
21163 +       int hindex, nr_xids = 0;
21164 +
21165 +       /* only show current and children */
21166 +       if (!vx_check(0, VX_ADMIN|VX_WATCH)) {
21167 +               if (index > 0)
21168 +                       return 0;
21169 +               xids[nr_xids] = vx_current_xid();
21170 +               return 1;
21171 +       }
21172 +
21173 +       for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
21174 +               struct hlist_head *head = &vx_info_hash[hindex];
21175 +               struct hlist_node *pos;
21176 +
21177 +               spin_lock(&vx_info_hash_lock);
21178 +               hlist_for_each(pos, head) {
21179 +                       struct vx_info *vxi;
21180 +
21181 +                       if (--index > 0)
21182 +                               continue;
21183 +
21184 +                       vxi = hlist_entry(pos, struct vx_info, vx_hlist);
21185 +                       xids[nr_xids] = vxi->vx_id;
21186 +                       if (++nr_xids >= size) {
21187 +                               spin_unlock(&vx_info_hash_lock);
21188 +                               goto out;
21189 +                       }
21190 +               }
21191 +               /* keep the lock time short */
21192 +               spin_unlock(&vx_info_hash_lock);
21193 +       }
21194 +out:
21195 +       return nr_xids;
21196 +}
21197 +#endif
21198 +
21199 +#ifdef CONFIG_VSERVER_DEBUG
21200 +
21201 +void   dump_vx_info_inactive(int level)
21202 +{
21203 +       struct hlist_node *entry, *next;
21204 +
21205 +       hlist_for_each_safe(entry, next, &vx_info_inactive) {
21206 +               struct vx_info *vxi =   /* hlist node -> containing vx_info */
21207 +                       hlist_entry(entry, struct vx_info, vx_hlist);
21208 +
21209 +               dump_vx_info(vxi, level);
21210 +       }
21211 +}
21212 +
21213 +#endif
21214 +
21215 +int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
21216 +{
21217 +       struct user_struct *new_user, *old_user;
21218 +
21219 +       if (!p || !vxi)
21220 +               BUG();
21221 +       new_user = alloc_uid(vxi->vx_id, p->uid);
21222 +       if (!new_user)
21223 +               return -ENOMEM;
21224 +
21225 +       old_user = p->user;
21226 +       if (new_user != old_user) {
21227 +               atomic_inc(&new_user->processes);
21228 +               atomic_dec(&old_user->processes);
21229 +               p->user = new_user;
21230 +       }
21231 +       free_uid(old_user);
21232 +       return 0;
21233 +}
21234 +
21235 +void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p)
21236 +{
21237 +       p->cap_effective &= vxi->vx_cap_bset;
21238 +       p->cap_inheritable &= vxi->vx_cap_bset;
21239 +       p->cap_permitted &= vxi->vx_cap_bset;
21240 +}
21241 +
21242 +
21243 +#include <linux/file.h>
21244 +
21245 +static int vx_openfd_task(struct task_struct *tsk)
21246 +{
21247 +       struct files_struct *files = tsk->files;
21248 +       struct fdtable *fdt;
21249 +       const unsigned long *bptr;
21250 +       int count, total;
21251 +
21252 +       /* no rcu_read_lock() because of spin_lock() */
21253 +       spin_lock(&files->file_lock);
21254 +       fdt = files_fdtable(files);
21255 +       bptr = fdt->open_fds->fds_bits;
21256 +       count = fdt->max_fds / (sizeof(unsigned long) * 8);
21257 +       for (total = 0; count > 0; count--) {
21258 +               if (*bptr)
21259 +                       total += hweight_long(*bptr);
21260 +               bptr++;
21261 +       }
21262 +       spin_unlock(&files->file_lock);
21263 +       return total;
21264 +}
21265 +
21266 +/*
21267 + *     migrate task to new context
21268 + *     gets vxi, puts old_vxi on change
21269 + */
21270 +
21271 +int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
21272 +{
21273 +       struct vx_info *old_vxi;
21274 +       int ret = 0;
21275 +
21276 +       if (!p || !vxi)
21277 +               BUG();
21278 +
21279 +       old_vxi = task_get_vx_info(p);
21280 +       if (old_vxi == vxi)
21281 +               goto out;
21282 +
21283 +       vxdprintk(VXD_CBIT(xid, 5),
21284 +               "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
21285 +               vxi->vx_id, atomic_read(&vxi->vx_usecnt));
21286 +
21287 +       if (!(ret = vx_migrate_user(p, vxi))) {
21288 +               int openfd;
21289 +
21290 +               task_lock(p);
21291 +               openfd = vx_openfd_task(p);
21292 +
21293 +               if (old_vxi) {
21294 +                       atomic_dec(&old_vxi->cvirt.nr_threads);
21295 +                       atomic_dec(&old_vxi->cvirt.nr_running);
21296 +                       __rlim_dec(&old_vxi->limit, RLIMIT_NPROC);
21297 +                       /* FIXME: what about the struct files here? */
21298 +                       __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd);
21299 +                       /* account for the executable */
21300 +                       __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY);
21301 +               }
21302 +               atomic_inc(&vxi->cvirt.nr_threads);
21303 +               atomic_inc(&vxi->cvirt.nr_running);
21304 +               __rlim_inc(&vxi->limit, RLIMIT_NPROC);
21305 +               /* FIXME: what about the struct files here? */
21306 +               __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd);
21307 +               /* account for the executable */
21308 +               __rlim_inc(&vxi->limit, VLIMIT_DENTRY);
21309 +
21310 +               if (old_vxi) {
21311 +                       release_vx_info(old_vxi, p);
21312 +                       clr_vx_info(&p->vx_info);
21313 +               }
21314 +               claim_vx_info(vxi, p);
21315 +               set_vx_info(&p->vx_info, vxi);
21316 +               p->xid = vxi->vx_id;
21317 +
21318 +               vxdprintk(VXD_CBIT(xid, 5),
21319 +                       "moved task %p into vxi:%p[#%d]",
21320 +                       p, vxi, vxi->vx_id);
21321 +
21322 +               vx_mask_cap_bset(vxi, p);
21323 +               task_unlock(p);
21324 +       }
21325 +out:
21326 +       put_vx_info(old_vxi);
21327 +       return ret;
21328 +}
21329 +
21330 +int vx_set_reaper(struct vx_info *vxi, struct task_struct *p)
21331 +{
21332 +       struct task_struct *old_reaper;
21333 +
21334 +       if (!vxi)
21335 +               return -EINVAL;
21336 +
21337 +       vxdprintk(VXD_CBIT(xid, 6),
21338 +               "vx_set_reaper(%p[#%d],%p[#%d,%d])",
21339 +               vxi, vxi->vx_id, p, p->xid, p->pid);
21340 +
21341 +       old_reaper = vxi->vx_reaper;
21342 +       if (old_reaper == p)
21343 +               return 0;
21344 +
21345 +       /* set new child reaper */
21346 +       get_task_struct(p);
21347 +       vxi->vx_reaper = p;
21348 +       put_task_struct(old_reaper);
21349 +       return 0;
21350 +}
21351 +
21352 +int vx_set_init(struct vx_info *vxi, struct task_struct *p)
21353 +{
21354 +       if (!vxi)
21355 +               return -EINVAL;
21356 +
21357 +       vxdprintk(VXD_CBIT(xid, 6),
21358 +               "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
21359 +               vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
21360 +
21361 +       vxi->vx_initpid = p->tgid;
21362 +       return 0;
21363 +}
21364 +
21365 +void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code)
21366 +{
21367 +       vxdprintk(VXD_CBIT(xid, 6),
21368 +               "vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
21369 +               vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
21370 +
21371 +       vxi->exit_code = code;
21372 +       vxi->vx_initpid = 0;
21373 +}
21374 +
21375 +void vx_set_persistent(struct vx_info *vxi)
21376 +{
21377 +       vxdprintk(VXD_CBIT(xid, 6),
21378 +               "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id);
21379 +
21380 +       if (vx_info_flags(vxi, VXF_PERSISTENT, 0)) {
21381 +               get_vx_info(vxi);
21382 +               claim_vx_info(vxi, current);
21383 +       } else {
21384 +               release_vx_info(vxi, current);
21385 +               put_vx_info(vxi);
21386 +       }
21387 +}
21388 +
21389 +
21390 +/*     task must be current or locked          */
21391 +
21392 +void   exit_vx_info(struct task_struct *p, int code)
21393 +{
21394 +       struct vx_info *vxi = p->vx_info;
21395 +
21396 +       if (vxi) {
21397 +               atomic_dec(&vxi->cvirt.nr_threads);
21398 +               vx_nproc_dec(p);
21399 +
21400 +               vxi->exit_code = code;
21401 +               if (vxi->vx_initpid == p->tgid)
21402 +                       vx_exit_init(vxi, p, code);
21403 +               if (vxi->vx_reaper == p)
21404 +                       vx_set_reaper(vxi, child_reaper);
21405 +               release_vx_info(vxi, p);
21406 +       }
21407 +}
21408 +
21409 +
21410 +/* vserver syscall commands below here */
21411 +
21412 +/* task xid and vx_info functions */
21413 +
21414 +#include <asm/uaccess.h>
21415 +
21416 +
21417 +int vc_task_xid(uint32_t id, void __user *data)
21418 +{
21419 +       xid_t xid;
21420 +
21421 +       if (id) {
21422 +               struct task_struct *tsk;
21423 +
21424 +               if (!vx_check(0, VX_ADMIN|VX_WATCH))
21425 +                       return -EPERM;
21426 +
21427 +               read_lock(&tasklist_lock);
21428 +               tsk = find_task_by_real_pid(id);
21429 +               xid = (tsk) ? tsk->xid : -ESRCH;
21430 +               read_unlock(&tasklist_lock);
21431 +       }
21432 +       else
21433 +               xid = vx_current_xid();
21434 +       return xid;
21435 +}
21436 +
21437 +
21438 +int vc_vx_info(uint32_t id, void __user *data)
21439 +{
21440 +       struct vx_info *vxi;
21441 +       struct vcmd_vx_info_v0 vc_data;
21442 +
21443 +       if (!capable(CAP_SYS_RESOURCE))
21444 +               return -EPERM;
21445 +
21446 +       vxi = lookup_vx_info(id);
21447 +       if (!vxi)
21448 +               return -ESRCH;
21449 +
21450 +       vc_data.xid = vxi->vx_id;
21451 +       vc_data.initpid = vxi->vx_initpid;
21452 +       put_vx_info(vxi);
21453 +
21454 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
21455 +               return -EFAULT;
21456 +       return 0;
21457 +}
21458 +
21459 +
21460 +/* context functions */
21461 +
21462 +int vc_ctx_create(uint32_t xid, void __user *data)
21463 +{
21464 +       struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET };
21465 +       struct vx_info *new_vxi;
21466 +       int ret;
21467 +
21468 +       if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
21469 +               return -EFAULT;
21470 +
21471 +       if ((xid > MAX_S_CONTEXT) && (xid != VX_DYNAMIC_ID))
21472 +               return -EINVAL;
21473 +       if (xid < 2)
21474 +               return -EINVAL;
21475 +
21476 +       new_vxi = __create_vx_info(xid);
21477 +       if (IS_ERR(new_vxi))
21478 +               return PTR_ERR(new_vxi);
21479 +
21480 +       /* initial flags */
21481 +       new_vxi->vx_flags = vc_data.flagword;
21482 +
21483 +       /* get a reference for persistent contexts */
21484 +       if ((vc_data.flagword & VXF_PERSISTENT))
21485 +               vx_set_persistent(new_vxi);
21486 +
21487 +       vs_state_change(new_vxi, VSC_STARTUP);
21488 +       ret = new_vxi->vx_id;
21489 +       vx_migrate_task(current, new_vxi);
21490 +       /* if this fails, we might end up with a hashed vx_info */
21491 +       put_vx_info(new_vxi);
21492 +       return ret;
21493 +}
21494 +
21495 +
21496 +int vc_ctx_migrate(uint32_t id, void __user *data)
21497 +{
21498 +       struct vcmd_ctx_migrate vc_data = { .flagword = 0 };
21499 +       struct vx_info *vxi;
21500 +
21501 +       if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
21502 +               return -EFAULT;
21503 +
21504 +       /* dirty hack until Spectator becomes a cap */
21505 +       if (id == 1) {
21506 +               current->xid = 1;
21507 +               return 0;
21508 +       }
21509 +
21510 +       vxi = lookup_vx_info(id);
21511 +       if (!vxi)
21512 +               return -ESRCH;
21513 +       vx_migrate_task(current, vxi);
21514 +       if (vc_data.flagword & VXM_SET_INIT)
21515 +               vx_set_init(vxi, current);
21516 +       if (vc_data.flagword & VXM_SET_REAPER)
21517 +               vx_set_reaper(vxi, current);
21518 +       put_vx_info(vxi);
21519 +       return 0;
21520 +}
21521 +
21522 +/* vc_get_cflags(): copy flagword and mask of context #id to userspace */
21523 +int vc_get_cflags(uint32_t id, void __user *data)
21524 +{
21525 +       struct vx_info *vxi;
21526 +       struct vcmd_ctx_flags_v0 vc_data;
21527 +
21528 +       vxi = lookup_vx_info(id);
21529 +       if (!vxi)
21530 +               return -ESRCH;
21531 +
21532 +       vc_data.flagword = vxi->vx_flags;
21533 +
21534 +       /* special STATE flag handling; ~0ULL is 64 bit wide on 32bit too */
21535 +       vc_data.mask = vx_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME);
21536 +
21537 +       put_vx_info(vxi);
21538 +
21539 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
21540 +               return -EFAULT;
21541 +       return 0;
21542 +}
21543 +
21544 +int vc_set_cflags(uint32_t id, void __user *data)
21545 +{
21546 +       struct vx_info *vxi;
21547 +       struct vcmd_ctx_flags_v0 vc_data;
21548 +       uint64_t mask, trigger;
21549 +
21550 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
21551 +               return -EFAULT;
21552 +
21553 +       vxi = lookup_vx_info(id);
21554 +       if (!vxi)
21555 +               return -ESRCH;
21556 +
21557 +       /* special STATE flag handling */
21558 +       mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
21559 +       trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
21560 +
21561 +       if (vxi == current->vx_info) {
21562 +               if (trigger & VXF_STATE_SETUP)
21563 +                       vx_mask_cap_bset(vxi, current);
21564 +               if (trigger & VXF_STATE_INIT) {
21565 +                       vx_set_init(vxi, current);
21566 +                       vx_set_reaper(vxi, current);
21567 +               }
21568 +       }
21569 +
21570 +       vxi->vx_flags = vx_mask_flags(vxi->vx_flags,
21571 +               vc_data.flagword, mask);
21572 +       if (trigger & VXF_PERSISTENT)
21573 +               vx_set_persistent(vxi);
21574 +
21575 +       put_vx_info(vxi);
21576 +       return 0;
21577 +}
21578 +
21579 +static int do_get_caps(xid_t xid, uint64_t *bcaps, uint64_t *ccaps)
21580 +{
21581 +       struct vx_info *vxi;
21582 +
21583 +       vxi = lookup_vx_info(xid);
21584 +       if (!vxi)
21585 +               return -ESRCH;
21586 +
21587 +       if (bcaps)
21588 +               *bcaps = vxi->vx_bcaps;
21589 +       if (ccaps)
21590 +               *ccaps = vxi->vx_ccaps;
21591 +
21592 +       put_vx_info(vxi);
21593 +       return 0;
21594 +}
21595 +
21596 +int vc_get_ccaps_v0(uint32_t id, void __user *data)
21597 +{
21598 +       struct vcmd_ctx_caps_v0 vc_data;
21599 +       int ret;
21600 +
21601 +       ret = do_get_caps(id, &vc_data.bcaps, &vc_data.ccaps);
21602 +       if (ret)
21603 +               return ret;
21604 +       vc_data.cmask = ~0ULL;  /* ULL: set all 64 bits, on 32bit too */
21605 +
21606 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
21607 +               return -EFAULT;
21608 +       return 0;
21609 +}
21610 +
21611 +int vc_get_ccaps(uint32_t id, void __user *data)
21612 +{
21613 +       struct vcmd_ctx_caps_v1 vc_data;
21614 +       int ret;
21615 +
21616 +       ret = do_get_caps(id, NULL, &vc_data.ccaps);
21617 +       if (ret)
21618 +               return ret;
21619 +       vc_data.cmask = ~0ULL;  /* ULL: set all 64 bits, on 32bit too */
21620 +
21621 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
21622 +               return -EFAULT;
21623 +       return 0;
21624 +}
21625 +
21626 +static int do_set_caps(xid_t xid, uint64_t bcaps, uint64_t bmask,
21627 +       uint64_t ccaps, uint64_t cmask)
21628 +{
21629 +       struct vx_info *vxi;
21630 +
21631 +       vxi = lookup_vx_info(xid);
21632 +       if (!vxi)
21633 +               return -ESRCH;
21634 +
21635 +       vxi->vx_bcaps = vx_mask_flags(vxi->vx_bcaps, bcaps, bmask);
21636 +       vxi->vx_ccaps = vx_mask_flags(vxi->vx_ccaps, ccaps, cmask);
21637 +
21638 +       put_vx_info(vxi);
21639 +       return 0;
21640 +}
21641 +
21642 +int vc_set_ccaps_v0(uint32_t id, void __user *data)
21643 +{
21644 +       struct vcmd_ctx_caps_v0 vc_data;
21645 +
21646 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
21647 +               return -EFAULT;
21648 +
21649 +       /* simulate old &= behaviour for bcaps */
21650 +       return do_set_caps(id, 0, ~vc_data.bcaps,
21651 +               vc_data.ccaps, vc_data.cmask);
21652 +}
21653 +
21654 +int vc_set_ccaps(uint32_t id, void __user *data)
21655 +{
21656 +       struct vcmd_ctx_caps_v1 vc_data;
21657 +
21658 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
21659 +               return -EFAULT;
21660 +
21661 +       return do_set_caps(id, 0, 0, vc_data.ccaps, vc_data.cmask);
21662 +}
21663 +
21664 +int vc_get_bcaps(uint32_t id, void __user *data)
21665 +{
21666 +       struct vcmd_bcaps vc_data;
21667 +       int ret;
21668 +
21669 +       ret = do_get_caps(id, &vc_data.bcaps, NULL);
21670 +       if (ret)
21671 +               return ret;
21672 +       vc_data.bmask = ~0ULL;  /* ULL: set all 64 bits, on 32bit too */
21673 +
21674 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
21675 +               return -EFAULT;
21676 +       return 0;
21677 +}
21678 +
21679 +int vc_set_bcaps(uint32_t id, void __user *data)
21680 +{
21681 +       struct vcmd_bcaps vc_data;
21682 +
21683 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
21684 +               return -EFAULT;
21685 +
21686 +       return do_set_caps(id, vc_data.bcaps, vc_data.bmask, 0, 0);
21687 +}
21688 +
21689 +#include <linux/module.h>
21690 +
21691 +EXPORT_SYMBOL_GPL(free_vx_info);
21692 +
21693 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/cvirt.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/cvirt.c
21694 --- linux-2.6.16.20/kernel/vserver/cvirt.c      1970-01-01 01:00:00 +0100
21695 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/cvirt.c 2006-05-31 01:29:12 +0200
21696 @@ -0,0 +1,297 @@
21697 +/*
21698 + *  linux/kernel/vserver/cvirt.c
21699 + *
21700 + *  Virtual Server: Context Virtualization
21701 + *
21702 + *  Copyright (C) 2004-2005  Herbert Pötzl
21703 + *
21704 + *  V0.01  broken out from limit.c
21705 + *  V0.02  added utsname stuff
21706 + *
21707 + */
21708 +
21709 +#include <linux/sched.h>
21710 +#include <linux/sysctl.h>
21711 +#include <linux/types.h>
21712 +#include <linux/vs_context.h>
21713 +#include <linux/vs_cvirt.h>
21714 +#include <linux/vserver/switch.h>
21715 +#include <linux/vserver/cvirt_cmd.h>
21716 +
21717 +#include <asm/errno.h>
21718 +#include <asm/uaccess.h>
21719 +
21720 +
21721 +void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
21722 +{
21723 +       struct vx_info *vxi = current->vx_info;
21724 +
21725 +       set_normalized_timespec(uptime,
21726 +               uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec,
21727 +               uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec);
21728 +       if (!idle)
21729 +               return;
21730 +       set_normalized_timespec(idle,
21731 +               idle->tv_sec - vxi->cvirt.bias_idle.tv_sec,
21732 +               idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec);
21733 +       return;
21734 +}
21735 +
21736 +uint64_t vx_idle_jiffies(void)
21737 +{
21738 +       return init_task.utime + init_task.stime;
21739 +}
21740 +
21741 +
21742 +/* blend n (scaled by FSHIFT) into load, weighted by delta/wsize */
21743 +static inline uint32_t __update_loadavg(uint32_t load,
21744 +       int wsize, int delta, int n)
21745 +{
21746 +       unsigned long long calc, prev;
21747 +
21748 +       /* just set it to n */
21749 +       if (unlikely(delta >= wsize))
21750 +               return (n << FSHIFT);
21751 +
21752 +       calc = (unsigned long long)delta * n;   /* widen before multiply */
21753 +       calc <<= FSHIFT;
21754 +       prev = (wsize - delta);
21755 +       prev *= load;
21756 +       calc += prev;
21757 +       do_div(calc, wsize);
21758 +       return calc;
21759 +}
21760 +
21761 +
21762 +void vx_update_load(struct vx_info *vxi)
21763 +{
21764 +       uint32_t now, last, delta;
21765 +       unsigned int nr_running, nr_uninterruptible;
21766 +       unsigned int total;
21767 +
21768 +       spin_lock(&vxi->cvirt.load_lock);
21769 +
21770 +       now = jiffies;
21771 +       last = vxi->cvirt.load_last;
21772 +       delta = now - last;
21773 +
21774 +       if (delta < 5*HZ)
21775 +               goto out;
21776 +
21777 +       nr_running = atomic_read(&vxi->cvirt.nr_running);
21778 +       nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible);
21779 +       total = nr_running + nr_uninterruptible;
21780 +
21781 +       vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0],
21782 +               60*HZ, delta, total);
21783 +       vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1],
21784 +               5*60*HZ, delta, total);
21785 +       vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2],
21786 +               15*60*HZ, delta, total);
21787 +
21788 +       vxi->cvirt.load_last = now;
21789 +out:
21790 +       atomic_inc(&vxi->cvirt.load_updates);
21791 +       spin_unlock(&vxi->cvirt.load_lock);
21792 +}
21793 +
21794 +
21795 +int vx_uts_virt_handler(struct ctl_table *ctl, int write, xid_t xid,
21796 +       void **datap, size_t *lenp)
21797 +{
21798 +       switch (ctl->ctl_name) {
21799 +       case KERN_OSTYPE:
21800 +               *datap = vx_new_uts(sysname);
21801 +               break;
21802 +       case KERN_OSRELEASE:
21803 +               *datap = vx_new_uts(release);
21804 +               break;
21805 +       case KERN_VERSION:
21806 +               *datap = vx_new_uts(version);
21807 +               break;
21808 +       case KERN_NODENAME:
21809 +               *datap = vx_new_uts(nodename);
21810 +               break;
21811 +       case KERN_DOMAINNAME:
21812 +               *datap = vx_new_uts(domainname);
21813 +               break;
21814 +       }
21815 +
21816 +       return 0;
21817 +}
21818 +
21819 +
21820 +
21821 +/*
21822 + * Commands to do_syslog:
21823 + *
21824 + *      0 -- Close the log.  Currently a NOP.
21825 + *      1 -- Open the log. Currently a NOP.
21826 + *      2 -- Read from the log.
21827 + *      3 -- Read all messages remaining in the ring buffer.
21828 + *      4 -- Read and clear all messages remaining in the ring buffer
21829 + *      5 -- Clear ring buffer.
21830 + *      6 -- Disable printk's to console
21831 + *      7 -- Enable printk's to console
21832 + *      8 -- Set level of messages printed to console
21833 + *      9 -- Return number of unread characters in the log buffer
21834 + *     10 -- Return size of the log buffer
21835 + */
21836 +int vx_do_syslog(int type, char __user *buf, int len)
21837 +{
21838 +       int error = 0;
21839 +       int do_clear = 0;
21840 +       struct vx_info *vxi = current->vx_info;
21841 +       struct _vx_syslog *log;
21842 +
21843 +       if (!vxi)
21844 +               return -EINVAL;
21845 +       log = &vxi->cvirt.syslog;
21846 +
21847 +       switch (type) {
21848 +       case 0:         /* Close log */
21849 +       case 1:         /* Open log */
21850 +               break;
21851 +       case 2:         /* Read from log */
21852 +               error = wait_event_interruptible(log->log_wait,
21853 +                       (log->log_start - log->log_end));
21854 +               if (error)
21855 +                       break;
21856 +               spin_lock_irq(&log->logbuf_lock);
21857 +               spin_unlock_irq(&log->logbuf_lock);
21858 +               break;
21859 +       case 4:         /* Read/clear last kernel messages */
21860 +               do_clear = 1;
21861 +               /* fall through */
21862 +       case 3:         /* Read last kernel messages */
21863 +               return 0;
21864 +
21865 +       case 5:         /* Clear ring buffer */
21866 +               return 0;
21867 +
21868 +       case 6:         /* Disable logging to console */
21869 +       case 7:         /* Enable logging to console */
21870 +       case 8:         /* Set level of messages printed to console */
21871 +               break;
21872 +
21873 +       case 9:         /* Number of chars in the log buffer */
21874 +               return 0;
21875 +       case 10:        /* Size of the log buffer */
21876 +               return 0;
21877 +       default:
21878 +               error = -EINVAL;
21879 +               break;
21880 +       }
21881 +       return error;
21882 +}
21883 +
21884 +
21885 +/* virtual host info names */
21886 +
21887 +static char * vx_vhi_name(struct vx_info *vxi, int id)
21888 +{
21889 +       switch (id) {
21890 +       case VHIN_CONTEXT:
21891 +               return vxi->vx_name;
21892 +       case VHIN_SYSNAME:
21893 +               return vxi->cvirt.utsname.sysname;
21894 +       case VHIN_NODENAME:
21895 +               return vxi->cvirt.utsname.nodename;
21896 +       case VHIN_RELEASE:
21897 +               return vxi->cvirt.utsname.release;
21898 +       case VHIN_VERSION:
21899 +               return vxi->cvirt.utsname.version;
21900 +       case VHIN_MACHINE:
21901 +               return vxi->cvirt.utsname.machine;
21902 +       case VHIN_DOMAINNAME:
21903 +               return vxi->cvirt.utsname.domainname;
21904 +       default:
21905 +               return NULL;
21906 +       }
21907 +       return NULL;
21908 +}
21909 +
21910 +int vc_set_vhi_name(uint32_t id, void __user *data)
21911 +{
21912 +       struct vx_info *vxi;
21913 +       struct vcmd_vhi_name_v0 vc_data;
21914 +       char *name;
21915 +
21916 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
21917 +               return -EFAULT;
21918 +
21919 +       vxi = lookup_vx_info(id);
21920 +       if (!vxi)
21921 +               return -ESRCH;
21922 +
21923 +       name = vx_vhi_name(vxi, vc_data.field);
21924 +       if (name)
21925 +               memcpy(name, vc_data.name, 65);
21926 +       put_vx_info(vxi);
21927 +       return (name ? 0 : -EFAULT);
21928 +}
21929 +
21930 +int vc_get_vhi_name(uint32_t id, void __user *data)
21931 +{
21932 +       struct vx_info *vxi;
21933 +       struct vcmd_vhi_name_v0 vc_data;
21934 +       char *name;
21935 +
21936 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
21937 +               return -EFAULT;
21938 +
21939 +       vxi = lookup_vx_info(id);
21940 +       if (!vxi)
21941 +               return -ESRCH;
21942 +
21943 +       name = vx_vhi_name(vxi, vc_data.field);
21944 +       if (!name)
21945 +               goto out_put;
21946 +
21947 +       memcpy(vc_data.name, name, 65);
21948 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
21949 +               name = NULL;    /* -EFAULT below, but drop the vxi ref first */
21950 +out_put:
21951 +       put_vx_info(vxi);
21952 +       return (name ? 0 : -EFAULT);
21953 +}
21954 +
21955 +#ifdef CONFIG_VSERVER_VTIME
21956 +
21957 +/* virtualized time base */
21958 +
21959 +void vx_gettimeofday(struct timeval *tv)
21960 +{
21961 +       do_gettimeofday(tv);
21962 +       if (!vx_flags(VXF_VIRT_TIME, 0))
21963 +               return;
21964 +
21965 +       tv->tv_sec += current->vx_info->cvirt.bias_tv.tv_sec;
21966 +       tv->tv_usec += current->vx_info->cvirt.bias_tv.tv_usec;
21967 +
21968 +       if (tv->tv_usec >= USEC_PER_SEC) {
21969 +               tv->tv_sec++;
21970 +               tv->tv_usec -= USEC_PER_SEC;
21971 +       } else if (tv->tv_usec < 0) {
21972 +               tv->tv_sec--;
21973 +               tv->tv_usec += USEC_PER_SEC;
21974 +       }
21975 +}
21976 +
21977 +int vx_settimeofday(struct timespec *ts)
21978 +{
21979 +       struct timeval tv;
21980 +
21981 +       if (!vx_flags(VXF_VIRT_TIME, 0))
21982 +               return do_settimeofday(ts);
21983 +
21984 +       do_gettimeofday(&tv);
21985 +       current->vx_info->cvirt.bias_tv.tv_sec =
21986 +               ts->tv_sec - tv.tv_sec;
21987 +       current->vx_info->cvirt.bias_tv.tv_usec =
21988 +               (ts->tv_nsec/NSEC_PER_USEC) - tv.tv_usec;
21989 +       return 0;
21990 +}
21991 +
21992 +#endif
21993 +
21994 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/cvirt_init.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/cvirt_init.h
21995 --- linux-2.6.16.20/kernel/vserver/cvirt_init.h 1970-01-01 01:00:00 +0100
21996 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/cvirt_init.h    2006-05-29 17:38:33 +0200
21997 @@ -0,0 +1,99 @@
21998 +
21999 +
22000 +extern uint64_t vx_idle_jiffies(void);
22001 +
22002 +static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
22003 +{
22004 +       uint64_t idle_jiffies = vx_idle_jiffies();
22005 +       uint64_t nsuptime;
22006 +
22007 +       do_posix_clock_monotonic_gettime(&cvirt->bias_uptime);
22008 +       nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec
22009 +               * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;
22010 +       cvirt->bias_clock = nsec_to_clock_t(nsuptime);
22011 +       cvirt->bias_tv.tv_sec = 0;
22012 +       cvirt->bias_tv.tv_usec = 0;
22013 +
22014 +       jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
22015 +       atomic_set(&cvirt->nr_threads, 0);
22016 +       atomic_set(&cvirt->nr_running, 0);
22017 +       atomic_set(&cvirt->nr_uninterruptible, 0);
22018 +       atomic_set(&cvirt->nr_onhold, 0);
22019 +
22020 +       down_read(&uts_sem);
22021 +       cvirt->utsname = system_utsname;
22022 +       up_read(&uts_sem);
22023 +
22024 +       spin_lock_init(&cvirt->load_lock);
22025 +       cvirt->load_last = jiffies;
22026 +       atomic_set(&cvirt->load_updates, 0);
22027 +       cvirt->load[0] = 0;
22028 +       cvirt->load[1] = 0;
22029 +       cvirt->load[2] = 0;
22030 +       atomic_set(&cvirt->total_forks, 0);
22031 +
22032 +       spin_lock_init(&cvirt->syslog.logbuf_lock);
22033 +       init_waitqueue_head(&cvirt->syslog.log_wait);
22034 +       cvirt->syslog.log_start = 0;
22035 +       cvirt->syslog.log_end = 0;
22036 +       cvirt->syslog.con_start = 0;
22037 +       cvirt->syslog.logged_chars = 0;
22038 +}
22039 +
22040 +static inline
22041 +void vx_info_init_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
22042 +{
22043 +       // cvirt_pc->cpustat = { 0 };
22044 +}
22045 +
22046 +static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
22047 +{
22048 +#ifdef CONFIG_VSERVER_DEBUG
22049 +       int value;
22050 +
22051 +       vxwprintk((value = atomic_read(&cvirt->nr_threads)),
22052 +               "!!! cvirt: %p[nr_threads] = %d on exit.",
22053 +               cvirt, value);
22054 +       vxwprintk((value = atomic_read(&cvirt->nr_running)),
22055 +               "!!! cvirt: %p[nr_running] = %d on exit.",
22056 +               cvirt, value);
22057 +       vxwprintk((value = atomic_read(&cvirt->nr_uninterruptible)),
22058 +               "!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
22059 +               cvirt, value);
22060 +       vxwprintk((value = atomic_read(&cvirt->nr_onhold)),
22061 +               "!!! cvirt: %p[nr_onhold] = %d on exit.",
22062 +               cvirt, value);
22063 +#endif
22064 +       return;
22065 +}
22066 +
22067 +static inline
22068 +void vx_info_exit_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
22069 +{
22070 +       return;
22071 +}
22072 +
22073 +
22074 +static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
22075 +{
22076 +       int i,j;
22077 +
22078 +
22079 +       for (i=0; i<5; i++) {
22080 +               for (j=0; j<3; j++) {
22081 +                       atomic_set(&cacct->sock[i][j].count, 0);
22082 +                       atomic_set(&cacct->sock[i][j].total, 0);
22083 +               }
22084 +       }
22085 +       for (i=0; i<8; i++)
22086 +               atomic_set(&cacct->slab[i], 0);
22087 +       for (i=0; i<5; i++)
22088 +               for (j=0; j<4; j++)
22089 +                       atomic_set(&cacct->page[i][j], 0);
22090 +}
22091 +
22092 +static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
22093 +{
22094 +       return;
22095 +}
22096 +
22097 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/cvirt_proc.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/cvirt_proc.h
22098 --- linux-2.6.16.20/kernel/vserver/cvirt_proc.h 1970-01-01 01:00:00 +0100
22099 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/cvirt_proc.h    2006-04-26 19:07:00 +0200
22100 @@ -0,0 +1,129 @@
22101 +#ifndef _VX_CVIRT_PROC_H
22102 +#define _VX_CVIRT_PROC_H
22103 +
22104 +#include <linux/sched.h>
22105 +
22106 +
22107 +#define LOAD_INT(x) ((x) >> FSHIFT)
22108 +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
22109 +/* format the cvirt state as text into buffer; returns the text length */
22110 +static inline
22111 +int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
22112 +{
22113 +       int avg0, avg1, avg2;
22114 +       int len;
22115 +
22116 +       len = sprintf(buffer,
22117 +               "BiasUptime:\t%lu.%02lu\n",
22118 +               (unsigned long)cvirt->bias_uptime.tv_sec,
22119 +               (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
22120 +       len += sprintf(buffer + len,
22121 +               "SysName:\t%.*s\n"
22122 +               "NodeName:\t%.*s\n"
22123 +               "Release:\t%.*s\n"
22124 +               "Version:\t%.*s\n"
22125 +               "Machine:\t%.*s\n"
22126 +               "DomainName:\t%.*s\n",
22127 +               __NEW_UTS_LEN, cvirt->utsname.sysname,
22128 +               __NEW_UTS_LEN, cvirt->utsname.nodename,
22129 +               __NEW_UTS_LEN, cvirt->utsname.release,
22130 +               __NEW_UTS_LEN, cvirt->utsname.version,
22131 +               __NEW_UTS_LEN, cvirt->utsname.machine,
22132 +               __NEW_UTS_LEN, cvirt->utsname.domainname);
22133 +
22134 +       /* adding FIXED_1/200 rounds the value printed with two decimals */
22135 +       avg0 = cvirt->load[0] + (FIXED_1/200);
22136 +       avg1 = cvirt->load[1] + (FIXED_1/200);
22137 +       avg2 = cvirt->load[2] + (FIXED_1/200);
22138 +       len += sprintf(buffer + len,
22139 +               "nr_threads:\t%d\n"
22140 +               "nr_running:\t%d\n"
22141 +               "nr_unintr:\t%d\n"
22142 +               "nr_onhold:\t%d\n"
22143 +               "load_updates:\t%d\n"
22144 +               "loadavg:\t%d.%02d %d.%02d %d.%02d\n"
22145 +               "total_forks:\t%d\n",
22146 +               atomic_read(&cvirt->nr_threads),
22147 +               atomic_read(&cvirt->nr_running),
22148 +               atomic_read(&cvirt->nr_uninterruptible),
22149 +               atomic_read(&cvirt->nr_onhold),
22150 +               atomic_read(&cvirt->load_updates),
22151 +               LOAD_INT(avg0), LOAD_FRAC(avg0),
22152 +               LOAD_INT(avg1), LOAD_FRAC(avg1),
22153 +               LOAD_INT(avg2), LOAD_FRAC(avg2),
22154 +               atomic_read(&cvirt->total_forks));
22155 +
22156 +       return len;
22157 +}
22158 +/* per-cpu cvirt output: writes nothing and reports a length of 0 */
22159 +static inline
22160 +int vx_info_proc_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc,
22161 +       char *buffer, int cpu)
22162 +{
22163 +       /* none of the arguments is used here */
22164 +       return 0;
22165 +}
22166 +/* read the .count field of socket accounting slot sock[type][pos] */
22167 +static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
22168 +{
22169 +       return atomic_read(&cacct->sock[type][pos].count);
22170 +}
22171 +
22172 +/* read the .total field of socket accounting slot sock[type][pos] */
22173 +static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
22174 +{
22175 +       return atomic_read(&cacct->sock[type][pos].total);
22176 +}
22177 +
22178 +
22179 +#define VX_SOCKA_TOP   \
22180 +       "Type\t    recv #/bytes\t\t   send #/bytes\t\t    fail #/bytes\n"
22181 +/* format socket, slab and page counters into buffer; returns the length */
22182 +static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
22183 +{
22184 +       int i,j, length = 0;
22185 +       static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" };
22186 +       /* header row, then one row per type[] entry with 3 count/total pairs */
22187 +       length += sprintf(buffer + length, VX_SOCKA_TOP);
22188 +       for (i=0; i<5; i++) {
22189 +               length += sprintf(buffer + length,
22190 +                       "%s:", type[i]);
22191 +               for (j=0; j<3; j++) {
22192 +                       length += sprintf(buffer + length,
22193 +                               "\t%10lu/%-10lu"
22194 +                               ,vx_sock_count(cacct, i, j)
22195 +                               ,vx_sock_total(cacct, i, j)
22196 +                               );
22197 +               }
22198 +               buffer[length++] = '\n';
22199 +       }
22200 +       /* the slab counters are printed in the index order 1, 4, 0, 2 */
22201 +       length += sprintf(buffer + length, "\n");
22202 +       length += sprintf(buffer + length,
22203 +               "slab:\t %8u %8u %8u %8u\n"
22204 +               ,atomic_read(&cacct->slab[1])
22205 +               ,atomic_read(&cacct->slab[4])
22206 +               ,atomic_read(&cacct->slab[0])
22207 +               ,atomic_read(&cacct->slab[2])
22208 +               );
22209 +       /* one row for each page[] index 0..4 with its eight counters */
22210 +       length += sprintf(buffer + length, "\n");
22211 +       for (i=0; i<5; i++) {
22212 +               length += sprintf(buffer + length,
22213 +                       "page[%d]: %8u %8u %8u %8u\t %8u %8u %8u %8u\n"
22214 +                       ,i
22215 +                       ,atomic_read(&cacct->page[i][0])
22216 +                       ,atomic_read(&cacct->page[i][1])
22217 +                       ,atomic_read(&cacct->page[i][2])
22218 +                       ,atomic_read(&cacct->page[i][3])
22219 +                       ,atomic_read(&cacct->page[i][4])
22220 +                       ,atomic_read(&cacct->page[i][5])
22221 +                       ,atomic_read(&cacct->page[i][6])
22222 +                       ,atomic_read(&cacct->page[i][7])
22223 +                       );
22224 +       }
22225 +
22226 +       return length;
22227 +}
22228 +
22229 +#endif /* _VX_CVIRT_PROC_H */
22230 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/debug.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/debug.c
22231 --- linux-2.6.16.20/kernel/vserver/debug.c      1970-01-01 01:00:00 +0100
22232 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/debug.c 2006-04-26 19:07:00 +0200
22233 @@ -0,0 +1,36 @@
22234 +/*
22235 + *  kernel/vserver/debug.c
22236 + *
22237 + *  Copyright (C) 2005  Herbert Pötzl
22238 + *
22239 + *  V0.01  vx_info dump support
22240 + *
22241 + */
22242 +
22243 +#include <linux/errno.h>
22244 +#include <linux/kernel.h>
22245 +#include <linux/module.h>
22246 +
22247 +#include <linux/vserver/cvirt_def.h>
22248 +#include <linux/vserver/limit_def.h>
22249 +#include <linux/vserver/sched_def.h>
22250 +
22251 +/* print a summary line for vxi and, when level > 0, its sub-structures */
22252 +void   dump_vx_info(struct vx_info *vxi, int level)
22253 +{
22254 +       printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id,
22255 +               atomic_read(&vxi->vx_usecnt),
22256 +               atomic_read(&vxi->vx_tasks),
22257 +               vxi->vx_state);
22258 +       if (level > 0) {
22259 +               __dump_vx_limit(&vxi->limit);
22260 +               __dump_vx_sched(&vxi->sched);
22261 +               __dump_vx_cvirt(&vxi->cvirt);
22262 +               __dump_vx_cacct(&vxi->cacct);
22263 +       }
22264 +       printk("---\n");       /* end-of-dump marker line */
22265 +}
22266 +
22267 +
22268 +EXPORT_SYMBOL_GPL(dump_vx_info);
22269 +
22270 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/dlimit.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/dlimit.c
22271 --- linux-2.6.16.20/kernel/vserver/dlimit.c     1970-01-01 01:00:00 +0100
22272 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/dlimit.c        2006-04-29 02:58:07 +0200
22273 @@ -0,0 +1,530 @@
22274 +/*
22275 + *  linux/kernel/vserver/dlimit.c
22276 + *
22277 + *  Virtual Server: Context Disk Limits
22278 + *
22279 + *  Copyright (C) 2004-2005  Herbert Pötzl
22280 + *
22281 + *  V0.01  initial version
22282 + *  V0.02  compat32 splitup
22283 + *
22284 + */
22285 +
22286 +#include <linux/fs.h>
22287 +#include <linux/namespace.h>
22288 +#include <linux/namei.h>
22289 +#include <linux/statfs.h>
22290 +#include <linux/compat.h>
22291 +#include <linux/vserver/switch.h>
22292 +#include <linux/vs_context.h>
22293 +#include <linux/vs_tag.h>
22294 +#include <linux/vs_dlimit.h>
22295 +#include <linux/vserver/dlimit_cmd.h>
22296 +
22297 +#include <asm/errno.h>
22298 +#include <asm/uaccess.h>
22299 +
22300 +/*     __alloc_dl_info()
22301 +
22302 +       * returns a zeroed dl_info set up for (sb, tag), or NULL
22303 +       * the new entry is not added to the hash table          */
22304 +
22305 +static struct dl_info *__alloc_dl_info(struct super_block *sb, tag_t tag)
22306 +{
22307 +       struct dl_info *dli;
22308 +
22309 +       vxdprintk(VXD_CBIT(dlim, 5),
22310 +               "alloc_dl_info(%p,%d)*", sb, tag);
22311 +
22312 +       /* would this benefit from a slab cache? */
22313 +       dli = kmalloc(sizeof(*dli), GFP_KERNEL);
22314 +       if (dli == NULL)
22315 +               return NULL;
22316 +
22317 +       memset(dli, 0, sizeof(*dli));
22318 +       dli->dl_sb = sb;
22319 +       dli->dl_tag = tag;
22320 +       spin_lock_init(&dli->dl_lock);
22321 +       atomic_set(&dli->dl_usecnt, 0);
22322 +       atomic_set(&dli->dl_refcnt, 0);
22323 +       INIT_HLIST_NODE(&dli->dl_hlist);
22324 +       INIT_RCU_HEAD(&dli->dl_rcu);
22325 +
22326 +       /* any further initialization belongs here */
22327 +
22328 +       vxdprintk(VXD_CBIT(dlim, 4),
22329 +               "alloc_dl_info(%p,%d) = %p", sb, tag, dli);
22330 +       return dli;
22331 +}
22332 +
22333 +/*     __dealloc_dl_info()
22334 +
22335 +       * final disposal of dl_info, both counters must be zero */
22336 +
22337 +static void __dealloc_dl_info(struct dl_info *dli)
22338 +{
22339 +       vxdprintk(VXD_CBIT(dlim, 4),
22340 +               "dealloc_dl_info(%p)", dli);
22341 +
22342 +       /* invalidate the identifying fields before the memory goes away */
22343 +       dli->dl_sb = NULL;
22344 +       dli->dl_tag = -1;
22345 +       dli->dl_hlist.next = LIST_POISON1;
22346 +       BUG_ON(atomic_read(&dli->dl_usecnt) != 0);
22347 +       BUG_ON(atomic_read(&dli->dl_refcnt) != 0);
22348 +
22349 +       kfree(dli);
22350 +}
22351 +
22352 +
22353 +/*     hash table for dl_info hash */
22354 +
22355 +#define DL_HASH_SIZE   13
22356 +
22357 +struct hlist_head dl_info_hash[DL_HASH_SIZE];
22358 +
22359 +static spinlock_t dl_info_hash_lock = SPIN_LOCK_UNLOCKED;
22360 +
22361 +/* map a (superblock, tag) pair to a bucket index below DL_HASH_SIZE */
22362 +static inline unsigned int __hashval(struct super_block *sb, tag_t tag)
22363 +{
22364 +       return ((tag ^ (unsigned long)sb) % DL_HASH_SIZE);
22365 +}
22366 +
22367 +
22368 +
22369 +/*     __hash_dl_info()
22370 +
22371 +       * add the dli to the global hash table
22372 +       * requires the hash_lock to be held                     */
22373 +
22374 +static inline void __hash_dl_info(struct dl_info *dli)
22375 +{
22376 +       unsigned int bucket = __hashval(dli->dl_sb, dli->dl_tag);
22377 +
22378 +       vxdprintk(VXD_CBIT(dlim, 6),
22379 +               "__hash_dl_info: %p[#%d]", dli, dli->dl_tag);
22380 +       /* this get is paired with the put in __unhash_dl_info() */
22381 +       get_dl_info(dli);
22382 +       hlist_add_head_rcu(&dli->dl_hlist, &dl_info_hash[bucket]);
22383 +}
22384 +
22385 +/*     __unhash_dl_info()
22386 +
22387 +       * remove the dli from the global hash table
22388 +       * requires the hash_lock to be held                     */
22389 +
22390 +static inline void __unhash_dl_info(struct dl_info *dli)
22391 +{
22392 +       vxdprintk(VXD_CBIT(dlim, 6),
22393 +               "__unhash_dl_info: %p[#%d]", dli, dli->dl_tag);
22394 +       hlist_del_rcu(&dli->dl_hlist);
22395 +       put_dl_info(dli);       /* pairs with get_dl_info() in __hash_dl_info() */
22396 +}
22397 +
22398 +
22399 +/*     __lookup_dl_info()
22400 +
22401 +       * requires the rcu_read_lock()
22402 +       * doesn't increment the dl_refcnt                       */
22403 +
22404 +static inline struct dl_info *__lookup_dl_info(struct super_block *sb, tag_t tag)
22405 +{
22406 +       struct hlist_head *head = &dl_info_hash[__hashval(sb, tag)];
22407 +       struct hlist_node *pos;
22408 +       struct dl_info *dli;
22409 +
22410 +       hlist_for_each_entry_rcu(dli, pos, head, dl_hlist) {
22411 +               /* pos is only the iteration cursor, dli is the entry;
22412 +                * both tag and superblock are compared because
22413 +                * different pairs can land in the same bucket */
22414 +
22415 +               if (dli->dl_tag == tag && dli->dl_sb == sb) {
22416 +                       return dli;
22417 +               }
22418 +       }
22419 +       return NULL;
22420 +}
22421 +
22422 +/* look up (sb, tag) under rcu_read_lock() and pass the hit to get_dl_info() */
22423 +struct dl_info *locate_dl_info(struct super_block *sb, tag_t tag)
22424 +{
22425 +       struct dl_info *dli;
22426 +       /* NOTE(review): a miss hands NULL to get_dl_info(); presumably NULL-safe */
22427 +       rcu_read_lock();
22428 +       dli = get_dl_info(__lookup_dl_info(sb, tag));
22429 +       vxdprintk(VXD_CBIT(dlim, 7),
22430 +               "locate_dl_info(%p,#%d) = %p", sb, tag, dli);
22431 +       rcu_read_unlock();
22432 +       return dli;
22433 +}
22434 +/* free the dl_info embedding head (presumably a call_rcu() callback) */
22435 +void rcu_free_dl_info(struct rcu_head *head)
22436 +{
22437 +       struct dl_info *dli = container_of(head, struct dl_info, dl_rcu);
22438 +       int usecnt, refcnt;
22439 +
22440 +       BUG_ON(!dli || !head);
22441 +
22442 +       usecnt = atomic_read(&dli->dl_usecnt);
22443 +       BUG_ON(usecnt < 0);
22444 +
22445 +       refcnt = atomic_read(&dli->dl_refcnt);
22446 +       BUG_ON(refcnt < 0);
22447 +       /* only usecnt gates the free; refcnt is just checked for being >= 0 */
22448 +       vxdprintk(VXD_CBIT(dlim, 3),
22449 +               "rcu_free_dl_info(%p)", dli);
22450 +       if (!usecnt)
22451 +               __dealloc_dl_info(dli);
22452 +       else
22453 +               printk("!!! rcu didn't free\n");
22454 +}
22455 +
22456 +/* add (add != 0) or remove (add == 0) the dl_info of context id on the
22457 + * superblock of the object named by the user path; flags is not used.
22458 + * returns 0, -ENOMEM, -EEXIST, -ESRCH, -EINVAL or the path lookup error */
22459 +static int do_addrem_dlimit(uint32_t id, const char __user *name,
22460 +       uint32_t flags, int add)
22461 +{
22462 +       struct nameidata nd;
22463 +       int ret;
22464 +
22465 +       ret = user_path_walk_link(name, &nd);
22466 +       if (!ret) {
22467 +               struct super_block *sb;
22468 +               struct dl_info *dli;
22469 +
22470 +               ret = -EINVAL;
22471 +               if (!nd.dentry->d_inode)
22472 +                       goto out_release;
22473 +               if (!(sb = nd.dentry->d_inode->i_sb))
22474 +                       goto out_release;
22475 +               if (add) {
22476 +                       /* a failed allocation must never reach __hash_dl_info() */
22477 +                       ret = -ENOMEM;
22478 +                       if (!(dli = __alloc_dl_info(sb, id)))
22479 +                               goto out_release;
22480 +                       spin_lock(&dl_info_hash_lock);
22481 +                       ret = -EEXIST;
22482 +                       if (__lookup_dl_info(sb, id))
22483 +                               goto out_unlock;
22484 +                       __hash_dl_info(dli);
22485 +                       dli = NULL;
22486 +               } else {
22487 +                       spin_lock(&dl_info_hash_lock);
22488 +                       dli = __lookup_dl_info(sb, id);
22489 +                       ret = -ESRCH;
22490 +                       if (!dli)
22491 +                               goto out_unlock;
22492 +                       __unhash_dl_info(dli);
22493 +               }
22494 +               ret = 0;
22495 +       out_unlock:
22496 +               spin_unlock(&dl_info_hash_lock);
22497 +               if (add && dli)
22498 +                       __dealloc_dl_info(dli);
22499 +       out_release:
22500 +               path_release(&nd);
22501 +       }
22502 +       return ret;
22503 +}
22504 +/* copy the request from user space, then add a disk limit entry for id */
22505 +int vc_add_dlimit(uint32_t id, void __user *data)
22506 +{
22507 +       struct vcmd_ctx_dlimit_base_v0 vc_data;
22508 +
22509 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22510 +               return -EFAULT;
22511 +
22512 +       return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1);
22513 +}
22514 +/* copy the request from user space, then remove the disk limit entry of id */
22515 +int vc_rem_dlimit(uint32_t id, void __user *data)
22516 +{
22517 +       struct vcmd_ctx_dlimit_base_v0 vc_data;
22518 +
22519 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22520 +               return -EFAULT;
22521 +
22522 +       return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0);
22523 +}
22524 +
22525 +#ifdef CONFIG_COMPAT
22526 +/* compat variant of vc_add_dlimit(): the name arrives as a compat pointer */
22527 +int vc_add_dlimit_x32(uint32_t id, void __user *data)
22528 +{
22529 +       struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
22530 +
22531 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22532 +               return -EFAULT;
22533 +
22534 +       return do_addrem_dlimit(id,
22535 +               compat_ptr(vc_data.name_ptr), vc_data.flags, 1);
22536 +}
22537 +/* compat variant of vc_rem_dlimit(): the name arrives as a compat pointer */
22538 +int vc_rem_dlimit_x32(uint32_t id, void __user *data)
22539 +{
22540 +       struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
22541 +
22542 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22543 +               return -EFAULT;
22544 +
22545 +       return do_addrem_dlimit(id,
22546 +               compat_ptr(vc_data.name_ptr), vc_data.flags, 0);
22547 +}
22548 +
22549 +#endif /* CONFIG_COMPAT */
22550 +
22551 +/* update the limit values of context id; CDLIM_KEEP keeps a field as is */
22552 +static inline
22553 +int do_set_dlimit(uint32_t id, const char __user *name,
22554 +       uint32_t space_used, uint32_t space_total,
22555 +       uint32_t inodes_used, uint32_t inodes_total,
22556 +       uint32_t reserved, uint32_t flags)
22557 +{
22558 +       struct nameidata nd;
22559 +       int ret;
22560 +
22561 +       ret = user_path_walk_link(name, &nd);
22562 +       if (!ret) {
22563 +               struct super_block *sb;
22564 +               struct dl_info *dli;
22565 +
22566 +               ret = -EINVAL;
22567 +               if (!nd.dentry->d_inode)
22568 +                       goto out_release;
22569 +               if (!(sb = nd.dentry->d_inode->i_sb))
22570 +                       goto out_release;
22571 +               if ((reserved != CDLIM_KEEP &&
22572 +                       reserved > 100) ||
22573 +                       (inodes_used != CDLIM_KEEP &&
22574 +                       inodes_used > inodes_total) ||
22575 +                       (space_used != CDLIM_KEEP &&
22576 +                       space_used > space_total))
22577 +                       goto out_release;
22578 +               /* only an already registered limit can be updated */
22579 +               ret = -ESRCH;
22580 +               dli = locate_dl_info(sb, id);
22581 +               if (!dli)
22582 +                       goto out_release;
22583 +
22584 +               spin_lock(&dli->dl_lock);
22585 +               /* space values are stored shifted left by 10 (times 1024) */
22586 +               if (inodes_used != CDLIM_KEEP)
22587 +                       dli->dl_inodes_used = inodes_used;
22588 +               if (inodes_total != CDLIM_KEEP)
22589 +                       dli->dl_inodes_total = inodes_total;
22590 +               if (space_used != CDLIM_KEEP) {
22591 +                       dli->dl_space_used = space_used;
22592 +                       dli->dl_space_used <<= 10;
22593 +               }
22594 +               if (space_total == CDLIM_INFINITY)
22595 +                       dli->dl_space_total = DLIM_INFINITY;
22596 +               else if (space_total != CDLIM_KEEP) {
22597 +                       dli->dl_space_total = space_total;
22598 +                       dli->dl_space_total <<= 10;
22599 +               }
22600 +               if (reserved != CDLIM_KEEP)
22601 +                       dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100;
22602 +               /* dl_nrlmult is (100 - reserved) percent in units of 1/1024 */
22603 +               spin_unlock(&dli->dl_lock);
22604 +
22605 +               put_dl_info(dli);
22606 +               ret = 0;
22607 +
22608 +       out_release:
22609 +               path_release(&nd);
22610 +       }
22611 +       return ret;
22612 +}
22613 +/* copy the request from user space and apply it through do_set_dlimit() */
22614 +int vc_set_dlimit(uint32_t id, void __user *data)
22615 +{
22616 +       struct vcmd_ctx_dlimit_v0 vc_data;
22617 +
22618 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22619 +               return -EFAULT;
22620 +
22621 +       return do_set_dlimit(id, vc_data.name,
22622 +               vc_data.space_used, vc_data.space_total,
22623 +               vc_data.inodes_used, vc_data.inodes_total,
22624 +               vc_data.reserved, vc_data.flags);
22625 +}
22626 +
22627 +#ifdef CONFIG_COMPAT
22628 +/* compat variant of vc_set_dlimit(): the name arrives as a compat pointer */
22629 +int vc_set_dlimit_x32(uint32_t id, void __user *data)
22630 +{
22631 +       struct vcmd_ctx_dlimit_v0_x32 vc_data;
22632 +
22633 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22634 +               return -EFAULT;
22635 +
22636 +       return do_set_dlimit(id, compat_ptr(vc_data.name_ptr),
22637 +               vc_data.space_used, vc_data.space_total,
22638 +               vc_data.inodes_used, vc_data.inodes_total,
22639 +               vc_data.reserved, vc_data.flags);
22640 +}
22641 +
22642 +#endif /* CONFIG_COMPAT */
22643 +
22644 +/* read the limit values of context id on the superblock behind name */
22645 +static inline
22646 +int do_get_dlimit(uint32_t id, const char __user *name,
22647 +       uint32_t *space_used, uint32_t *space_total,
22648 +       uint32_t *inodes_used, uint32_t *inodes_total,
22649 +       uint32_t *reserved, uint32_t *flags)
22650 +{
22651 +       struct nameidata nd;
22652 +       int ret;
22653 +
22654 +       ret = user_path_walk_link(name, &nd);
22655 +       if (!ret) {
22656 +               struct super_block *sb;
22657 +               struct dl_info *dli;
22658 +
22659 +               ret = -EINVAL;
22660 +               if (!nd.dentry->d_inode)
22661 +                       goto out_release;
22662 +               if (!(sb = nd.dentry->d_inode->i_sb))
22663 +                       goto out_release;
22664 +
22665 +               ret = -ESRCH;
22666 +               dli = locate_dl_info(sb, id);
22667 +               if (!dli)
22668 +                       goto out_release;
22669 +
22670 +               spin_lock(&dli->dl_lock);
22671 +               *inodes_used = dli->dl_inodes_used;
22672 +               *inodes_total = dli->dl_inodes_total;
22673 +               *space_used = dli->dl_space_used >> 10;
22674 +               if (dli->dl_space_total == DLIM_INFINITY)
22675 +                       *space_total = CDLIM_INFINITY;
22676 +               else
22677 +                       *space_total = dli->dl_space_total >> 10;
22678 +
22679 +               *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);
22680 +               spin_unlock(&dli->dl_lock);
22681 +
22682 +               put_dl_info(dli);
22683 +
22684 +               /* *flags is never written: the caller's value stays as is */
22685 +               ret = 0;
22686 +       out_release:
22687 +               path_release(&nd);
22688 +       }
22689 +       return ret;
22690 +}
22691 +
22692 +/* copy the request in, fill it via do_get_dlimit(), copy the result out */
22693 +int vc_get_dlimit(uint32_t id, void __user *data)
22694 +{
22695 +       struct vcmd_ctx_dlimit_v0 vc_data;
22696 +       int ret;
22697 +
22698 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22699 +               return -EFAULT;
22700 +
22701 +       ret = do_get_dlimit(id, vc_data.name,
22702 +               &vc_data.space_used, &vc_data.space_total,
22703 +               &vc_data.inodes_used, &vc_data.inodes_total,
22704 +               &vc_data.reserved, &vc_data.flags);
22705 +       if (ret)
22706 +               return ret;
22707 +
22708 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
22709 +               return -EFAULT;
22710 +       return 0;
22711 +}
22712 +
22713 +#ifdef CONFIG_COMPAT
22714 +/* compat variant of vc_get_dlimit(): the name arrives as a compat pointer */
22715 +int vc_get_dlimit_x32(uint32_t id, void __user *data)
22716 +{
22717 +       struct vcmd_ctx_dlimit_v0_x32 vc_data;
22718 +       int ret;
22719 +
22720 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
22721 +               return -EFAULT;
22722 +
22723 +       ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr),
22724 +               &vc_data.space_used, &vc_data.space_total,
22725 +               &vc_data.inodes_used, &vc_data.inodes_total,
22726 +               &vc_data.reserved, &vc_data.flags);
22727 +       if (ret)
22728 +               return ret;
22729 +
22730 +       if (copy_to_user(data, &vc_data, sizeof(vc_data)))
22731 +               return -EFAULT;
22732 +       return 0;
22733 +}
22734 +
22735 +#endif /* CONFIG_COMPAT */
22736 +
22737 +/* clamp the statfs values in buf to the limit of the current tag on sb */
22738 +void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
22739 +{
22740 +       struct dl_info *dli;
22741 +       __u64 blimit, bfree, bavail;
22742 +       __u32 ifree;
22743 +       /* without a limit entry for the current tag, buf is left unchanged */
22744 +       dli = locate_dl_info(sb, dx_current_tag());
22745 +       if (!dli)
22746 +               return;
22747 +
22748 +       spin_lock(&dli->dl_lock);
22749 +       if (dli->dl_inodes_total == (unsigned long)DLIM_INFINITY)
22750 +               goto no_ilim;
22751 +
22752 +       /* reduce max inodes available to limit */
22753 +       if (buf->f_files > dli->dl_inodes_total)
22754 +               buf->f_files = dli->dl_inodes_total;
22755 +
22756 +       ifree = dli->dl_inodes_total - dli->dl_inodes_used;
22757 +       /* reduce free inodes to min */
22758 +       if (ifree < buf->f_ffree)
22759 +               buf->f_ffree = ifree;
22760 +
22761 +no_ilim:
22762 +       if (dli->dl_space_total == DLIM_INFINITY)
22763 +               goto no_blim;
22764 +       /* shifting by s_blocksize_bits turns stored values into block counts */
22765 +       blimit = dli->dl_space_total >> sb->s_blocksize_bits;
22766 +
22767 +       if (dli->dl_space_total < dli->dl_space_used)
22768 +               bfree = 0;
22769 +       else
22770 +               bfree = (dli->dl_space_total - dli->dl_space_used)
22771 +                       >> sb->s_blocksize_bits;
22772 +       /* available space: the total scaled by dl_nrlmult / 1024 */
22773 +       bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);
22774 +       if (bavail < dli->dl_space_used)
22775 +               bavail = 0;
22776 +       else
22777 +               bavail = (bavail - dli->dl_space_used)
22778 +                       >> sb->s_blocksize_bits;
22779 +
22780 +       /* reduce max space available to limit */
22781 +       if (buf->f_blocks > blimit)
22782 +               buf->f_blocks = blimit;
22783 +
22784 +       /* reduce free space to min */
22785 +       if (bfree < buf->f_bfree)
22786 +               buf->f_bfree = bfree;
22787 +
22788 +       /* reduce avail space to min */
22789 +       if (bavail < buf->f_bavail)
22790 +               buf->f_bavail = bavail;
22791 +
22792 +no_blim:
22793 +       spin_unlock(&dli->dl_lock);
22794 +       put_dl_info(dli);
22795 +
22796 +       return;
22797 +}
22798 +
22799 +#include <linux/module.h>
22800 +
22801 +EXPORT_SYMBOL_GPL(locate_dl_info);
22802 +EXPORT_SYMBOL_GPL(rcu_free_dl_info);
22803 +
22804 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/helper.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/helper.c
22805 --- linux-2.6.16.20/kernel/vserver/helper.c     1970-01-01 01:00:00 +0100
22806 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/helper.c        2006-04-29 02:58:07 +0200
22807 @@ -0,0 +1,210 @@
22808 +/*
22809 + *  linux/kernel/vserver/helper.c
22810 + *
22811 + *  Virtual Context Support
22812 + *
22813 + *  Copyright (C) 2004-2005  Herbert Pötzl
22814 + *
22815 + *  V0.01  basic helper
22816 + *
22817 + */
22818 +
22819 +#include <linux/errno.h>
22820 +#include <linux/kmod.h>
22821 +#include <linux/sched.h>
22822 +#include <linux/reboot.h>
22823 +#include <linux/vs_context.h>
22824 +#include <linux/vs_network.h>
22825 +#include <linux/vserver/signal.h>
22826 +
22827 +#include <asm/uaccess.h>
22828 +#include <asm/unistd.h>
22829 +
22830 +
22831 +char vshelper_path[255] = "/sbin/vshelper";
22832 +
22833 +/* run the helper binary; a non-zero result is logged with KERN_WARNING */
22834 +static int do_vshelper(char *name, char *argv[], char *envp[], int sync)
22835 +{
22836 +       const char *mode = sync ? "sync" : "async";
22837 +       int ret = call_usermodehelper(name, argv, envp, sync);
22838 +
22839 +       if (ret != 0) {
22840 +               printk(KERN_WARNING
22841 +                       "%s: (%s %s) returned %s with %d\n",
22842 +                       name, argv[1], argv[2], mode, ret);
22843 +       }
22844 +       vxdprintk(VXD_CBIT(switch, 4),
22845 +               "%s: (%s %s) returned %s with %d",
22846 +               name, argv[1], argv[2], mode, ret);
22847 +       return ret;
22848 +}
22849 +
22850 +/*
22851 + *      vshelper path is set via /proc/sys
22852 + *      invoked by vserver sys_reboot(), with
22853 + *      the following arguments
22854 + *
22855 + *      argv [0] = vshelper_path;
22856 + *      argv [1] = action: "restart", "halt", "poweroff", ...
22857 + *      argv [2] = context identifier
22858 + *
22859 + *      envp [*] = type-specific parameters
22860 + */
22861 +
22862 +long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg)
22863 +{
22864 +       char id_buf[8], cmd_buf[16];
22865 +       char uid_buf[16], pid_buf[16];
22866 +       int ret;
22867 +
22868 +       char *argv[] = {vshelper_path, NULL, id_buf, 0};
22869 +       char *envp[] = {"HOME=/", "TERM=linux",
22870 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
22871 +                       uid_buf, pid_buf, cmd_buf, 0};
22872 +
22873 +       if (vx_info_state(vxi, VXS_HELPER))
22874 +               return -EAGAIN;
22875 +       vxi->vx_state |= VXS_HELPER;
22876 +
22877 +       snprintf(id_buf, sizeof(id_buf)-1, "%d", vxi->vx_id);
22878 +
22879 +       snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd);
22880 +       snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current->uid);
22881 +       snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid);
22882 +
22883 +       switch (cmd) {
22884 +       case LINUX_REBOOT_CMD_RESTART:
22885 +               argv[1] = "restart";
22886 +               break;
22887 +
22888 +       case LINUX_REBOOT_CMD_HALT:
22889 +               argv[1] = "halt";
22890 +               break;
22891 +
22892 +       case LINUX_REBOOT_CMD_POWER_OFF:
22893 +               argv[1] = "poweroff";
22894 +               break;
22895 +
22896 +       case LINUX_REBOOT_CMD_SW_SUSPEND:
22897 +               argv[1] = "swsusp";
22898 +               break;
22899 +
22900 +       default:
22901 +               vxi->vx_state &= ~VXS_HELPER;
22902 +               return 0;
22903 +       }
22904 +
22905 +#ifndef CONFIG_VSERVER_LEGACY
22906 +       ret = do_vshelper(vshelper_path, argv, envp, 1);
22907 +#else
22908 +       ret = do_vshelper(vshelper_path, argv, envp, 0);
22909 +#endif
22910 +       vxi->vx_state &= ~VXS_HELPER;
22911 +       __wakeup_vx_info(vxi);
22912 +       return (ret) ? -EPERM : 0;
22913 +}
22914 +
22915 +/* reboot request of the current context: run the helper, then maybe kill */
22916 +long vs_reboot(unsigned int cmd, void __user * arg)
22917 +{
22918 +       struct vx_info *vxi = current->vx_info;
22919 +       long ret = 0;
22920 +
22921 +       vxdprintk(VXD_CBIT(misc, 5),
22922 +               "vs_reboot(%p[#%d],%d)",
22923 +               vxi, vxi?vxi->vx_id:0, cmd);
22924 +       /* NOTE(review): vxi is NULL-checked above but dereferenced below */
22925 +       ret = vs_reboot_helper(vxi, cmd, arg);
22926 +       if (ret)
22927 +               return ret;
22928 +       /* record the command; with VXF_REBOOT_KILL also send SIGKILL */
22929 +       vxi->reboot_cmd = cmd;
22930 +       if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
22931 +               switch (cmd) {
22932 +               case LINUX_REBOOT_CMD_RESTART:
22933 +               case LINUX_REBOOT_CMD_HALT:
22934 +               case LINUX_REBOOT_CMD_POWER_OFF:
22935 +                       vx_info_kill(vxi, 0, SIGKILL);
22936 +                       vx_info_kill(vxi, 1, SIGKILL);  /* falls through */
22937 +               default:
22938 +                       break;
22939 +               }
22940 +       }
22941 +       return 0;
22942 +}
22943 +
22944 +
22945 +/*
22946 + *      argv [0] = vshelper_path;
22947 + *      argv [1] = action: "startup", "shutdown"
22948 + *      argv [2] = context identifier
22949 + *
22950 + *      envp [*] = type-specific parameters
22951 + */
22952 +/* run the helper for VSC_STARTUP/VSC_SHUTDOWN when VXF_SC_HELPER is set */
22953 +long vs_state_change(struct vx_info *vxi, unsigned int cmd)
22954 +{
22955 +       char id_buf[8], cmd_buf[16];
22956 +       char *argv[] = {vshelper_path, NULL, id_buf, NULL};
22957 +       char *envp[] = {"HOME=/", "TERM=linux",
22958 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, NULL};
22959 +
22960 +       if (!vx_info_flags(vxi, VXF_SC_HELPER, 0))
22961 +               return 0;
22962 +       /* pass the full size: "VS_CMD=%08x" needs all 16 bytes incl. NUL */
22963 +       snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
22964 +       snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
22965 +
22966 +       switch (cmd) {
22967 +       case VSC_STARTUP:
22968 +               argv[1] = "startup";
22969 +               break;
22970 +       case VSC_SHUTDOWN:
22971 +               argv[1] = "shutdown";
22972 +               break;
22973 +       default:
22974 +               return 0;
22975 +       }
22976 +
22977 +       do_vshelper(vshelper_path, argv, envp, 1);
22978 +       return 0;
22979 +}
22980 +
22981 +
22982 +/*
22983 + *      argv [0] = vshelper_path;
22984 + *      argv [1] = action: "netup", "netdown"
22985 + *      argv [2] = context identifier
22986 + *
22987 + *      envp [*] = type-specific parameters
22988 + */
22989 +/* run the helper for VSC_NETUP/VSC_NETDOWN when NXF_SC_HELPER is set */
22990 +long vs_net_change(struct nx_info *nxi, unsigned int cmd)
22991 +{
22992 +       char id_buf[8], cmd_buf[16];
22993 +       char *argv[] = {vshelper_path, NULL, id_buf, NULL};
22994 +       char *envp[] = {"HOME=/", "TERM=linux",
22995 +                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, NULL};
22996 +
22997 +       if (!nx_info_flags(nxi, NXF_SC_HELPER, 0))
22998 +               return 0;
22999 +       /* pass the full size: "VS_CMD=%08x" needs all 16 bytes incl. NUL */
23000 +       snprintf(id_buf, sizeof(id_buf), "%d", nxi->nx_id);
23001 +       snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
23002 +
23003 +       switch (cmd) {
23004 +       case VSC_NETUP:
23005 +               argv[1] = "netup";
23006 +               break;
23007 +       case VSC_NETDOWN:
23008 +               argv[1] = "netdown";
23009 +               break;
23010 +       default:
23011 +               return 0;
23012 +       }
23013 +
23014 +       do_vshelper(vshelper_path, argv, envp, 1);
23015 +       return 0;
23016 +}
23017 +
23018 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/history.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/history.c
23019 --- linux-2.6.16.20/kernel/vserver/history.c    1970-01-01 01:00:00 +0100
23020 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/history.c       2006-04-26 19:07:00 +0200
23021 @@ -0,0 +1,184 @@
23022 +/*
23023 + *  kernel/vserver/history.c
23024 + *
23025 + *  Virtual Context History Backtrace
23026 + *
23027 + *  Copyright (C) 2004-2005  Herbert Pötzl
23028 + *
23029 + *  V0.01  basic structure
23030 + *  V0.02  hash/unhash and trace
23031 + *  V0.03  preemption fixes
23032 + *
23033 + */
23034 +
23035 +#include <linux/errno.h>
23036 +#include <linux/module.h>
23037 +#include <linux/types.h>
23038 +#include <linux/ctype.h>
23039 +
23040 +#include <asm/uaccess.h>
23041 +#include <asm/atomic.h>
23042 +#include <asm/unistd.h>
23043 +
23044 +#include <linux/vserver/debug.h>
23045 +#include <linux/vserver/history.h>
23046 +
23047 +
23048 +#ifdef CONFIG_VSERVER_HISTORY
23049 +#define VXH_SIZE       CONFIG_VSERVER_HISTORY_SIZE
23050 +#else
23051 +#define VXH_SIZE       64
23052 +#endif
23053 +
23054 +struct _vx_history {
23055 +       unsigned int counter;
23056 +
23057 +       struct _vx_hist_entry entry[VXH_SIZE+1];
23058 +};
23059 +
23060 +
23061 +DEFINE_PER_CPU(struct _vx_history, vx_history_buffer);
23062 +
23063 +unsigned volatile int vxh_active = 1;
23064 +
23065 +static atomic_t sequence = ATOMIC_INIT(0);
23066 +
23067 +
23068 +/*     vxh_advance()
23069 +
23070 +       * requires disabled preemption                          */
23071 +
23072 +struct _vx_hist_entry *vxh_advance(void *loc)
23073 +{
23074 +       unsigned int cpu = smp_processor_id();
23075 +       struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
23076 +       struct _vx_hist_entry *entry;
23077 +       unsigned int index;
23078 +
23079 +       index = vxh_active ? (hist->counter++ % VXH_SIZE) : VXH_SIZE;
23080 +       entry = &hist->entry[index];
23081 +
23082 +       entry->seq = atomic_inc_return(&sequence);
23083 +       entry->loc = loc;
23084 +       return entry;
23085 +}
23086 +
23087 +
23088 +#define VXH_LOC_FMTS   "(#%04x,*%d):%p"
23089 +
23090 +#define VXH_LOC_ARGS(e)        (e)->seq, cpu, (e)->loc
23091 +
23092 +
23093 +#define VXH_VXI_FMTS   "%p[#%d,%d.%d]"
23094 +
23095 +#define VXH_VXI_ARGS(e)        (e)->vxi.ptr,                   \
23096 +                       (e)->vxi.ptr?(e)->vxi.xid:0,    \
23097 +                       (e)->vxi.ptr?(e)->vxi.usecnt:0, \
23098 +                       (e)->vxi.ptr?(e)->vxi.tasks:0
23099 +
23100 +void   vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu)
23101 +{
23102 +       switch (e->type) {
23103 +       case VXH_THROW_OOPS:
23104 +               printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e));
23105 +               break;
23106 +
23107 +       case VXH_GET_VX_INFO:
23108 +       case VXH_PUT_VX_INFO:
23109 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
23110 +                       VXH_LOC_ARGS(e),
23111 +                       (e->type==VXH_GET_VX_INFO)?"get":"put",
23112 +                       VXH_VXI_ARGS(e));
23113 +               break;
23114 +
23115 +       case VXH_INIT_VX_INFO:
23116 +       case VXH_SET_VX_INFO:
23117 +       case VXH_CLR_VX_INFO:
23118 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
23119 +                       VXH_LOC_ARGS(e),
23120 +                       (e->type==VXH_INIT_VX_INFO)?"init":
23121 +                       ((e->type==VXH_SET_VX_INFO)?"set":"clr"),
23122 +                       VXH_VXI_ARGS(e), e->sc.data);
23123 +               break;
23124 +
23125 +       case VXH_CLAIM_VX_INFO:
23126 +       case VXH_RELEASE_VX_INFO:
23127 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
23128 +                       VXH_LOC_ARGS(e),
23129 +                       (e->type==VXH_CLAIM_VX_INFO)?"claim":"release",
23130 +                       VXH_VXI_ARGS(e), e->sc.data);
23131 +               break;
23132 +
23133 +       case VXH_ALLOC_VX_INFO:
23134 +       case VXH_DEALLOC_VX_INFO:
23135 +               printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
23136 +                       VXH_LOC_ARGS(e),
23137 +                       (e->type==VXH_ALLOC_VX_INFO)?"alloc":"dealloc",
23138 +                       VXH_VXI_ARGS(e));
23139 +               break;
23140 +
23141 +       case VXH_HASH_VX_INFO:
23142 +       case VXH_UNHASH_VX_INFO:
23143 +               printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n",
23144 +                       VXH_LOC_ARGS(e),
23145 +                       (e->type==VXH_HASH_VX_INFO)?"hash":"unhash",
23146 +                       VXH_VXI_ARGS(e));
23147 +               break;
23148 +
23149 +       case VXH_LOC_VX_INFO:
23150 +       case VXH_LOOKUP_VX_INFO:
23151 +       case VXH_CREATE_VX_INFO:
23152 +               printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n",
23153 +                       VXH_LOC_ARGS(e),
23154 +                       (e->type==VXH_CREATE_VX_INFO)?"create":
23155 +                       ((e->type==VXH_LOC_VX_INFO)?"loc":"lookup"),
23156 +                       e->ll.arg, VXH_VXI_ARGS(e));
23157 +               break;
23158 +       }
23159 +}
23160 +
23161 +static void __vxh_dump_history(void)
23162 +{
23163 +       unsigned int i,j;
23164 +
23165 +       printk("History:\tSEQ: %8x\tNR_CPUS: %d\n",
23166 +               atomic_read(&sequence), NR_CPUS);
23167 +
23168 +       for (i=0; i < VXH_SIZE; i++) {
23169 +               for (j=0; j < NR_CPUS; j++) {
23170 +                       struct _vx_history *hist =
23171 +                               &per_cpu(vx_history_buffer, j);
23172 +                       unsigned int index = (hist->counter-i) % VXH_SIZE;
23173 +                       struct _vx_hist_entry *entry = &hist->entry[index];
23174 +
23175 +                       vxh_dump_entry(entry, j);
23176 +               }
23177 +       }
23178 +}
23179 +
23180 +void   vxh_dump_history(void)
23181 +{
23182 +       vxh_active = 0;
23183 +#ifdef CONFIG_SMP
23184 +       local_irq_enable();
23185 +       smp_send_stop();
23186 +       local_irq_disable();
23187 +#endif
23188 +       __vxh_dump_history();
23189 +}
23190 +
23191 +
23192 +/* vserver syscall commands below here */
23193 +
23194 +
23195 +int    vc_dump_history(uint32_t id)
23196 +{
23197 +       vxh_active = 0;
23198 +       __vxh_dump_history();
23199 +       vxh_active = 1;
23200 +
23201 +       return 0;
23202 +}
23203 +
23204 +EXPORT_SYMBOL_GPL(vxh_advance);
23205 +
23206 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/init.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/init.c
23207 --- linux-2.6.16.20/kernel/vserver/init.c       1970-01-01 01:00:00 +0100
23208 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/init.c  2006-04-26 19:07:00 +0200
23209 @@ -0,0 +1,46 @@
23210 +/*
23211 + *  linux/kernel/vserver/init.c
23212 + *
23213 + *  Virtual Server Init
23214 + *
23215 + *  Copyright (C) 2004-2005  Herbert Pötzl
23216 + *
23217 + *  V0.01  basic structure
23218 + *
23219 + */
23220 +
23221 +#include <linux/errno.h>
23222 +#include <linux/init.h>
23223 +#include <linux/module.h>
23224 +
23225 +int    vserver_register_sysctl(void);
23226 +void   vserver_unregister_sysctl(void);
23227 +
23228 +
23229 +static int __init init_vserver(void)
23230 +{
23231 +       int ret = 0;
23232 +
23233 +#ifdef CONFIG_VSERVER_DEBUG
23234 +       vserver_register_sysctl();
23235 +#endif
23236 +       return ret;
23237 +}
23238 +
23239 +
23240 +static void __exit exit_vserver(void)
23241 +{
23242 +
23243 +#ifdef CONFIG_VSERVER_DEBUG
23244 +       vserver_unregister_sysctl();
23245 +#endif
23246 +       return;
23247 +}
23248 +
23249 +long vx_slab[GFP_ZONETYPES];
23250 +long vx_area;
23251 +
23252 +
23253 +module_init(init_vserver);
23254 +module_exit(exit_vserver);
23255 +
23256 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/inode.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/inode.c
23257 --- linux-2.6.16.20/kernel/vserver/inode.c      1970-01-01 01:00:00 +0100
23258 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/inode.c 2006-04-29 02:58:07 +0200
23259 @@ -0,0 +1,369 @@
23260 +/*
23261 + *  linux/kernel/vserver/inode.c
23262 + *
23263 + *  Virtual Server: File System Support
23264 + *
23265 + *  Copyright (C) 2004-2005  Herbert Pötzl
23266 + *
23267 + *  V0.01  separated from vcontext V0.05
23268 + *
23269 + */
23270 +
23271 +#include <linux/sched.h>
23272 +#include <linux/vs_context.h>
23273 +#include <linux/proc_fs.h>
23274 +#include <linux/devpts_fs.h>
23275 +#include <linux/namei.h>
23276 +#include <linux/mount.h>
23277 +#include <linux/parser.h>
23278 +#include <linux/compat.h>
23279 +#include <linux/vserver/inode.h>
23280 +#include <linux/vserver/inode_cmd.h>
23281 +#include <linux/vserver/tag.h>
23282 +
23283 +#include <asm/errno.h>
23284 +#include <asm/uaccess.h>
23285 +
23286 +
23287 +static int __vc_get_iattr(struct inode *in, uint32_t *tag, uint32_t *flags, uint32_t *mask)
23288 +{
23289 +       struct proc_dir_entry *entry;
23290 +
23291 +       if (!in || !in->i_sb)
23292 +               return -ESRCH;
23293 +
23294 +       *flags = IATTR_TAG
23295 +               | (IS_BARRIER(in) ? IATTR_BARRIER : 0)
23296 +               | (IS_IUNLINK(in) ? IATTR_IUNLINK : 0)
23297 +               | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0);
23298 +       *mask = IATTR_IUNLINK | IATTR_IMMUTABLE;
23299 +
23300 +       if (S_ISDIR(in->i_mode))
23301 +               *mask |= IATTR_BARRIER;
23302 +
23303 +       if (IS_TAGGED(in)) {
23304 +               *tag = in->i_tag;
23305 +               *mask |= IATTR_TAG;
23306 +       }
23307 +
23308 +       switch (in->i_sb->s_magic) {
23309 +       case PROC_SUPER_MAGIC:
23310 +               entry = PROC_I(in)->pde;
23311 +
23312 +               /* check for specific inodes? */
23313 +               if (entry)
23314 +                       *mask |= IATTR_FLAGS;
23315 +               if (entry)
23316 +                       *flags |= (entry->vx_flags & IATTR_FLAGS);
23317 +               else
23318 +                       *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
23319 +               break;
23320 +
23321 +       case DEVPTS_SUPER_MAGIC:
23322 +               *tag = in->i_tag;
23323 +               *mask |= IATTR_TAG;
23324 +               break;
23325 +
23326 +       default:
23327 +               break;
23328 +       }
23329 +       return 0;
23330 +}
23331 +
23332 +int vc_get_iattr(uint32_t id, void __user *data)
23333 +{
23334 +       struct nameidata nd;
23335 +       struct vcmd_ctx_iattr_v1 vc_data = { .xid = -1 };
23336 +       int ret;
23337 +
23338 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23339 +               return -EFAULT;
23340 +
23341 +       ret = user_path_walk_link(vc_data.name, &nd);
23342 +       if (!ret) {
23343 +               ret = __vc_get_iattr(nd.dentry->d_inode,
23344 +                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
23345 +               path_release(&nd);
23346 +       }
23347 +       if (ret)
23348 +               return ret;
23349 +
23350 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
23351 +               ret = -EFAULT;
23352 +       return ret;
23353 +}
23354 +
23355 +#ifdef CONFIG_COMPAT
23356 +
23357 +int vc_get_iattr_x32(uint32_t id, void __user *data)
23358 +{
23359 +       struct nameidata nd;
23360 +       struct vcmd_ctx_iattr_v1_x32 vc_data = { .xid = -1 };
23361 +       int ret;
23362 +
23363 +       if (!vx_check(0, VX_ADMIN))
23364 +               return -ENOSYS;
23365 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23366 +               return -EFAULT;
23367 +
23368 +       ret = user_path_walk_link(compat_ptr(vc_data.name_ptr), &nd);
23369 +       if (!ret) {
23370 +               ret = __vc_get_iattr(nd.dentry->d_inode,
23371 +                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
23372 +               path_release(&nd);
23373 +       }
23374 +       if (ret)
23375 +               return ret;
23376 +
23377 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
23378 +               ret = -EFAULT;
23379 +       return ret;
23380 +}
23381 +
23382 +#endif /* CONFIG_COMPAT */
23383 +
23384 +
23385 +static int __vc_set_iattr(struct dentry *de, uint32_t *tag, uint32_t *flags, uint32_t *mask)
23386 +{
23387 +       struct inode *in = de->d_inode;
23388 +       int error = 0, is_proc = 0, has_tag = 0;
23389 +       struct iattr attr = { 0 };
23390 +
23391 +       if (!in || !in->i_sb)
23392 +               return -ESRCH;
23393 +
23394 +       is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
23395 +       if ((*mask & IATTR_FLAGS) && !is_proc)
23396 +               return -EINVAL;
23397 +
23398 +       has_tag = IS_TAGGED(in) ||
23399 +               (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC);
23400 +       if ((*mask & IATTR_TAG) && !has_tag)
23401 +               return -EINVAL;
23402 +
23403 +       mutex_lock(&in->i_mutex);
23404 +       if (*mask & IATTR_TAG) {
23405 +               attr.ia_tag = *tag;
23406 +               attr.ia_valid |= ATTR_TAG;
23407 +       }
23408 +
23409 +       if (*mask & IATTR_FLAGS) {
23410 +               struct proc_dir_entry *entry = PROC_I(in)->pde;
23411 +               unsigned int iflags = PROC_I(in)->vx_flags;
23412 +
23413 +               iflags = (iflags & ~(*mask & IATTR_FLAGS))
23414 +                       | (*flags & IATTR_FLAGS);
23415 +               PROC_I(in)->vx_flags = iflags;
23416 +               if (entry)
23417 +                       entry->vx_flags = iflags;
23418 +       }
23419 +
23420 +       if (*mask & (IATTR_BARRIER | IATTR_IUNLINK | IATTR_IMMUTABLE)) {
23421 +               if (*mask & IATTR_IMMUTABLE) {
23422 +                       if (*flags & IATTR_IMMUTABLE)
23423 +                               in->i_flags |= S_IMMUTABLE;
23424 +                       else
23425 +                               in->i_flags &= ~S_IMMUTABLE;
23426 +               }
23427 +               if (*mask & IATTR_IUNLINK) {
23428 +                       if (*flags & IATTR_IUNLINK)
23429 +                               in->i_flags |= S_IUNLINK;
23430 +                       else
23431 +                               in->i_flags &= ~S_IUNLINK;
23432 +               }
23433 +               if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) {
23434 +                       if (*flags & IATTR_BARRIER)
23435 +                               in->i_flags |= S_BARRIER;
23436 +                       else
23437 +                               in->i_flags &= ~S_BARRIER;
23438 +               }
23439 +               if (in->i_op && in->i_op->sync_flags) {
23440 +                       error = in->i_op->sync_flags(in);
23441 +                       if (error)
23442 +                               goto out;
23443 +               }
23444 +       }
23445 +
23446 +       if (attr.ia_valid) {
23447 +               if (in->i_op && in->i_op->setattr)
23448 +                       error = in->i_op->setattr(de, &attr);
23449 +               else {
23450 +                       error = inode_change_ok(in, &attr);
23451 +                       if (!error)
23452 +                               error = inode_setattr(in, &attr);
23453 +               }
23454 +       }
23455 +
23456 +out:
23457 +       mutex_unlock(&in->i_mutex);
23458 +       return error;
23459 +}
23460 +
23461 +int vc_set_iattr(uint32_t id, void __user *data)
23462 +{
23463 +       struct nameidata nd;
23464 +       struct vcmd_ctx_iattr_v1 vc_data;
23465 +       int ret;
23466 +
23467 +       if (!capable(CAP_LINUX_IMMUTABLE))
23468 +               return -EPERM;
23469 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23470 +               return -EFAULT;
23471 +
23472 +       ret = user_path_walk_link(vc_data.name, &nd);
23473 +       if (!ret) {
23474 +               ret = __vc_set_iattr(nd.dentry,
23475 +                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
23476 +               path_release(&nd);
23477 +       }
23478 +
23479 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
23480 +               ret = -EFAULT;
23481 +       return ret;
23482 +}
23483 +
23484 +#ifdef CONFIG_COMPAT
23485 +
23486 +int vc_set_iattr_x32(uint32_t id, void __user *data)
23487 +{
23488 +       struct nameidata nd;
23489 +       struct vcmd_ctx_iattr_v1_x32 vc_data;
23490 +       int ret;
23491 +
23492 +       if (!capable(CAP_LINUX_IMMUTABLE))
23493 +               return -EPERM;
23494 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23495 +               return -EFAULT;
23496 +
23497 +       ret = user_path_walk_link(compat_ptr(vc_data.name_ptr), &nd);
23498 +       if (!ret) {
23499 +               ret = __vc_set_iattr(nd.dentry,
23500 +                       &vc_data.xid, &vc_data.flags, &vc_data.mask);
23501 +               path_release(&nd);
23502 +       }
23503 +
23504 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
23505 +               ret = -EFAULT;
23506 +       return ret;
23507 +}
23508 +
23509 +#endif /* CONFIG_COMPAT */
23510 +
23511 +#ifdef CONFIG_VSERVER_LEGACY
23512 +
23513 +#define PROC_DYNAMIC_FIRST 0xF0000000UL
23514 +
23515 +int vx_proc_ioctl(struct inode * inode, struct file * filp,
23516 +       unsigned int cmd, unsigned long arg)
23517 +{
23518 +       struct proc_dir_entry *entry;
23519 +       int error = 0;
23520 +       int flags;
23521 +
23522 +       if (inode->i_ino < PROC_DYNAMIC_FIRST)
23523 +               return -ENOTTY;
23524 +
23525 +       entry = PROC_I(inode)->pde;
23526 +       if (!entry)
23527 +               return -ENOTTY;
23528 +
23529 +       switch(cmd) {
23530 +       case FIOC_GETXFLG: {
23531 +               /* fixme: if stealth, return -ENOTTY */
23532 +               error = -EPERM;
23533 +               flags = entry->vx_flags;
23534 +               if (capable(CAP_CONTEXT))
23535 +                       error = put_user(flags, (int __user *) arg);
23536 +               break;
23537 +       }
23538 +       case FIOC_SETXFLG: {
23539 +               /* fixme: if stealth, return -ENOTTY */
23540 +               error = -EPERM;
23541 +               if (!capable(CAP_CONTEXT))
23542 +                       break;
23543 +               error = -EROFS;
23544 +               if (IS_RDONLY(inode))
23545 +                       break;
23546 +               error = -EFAULT;
23547 +               if (get_user(flags, (int __user *) arg))
23548 +                       break;
23549 +               error = 0;
23550 +               entry->vx_flags = flags;
23551 +               break;
23552 +       }
23553 +       default:
23554 +               return -ENOTTY;
23555 +       }
23556 +       return error;
23557 +}
23558 +#endif /* CONFIG_VSERVER_LEGACY */
23559 +
23560 +#ifdef CONFIG_PROPAGATE
23561 +
23562 +int dx_parse_tag(char *string, tag_t *tag, int remove)
23563 +{
23564 +       static match_table_t tokens = {
23565 +               {1, "tagid=%u"},
23566 +               {0, NULL}
23567 +       };
23568 +       substring_t args[MAX_OPT_ARGS];
23569 +       int token, option = 0;
23570 +
23571 +       if (!string)
23572 +               return 0;
23573 +
23574 +       token = match_token(string, tokens, args);
23575 +       if (token && tag && !match_int(args, &option))
23576 +               *tag = option;
23577 +
23578 +       vxdprintk(VXD_CBIT(tag, 7),
23579 +               "dx_parse_tag(»%s«): %d:#%d",
23580 +               string, token, option);
23581 +
23582 +       if ((token == 1) && remove) {
23583 +               char *p = strstr(string, "tagid=");
23584 +               char *q = p;
23585 +
23586 +               if (p) {
23587 +                       while (*q != '\0' && *q != ',')
23588 +                               q++;
23589 +                       while (*q)
23590 +                               *p++ = *q++;
23591 +                       while (*p)
23592 +                               *p++ = '\0';
23593 +               }
23594 +       }
23595 +       return token;
23596 +}
23597 +
23598 +void __dx_propagate_tag(struct nameidata *nd, struct inode *inode)
23599 +{
23600 +       tag_t new_tag = 0;
23601 +       struct vfsmount *mnt;
23602 +       int propagate;
23603 +
23604 +       if (!nd)
23605 +               return;
23606 +       mnt = nd->mnt;
23607 +       if (!mnt)
23608 +               return;
23609 +
23610 +       propagate = (mnt->mnt_flags & MNT_TAGID);
23611 +       if (propagate)
23612 +               new_tag = mnt->mnt_tag;
23613 +
23614 +       vxdprintk(VXD_CBIT(tag, 7),
23615 +               "dx_propagate_tag(%p[#%lu.%d]): %d,%d",
23616 +               inode, inode->i_ino, inode->i_tag,
23617 +               new_tag, (propagate)?1:0);
23618 +
23619 +       if (propagate)
23620 +               inode->i_tag = new_tag;
23621 +}
23622 +
23623 +#include <linux/module.h>
23624 +
23625 +EXPORT_SYMBOL_GPL(__dx_propagate_tag);
23626 +
23627 +#endif /* CONFIG_PROPAGATE */
23628 +
23629 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/legacy.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/legacy.c
23630 --- linux-2.6.16.20/kernel/vserver/legacy.c     1970-01-01 01:00:00 +0100
23631 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/legacy.c        2006-04-28 04:35:46 +0200
23632 @@ -0,0 +1,115 @@
23633 +/*
23634 + *  linux/kernel/vserver/legacy.c
23635 + *
23636 + *  Virtual Server: Legacy Functions
23637 + *
23638 + *  Copyright (C) 2001-2003  Jacques Gelinas
23639 + *  Copyright (C) 2003-2005  Herbert Pötzl
23640 + *
23641 + *  V0.01  broken out from vcontext.c V0.05
23642 + *
23643 + */
23644 +
23645 +#include <linux/sched.h>
23646 +#include <linux/vs_context.h>
23647 +#include <linux/vs_network.h>
23648 +#include <linux/vserver/legacy.h>
23649 +#include <linux/vserver/namespace.h>
23650 +#include <linux/namespace.h>
23651 +
23652 +#include <asm/errno.h>
23653 +#include <asm/uaccess.h>
23654 +
23655 +
23656 +extern int vx_set_init(struct vx_info *, struct task_struct *);
23657 +
23658 +static int vx_set_initpid(struct vx_info *vxi, int pid)
23659 +{
23660 +       struct task_struct *init;
23661 +
23662 +       init = find_task_by_real_pid(pid);
23663 +       if (!init)
23664 +               return -ESRCH;
23665 +
23666 +       vxi->vx_flags &= ~VXF_STATE_INIT;
23667 +       return vx_set_init(vxi, init);
23668 +}
23669 +
23670 +int vc_new_s_context(uint32_t ctx, void __user *data)
23671 +{
23672 +       int ret = -ENOMEM;
23673 +       struct vcmd_new_s_context_v1 vc_data;
23674 +       struct vx_info *new_vxi;
23675 +
23676 +       if (copy_from_user(&vc_data, data, sizeof(vc_data)))
23677 +               return -EFAULT;
23678 +
23679 +       /* legacy hack, will be removed soon */
23680 +       if (ctx == -2) {
23681 +               /* assign flags and initpid */
23682 +               if (!current->vx_info)
23683 +                       return -EINVAL;
23684 +               ret = 0;
23685 +               if (vc_data.flags & VX_INFO_INIT)
23686 +                       ret = vx_set_initpid(current->vx_info, current->tgid);
23687 +               if (ret == 0) {
23688 +                       /* We keep the same vx_id, but lower the capabilities */
23689 +                       current->vx_info->vx_bcaps &= (~vc_data.remove_cap);
23690 +                       ret = vx_current_xid();
23691 +                       current->vx_info->vx_flags |= vc_data.flags;
23692 +               }
23693 +               return ret;
23694 +       }
23695 +
23696 +       if (!vx_check(0, VX_ADMIN) || !capable(CAP_SYS_ADMIN)
23697 +               /* might make sense in the future, or not ... */
23698 +               || vx_flags(VX_INFO_LOCK, 0))
23699 +               return -EPERM;
23700 +
23701 +       /* ugly hack for Spectator */
23702 +       if (ctx == 1) {
23703 +               current->xid = 1;
23704 +               return 0;
23705 +       }
23706 +
23707 +       if (((ctx > MAX_S_CONTEXT) && (ctx != VX_DYNAMIC_ID)) ||
23708 +               (ctx == 0))
23709 +               return -EINVAL;
23710 +
23711 +       if ((ctx == VX_DYNAMIC_ID) || (ctx < MIN_D_CONTEXT))
23712 +               new_vxi = lookup_or_create_vx_info(ctx);
23713 +       else
23714 +               new_vxi = lookup_vx_info(ctx);
23715 +
23716 +       if (!new_vxi)
23717 +               return -EINVAL;
23718 +
23719 +       ret = -EPERM;
23720 +       if (!vx_info_flags(new_vxi, VXF_STATE_SETUP, 0) &&
23721 +               vx_info_flags(new_vxi, VX_INFO_PRIVATE, 0))
23722 +               goto out_put;
23723 +
23724 +       new_vxi->vx_flags &= ~VXF_STATE_SETUP;
23725 +
23726 +       ret = vx_migrate_task(current, new_vxi);
23727 +       if (ret == 0) {
23728 +               current->vx_info->vx_bcaps &= (~vc_data.remove_cap);
23729 +               new_vxi->vx_flags |= vc_data.flags;
23730 +               if (vc_data.flags & VX_INFO_INIT)
23731 +                       vx_set_initpid(new_vxi, current->tgid);
23732 +               if (vc_data.flags & VX_INFO_NAMESPACE)
23733 +                       vx_set_namespace(new_vxi,
23734 +                               current->namespace, current->fs);
23735 +               if (vc_data.flags & VX_INFO_NPROC)
23736 +                       __rlim_set(&new_vxi->limit, RLIMIT_NPROC,
23737 +                               current->signal->rlim[RLIMIT_NPROC].rlim_max);
23738 +
23739 +               /* tweak some defaults for legacy */
23740 +               new_vxi->vx_flags |= (VXF_HIDE_NETIF|VXF_INFO_INIT);
23741 +               ret = new_vxi->vx_id;
23742 +       }
23743 +out_put:
23744 +       put_vx_info(new_vxi);
23745 +       return ret;
23746 +}
23747 +
23748 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/legacynet.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/legacynet.c
23749 --- linux-2.6.16.20/kernel/vserver/legacynet.c  1970-01-01 01:00:00 +0100
23750 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/legacynet.c     2006-04-26 19:07:00 +0200
23751 @@ -0,0 +1,85 @@
23752 +
23753 +/*
23754 + *  linux/kernel/vserver/legacynet.c
23755 + *
23756 + *  Virtual Server: Legacy Network Functions
23757 + *
23758 + *  Copyright (C) 2001-2003  Jacques Gelinas
23759 + *  Copyright (C) 2003-2005  Herbert Pötzl
23760 + *
23761 + *  V0.01  broken out from legacy.c
23762 + *
23763 + */
23764 +
23765 +#include <linux/sched.h>
23766 +#include <linux/vs_context.h>
23767 +#include <linux/vs_network.h>
23768 +#include <linux/vserver/legacy.h>
23769 +// #include <linux/vserver/namespace.h>
23770 +#include <linux/namespace.h>
23771 +#include <linux/err.h>
23772 +
23773 +#include <asm/errno.h>
23774 +#include <asm/uaccess.h>
23775 +
23776 +
23777 +extern struct nx_info *create_nx_info(void);
23778 +
23779 +/*  set ipv4 root (syscall) */
23780 +
23781 +int vc_set_ipv4root(uint32_t nbip, void __user *data)
23782 +{
23783 +       int i, err = -EPERM;
23784 +       struct vcmd_set_ipv4root_v3 vc_data;
23785 +       struct nx_info *new_nxi, *nxi = current->nx_info;
23786 +
23787 +       if (nbip < 0 || nbip > NB_IPV4ROOT)
23788 +               return -EINVAL;
23789 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23790 +               return -EFAULT;
23791 +
23792 +       if (!nxi || nxi->ipv4[0] == 0 || capable(CAP_NET_ADMIN))
23793 +               /* We are allowed to change everything */
23794 +               err = 0;
23795 +       else if (nxi) {
23796 +               int found = 0;
23797 +
23798 +               /* We are allowed to select a subset of the currently
23799 +                  installed IP numbers. No new ones are allowed.
23800 +                  We can't change the broadcast address though. */
23801 +               for (i=0; i<nbip; i++) {
23802 +                       int j;
23803 +                       __u32 nxip = vc_data.nx_mask_pair[i].ip;
23804 +                       for (j=0; j<nxi->nbipv4; j++) {
23805 +                               if (nxip == nxi->ipv4[j]) {
23806 +                                       found++;
23807 +                                       break;
23808 +                               }
23809 +                       }
23810 +               }
23811 +               if ((found == nbip) &&
23812 +                       (vc_data.broadcast == nxi->v4_bcast))
23813 +                       err = 0;
23814 +       }
23815 +       if (err)
23816 +               return err;
23817 +
23818 +       new_nxi = create_nx_info();
23819 +       if (IS_ERR(new_nxi))
23820 +               return -EINVAL;
23821 +
23822 +       new_nxi->nbipv4 = nbip;
23823 +       for (i=0; i<nbip; i++) {
23824 +               new_nxi->ipv4[i] = vc_data.nx_mask_pair[i].ip;
23825 +               new_nxi->mask[i] = vc_data.nx_mask_pair[i].mask;
23826 +       }
23827 +       new_nxi->v4_bcast = vc_data.broadcast;
23828 +       if (nxi)
23829 +               printk("!!! switching nx_info %p->%p\n", nxi, new_nxi);
23830 +
23831 +       nx_migrate_task(current, new_nxi);
23832 +       put_nx_info(new_nxi);
23833 +       return 0;
23834 +}
23835 +
23836 +
23837 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/limit.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/limit.c
23838 --- linux-2.6.16.20/kernel/vserver/limit.c      1970-01-01 01:00:00 +0100
23839 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/limit.c 2006-05-02 02:12:38 +0200
23840 @@ -0,0 +1,272 @@
23841 +/*
23842 + *  linux/kernel/vserver/limit.c
23843 + *
23844 + *  Virtual Server: Context Limits
23845 + *
23846 + *  Copyright (C) 2004-2005  Herbert Pötzl
23847 + *
23848 + *  V0.01  broken out from vcontext V0.05
23849 + *
23850 + */
23851 +
23852 +#include <linux/module.h>
23853 +#include <linux/vs_context.h>
23854 +#include <linux/vs_limit.h>
23855 +#include <linux/vserver/limit.h>
23856 +#include <linux/vserver/switch.h>
23857 +#include <linux/vserver/limit_cmd.h>
23858 +
23859 +#include <asm/errno.h>
23860 +#include <asm/uaccess.h>
23861 +
23862 +
23863 +const char *vlimit_name[NUM_LIMITS] = {
23864 +       [RLIMIT_CPU]            = "CPU",
23865 +       [RLIMIT_RSS]            = "RSS",
23866 +       [RLIMIT_NPROC]          = "NPROC",
23867 +       [RLIMIT_NOFILE]         = "NOFILE",
23868 +       [RLIMIT_MEMLOCK]        = "VML",
23869 +       [RLIMIT_AS]             = "VM",
23870 +       [RLIMIT_LOCKS]          = "LOCKS",
23871 +       [RLIMIT_SIGPENDING]     = "SIGP",
23872 +       [RLIMIT_MSGQUEUE]       = "MSGQ",
23873 +
23874 +       [VLIMIT_NSOCK]          = "NSOCK",
23875 +       [VLIMIT_OPENFD]         = "OPENFD",
23876 +       [VLIMIT_ANON]           = "ANON",
23877 +       [VLIMIT_SHMEM]          = "SHMEM",
23878 +       [VLIMIT_DENTRY]         = "DENTRY",
23879 +};
23880 +
23881 +EXPORT_SYMBOL_GPL(vlimit_name);
23882 +
23883 +
23884 +static int is_valid_rlimit(int id)
23885 +{
23886 +       int valid = 0;
23887 +
23888 +       switch (id) {
23889 +       case RLIMIT_RSS:
23890 +       case RLIMIT_NPROC:
23891 +       case RLIMIT_NOFILE:
23892 +       case RLIMIT_MEMLOCK:
23893 +       case RLIMIT_AS:
23894 +       case RLIMIT_LOCKS:
23895 +       case RLIMIT_MSGQUEUE:
23896 +
23897 +       case VLIMIT_NSOCK:
23898 +       case VLIMIT_OPENFD:
23899 +       case VLIMIT_ANON:
23900 +       case VLIMIT_SHMEM:
23901 +       case VLIMIT_DENTRY:
23902 +               valid = 1;
23903 +               break;
23904 +       }
23905 +       return valid;
23906 +}
23907 +
23908 +static inline uint64_t vc_get_soft(struct vx_info *vxi, int id)
23909 +{
23910 +       rlim_t limit = __rlim_soft(&vxi->limit, id);
23911 +       return VX_VLIM(limit);
23912 +}
23913 +
23914 +static inline uint64_t vc_get_hard(struct vx_info *vxi, int id)
23915 +{
23916 +       rlim_t limit = __rlim_hard(&vxi->limit, id);
23917 +       return VX_VLIM(limit);
23918 +}
23919 +
23920 +static int do_get_rlimit(xid_t xid, uint32_t id,
23921 +       uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum)
23922 +{
23923 +       struct vx_info *vxi;
23924 +
23925 +       if (!is_valid_rlimit(id))
23926 +               return -EINVAL;
23927 +
23928 +       vxi = lookup_vx_info(xid);
23929 +       if (!vxi)
23930 +               return -ESRCH;
23931 +
23932 +       if (minimum)
23933 +               *minimum = CRLIM_UNSET;
23934 +       if (softlimit)
23935 +               *softlimit = vc_get_soft(vxi, id);
23936 +       if (maximum)
23937 +               *maximum = vc_get_hard(vxi, id);
23938 +       put_vx_info(vxi);
23939 +       return 0;
23940 +}
23941 +/* vc_get_rlimit(): copy a vcmd_ctx_rlimit_v0 in from user space, fill in its limit fields and copy it back out. */
23942 +int vc_get_rlimit(uint32_t id, void __user *data)
23943 +{
23944 +       struct vcmd_ctx_rlimit_v0 vc_data;
23945 +       int ret;
23946 +
23947 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23948 +               return -EFAULT;
23949 +
23950 +       ret = do_get_rlimit(id, vc_data.id,     /* 'id' is passed as the xid, vc_data.id as the resource */
23951 +               &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
23952 +       if (ret)
23953 +               return ret;     /* nothing is copied back on failure */
23954 +
23955 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
23956 +               return -EFAULT;
23957 +       return 0;
23958 +}
23959 +/* do_set_rlimit(): set hard/soft limit of resource 'id' in context 'xid'; CRLIM_KEEP leaves a value as is; returns 0, -EINVAL or -ESRCH. */
23960 +static int do_set_rlimit(xid_t xid, uint32_t id,
23961 +       uint64_t minimum, uint64_t softlimit, uint64_t maximum) /* 'minimum' is not used below */
23962 +{
23963 +       struct vx_info *vxi;
23964 +
23965 +       if (!is_valid_rlimit(id))
23966 +               return -EINVAL;
23967 +
23968 +       vxi = lookup_vx_info(xid);
23969 +       if (!vxi)
23970 +               return -ESRCH;
23971 +
23972 +       if (maximum != CRLIM_KEEP)
23973 +               __rlim_hard(&vxi->limit, id) = VX_RLIM(maximum);
23974 +       if (softlimit != CRLIM_KEEP)
23975 +               __rlim_soft(&vxi->limit, id) = VX_RLIM(softlimit);
23976 +
23977 +       /* clamp soft limit so that it never exceeds the hard limit */
23978 +       if (__rlim_soft(&vxi->limit, id) > __rlim_hard(&vxi->limit, id))
23979 +               __rlim_soft(&vxi->limit, id) = __rlim_hard(&vxi->limit, id);
23980 +
23981 +       put_vx_info(vxi);       /* pairs with lookup_vx_info() above */
23982 +       return 0;
23983 +}
23984 +/* vc_set_rlimit(): needs CAP_SYS_RESOURCE; copies a vcmd_ctx_rlimit_v0 from user space and applies it with do_set_rlimit(). */
23985 +int vc_set_rlimit(uint32_t id, void __user *data)
23986 +{
23987 +       struct vcmd_ctx_rlimit_v0 vc_data;
23988 +
23989 +       if (!capable(CAP_SYS_RESOURCE))
23990 +               return -EPERM;
23991 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
23992 +               return -EFAULT;
23993 +
23994 +       return do_set_rlimit(id, vc_data.id,    /* 'id' is passed as the xid, vc_data.id as the resource */
23995 +               vc_data.minimum, vc_data.softlimit, vc_data.maximum);
23996 +}
23997 +
23998 +#ifdef CONFIG_IA32_EMULATION
23999 +/* vc_set_rlimit_x32(): CONFIG_IA32_EMULATION variant of the set command; reads the vcmd_ctx_rlimit_v0_x32 layout instead. */
24000 +int vc_set_rlimit_x32(uint32_t id, void __user *data)
24001 +{
24002 +       struct vcmd_ctx_rlimit_v0_x32 vc_data;
24003 +
24004 +       if (!capable(CAP_SYS_RESOURCE))
24005 +               return -EPERM;
24006 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
24007 +               return -EFAULT;
24008 +
24009 +       return do_set_rlimit(id, vc_data.id,    /* 'id' is passed as the xid, vc_data.id as the resource */
24010 +               vc_data.minimum, vc_data.softlimit, vc_data.maximum);
24011 +}
24012 +/* vc_get_rlimit_x32(): CONFIG_IA32_EMULATION variant of the get command; uses the vcmd_ctx_rlimit_v0_x32 layout instead. */
24013 +int vc_get_rlimit_x32(uint32_t id, void __user *data)
24014 +{
24015 +       struct vcmd_ctx_rlimit_v0_x32 vc_data;
24016 +       int ret;
24017 +
24018 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
24019 +               return -EFAULT;
24020 +
24021 +       ret = do_get_rlimit(id, vc_data.id,     /* 'id' is passed as the xid, vc_data.id as the resource */
24022 +               &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
24023 +       if (ret)
24024 +               return ret;     /* nothing is copied back on failure */
24025 +
24026 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
24027 +               return -EFAULT;
24028 +       return 0;
24029 +}
24030 +
24031 +#endif /* CONFIG_IA32_EMULATION */
24032 +
24033 +/* vc_get_rlimit_mask(): needs CAP_SYS_RESOURCE; copies a fixed per-field bitmask of resource ids to user space; 'id' is unused. */
24034 +int vc_get_rlimit_mask(uint32_t id, void __user *data)
24035 +{
24036 +       static struct vcmd_ctx_rlimit_mask_v0 mask = {
24037 +                       /* minimum */
24038 +               0
24039 +               ,       /* softlimit */
24040 +               (1 << RLIMIT_RSS) |
24041 +               (1 << VLIMIT_ANON) |
24042 +               0
24043 +               ,       /* maximum */
24044 +               (1 << RLIMIT_RSS) |
24045 +               (1 << RLIMIT_NPROC) |
24046 +               (1 << RLIMIT_NOFILE) |
24047 +               (1 << RLIMIT_MEMLOCK) |
24048 +               (1 << RLIMIT_LOCKS) |
24049 +               (1 << RLIMIT_AS) |
24050 +               (1 << VLIMIT_ANON) |
24051 +               (1 << VLIMIT_DENTRY) |
24052 +               0
24053 +               };
24054 +
24055 +       if (!capable(CAP_SYS_RESOURCE))
24056 +               return -EPERM;
24057 +       if (copy_to_user(data, &mask, sizeof(mask)))
24058 +               return -EFAULT;
24059 +       return 0;
24060 +}
24061 +
24062 +/* vx_vsi_meminfo(): overwrite the memory fields of *val with figures derived from the current context's RLIMIT_RSS limit and usage. */
24063 +void vx_vsi_meminfo(struct sysinfo *val)
24064 +{
24065 +       struct vx_info *vxi = current->vx_info; /* NOTE(review): used without a NULL check - confirm callers guarantee a context */
24066 +       unsigned long totalram, freeram;
24067 +       rlim_t v;
24068 +
24069 +       /* we blindly accept the max */
24070 +       v = __rlim_soft(&vxi->limit, RLIMIT_RSS);
24071 +       totalram = (v != RLIM_INFINITY) ? v : val->totalram;    /* soft limit, or the incoming total when unlimited */
24072 +
24073 +       /* total minus used equals free */
24074 +       v = __rlim_get(&vxi->limit, RLIMIT_RSS);
24075 +       freeram = (v < totalram) ? totalram - v : 0;    /* floor at 0 when usage reaches the total */
24076 +
24077 +       val->totalram = totalram;
24078 +       val->freeram = freeram;
24079 +       val->bufferram = 0;     /* buffer and highmem figures are reported as zero */
24080 +       val->totalhigh = 0;
24081 +       val->freehigh = 0;
24082 +       return;
24083 +}
24084 +/* vx_vsi_swapinfo(): rewrite the swap fields of *val, treating the span between the soft and hard RLIMIT_RSS limits as swap. */
24085 +void vx_vsi_swapinfo(struct sysinfo *val)
24086 +{
24087 +       struct vx_info *vxi = current->vx_info; /* NOTE(review): used without a NULL check - confirm callers guarantee a context */
24088 +       unsigned long totalswap, freeswap;
24089 +       rlim_t v, w;
24090 +
24091 +       v = __rlim_soft(&vxi->limit, RLIMIT_RSS);
24092 +       if (v == RLIM_INFINITY) {       /* no soft limit: report all swap as free, keep totalswap */
24093 +               val->freeswap = val->totalswap;
24094 +               return;
24095 +       }
24096 +
24097 +       /* we blindly accept the max */
24098 +       w = __rlim_hard(&vxi->limit, RLIMIT_RSS);
24099 +       totalswap = (w != RLIM_INFINITY) ? (w - v) : val->totalswap;    /* hard minus soft, or the incoming total */
24100 +
24101 +       /* currently 'used' swap */
24102 +       w = __rlim_get(&vxi->limit, RLIMIT_RSS);
24103 +       w -= (w > v) ? v : w;   /* usage above the soft limit, 0 if at or below it */
24104 +
24105 +       /* total minus used equals free */
24106 +       freeswap = (w < totalswap) ? totalswap - w : 0;
24107 +
24108 +       val->totalswap = totalswap;
24109 +       val->freeswap = freeswap;
24110 +       return;
24111 +}
24112 +
24113 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/limit_init.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/limit_init.h
24114 --- linux-2.6.16.20/kernel/vserver/limit_init.h 1970-01-01 01:00:00 +0100
24115 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/limit_init.h    2006-04-26 19:07:00 +0200
24116 @@ -0,0 +1,31 @@
24117 +
24118 +/* vx_info_init_limit(): reset all NUM_LIMITS slots: soft/hard to RLIM_INFINITY; current value, hit counter, min and max to 0. */
24119 +static inline void vx_info_init_limit(struct _vx_limit *limit)
24120 +{
24121 +       int lim;
24122 +
24123 +       for (lim=0; lim<NUM_LIMITS; lim++) {
24124 +               __rlim_soft(limit, lim) = RLIM_INFINITY;
24125 +               __rlim_hard(limit, lim) = RLIM_INFINITY;
24126 +               __rlim_set(limit, lim, 0);
24127 +               atomic_set(&__rlim_lhit(limit, lim), 0);
24128 +               __rlim_rmin(limit, lim) = 0;
24129 +               __rlim_rmax(limit, lim) = 0;
24130 +       }
24131 +}
24132 +/* vx_info_exit_limit(): with CONFIG_VSERVER_DEBUG, hand each limit's current value to vxwprintk(); otherwise an empty function. */
24133 +static inline void vx_info_exit_limit(struct _vx_limit *limit)
24134 +{
24135 +#ifdef CONFIG_VSERVER_DEBUG
24136 +       rlim_t value;
24137 +       int lim;
24138 +
24139 +       for (lim=0; lim<NUM_LIMITS; lim++) {
24140 +               value = __rlim_get(limit, lim);
24141 +               vxwprintk(value,        /* presumably only warns when value is non-zero - confirm against vxwprintk() */
24142 +                       "!!! limit: %p[%s,%d] = %ld on exit.",
24143 +                       limit, vlimit_name[lim], lim, (long)value);
24144 +       }
24145 +#endif
24146 +}
24147 +
24148 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/limit_proc.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/limit_proc.h
24149 --- linux-2.6.16.20/kernel/vserver/limit_proc.h 1970-01-01 01:00:00 +0100
24150 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/limit_proc.h    2006-05-02 02:06:16 +0200
24151 @@ -0,0 +1,71 @@
24152 +#ifndef _VX_LIMIT_PROC_H
24153 +#define _VX_LIMIT_PROC_H
24154 +
24155 +/* vx_limit_fixup(): for each limit, stretch the recorded min/max to cover the current value, then cap the max at the hard limit. */
24156 +static inline void vx_limit_fixup(struct _vx_limit *limit)
24157 +{
24158 +       rlim_t value;
24159 +       int lim;
24160 +
24161 +       for (lim=0; lim<NUM_LIMITS; lim++) {
24162 +               value = __rlim_get(limit, lim);
24163 +               if (value > __rlim_rmax(limit, lim))
24164 +                       __rlim_rmax(limit, lim) = value;
24165 +               if (value < __rlim_rmin(limit, lim))
24166 +                       __rlim_rmin(limit, lim) = value;
24167 +               if (__rlim_rmax(limit, lim) > __rlim_hard(limit, lim))
24168 +                       __rlim_rmax(limit, lim) = __rlim_hard(limit, lim);
24169 +       }
24170 +}
24171 +
24172 +/* Row format, table heading and per-row arguments; VX_LIMIT_ARG() expands to leading-comma arguments and needs a local named 'limit'. */
24173 +#define VX_LIMIT_FMT   ":\t%8ld\t%8ld/%8ld\t%8lld/%8lld\t%6d\n" /* NOTE(review): %ld is fed (unsigned long) casts by VX_LIMIT_ARG() */
24174 +#define VX_LIMIT_TOP   \
24175 +       "Limit\t current\t     min/max\t\t    soft/hard\t\thits\n"
24176 +
24177 +#define VX_LIMIT_ARG(r)                                \
24178 +       ,(unsigned long)__rlim_get(limit, r)    \
24179 +       ,(unsigned long)__rlim_rmin(limit, r)   \
24180 +       ,(unsigned long)__rlim_rmax(limit, r)   \
24181 +       ,VX_VLIM(__rlim_soft(limit, r))         \
24182 +       ,VX_VLIM(__rlim_hard(limit, r))         \
24183 +       ,atomic_read(&__rlim_lhit(limit, r))
24184 +/* vx_info_proc_limit(): run vx_limit_fixup(), then sprintf() the heading and 14 rows into buffer; returns sprintf()'s result. */
24185 +static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
24186 +{
24187 +       vx_limit_fixup(limit);
24188 +       return sprintf(buffer, VX_LIMIT_TOP     /* NOTE(review): unbounded sprintf - confirm the caller's buffer size */
24189 +               "PROC"  VX_LIMIT_FMT
24190 +               "VM"    VX_LIMIT_FMT
24191 +               "VML"   VX_LIMIT_FMT
24192 +               "RSS"   VX_LIMIT_FMT
24193 +               "ANON"  VX_LIMIT_FMT
24194 +               "FILES" VX_LIMIT_FMT
24195 +               "OFD"   VX_LIMIT_FMT
24196 +               "LOCKS" VX_LIMIT_FMT
24197 +               "SOCK"  VX_LIMIT_FMT
24198 +               "MSGQ"  VX_LIMIT_FMT
24199 +               "SHM"   VX_LIMIT_FMT
24200 +               "SEMA"  VX_LIMIT_FMT
24201 +               "SEMS"  VX_LIMIT_FMT
24202 +               "DENT"  VX_LIMIT_FMT
24203 +               VX_LIMIT_ARG(RLIMIT_NPROC)      /* argument groups follow the label order above, one per row */
24204 +               VX_LIMIT_ARG(RLIMIT_AS)
24205 +               VX_LIMIT_ARG(RLIMIT_MEMLOCK)
24206 +               VX_LIMIT_ARG(RLIMIT_RSS)
24207 +               VX_LIMIT_ARG(VLIMIT_ANON)
24208 +               VX_LIMIT_ARG(RLIMIT_NOFILE)
24209 +               VX_LIMIT_ARG(VLIMIT_OPENFD)
24210 +               VX_LIMIT_ARG(RLIMIT_LOCKS)
24211 +               VX_LIMIT_ARG(VLIMIT_NSOCK)
24212 +               VX_LIMIT_ARG(RLIMIT_MSGQUEUE)
24213 +               VX_LIMIT_ARG(VLIMIT_SHMEM)
24214 +               VX_LIMIT_ARG(VLIMIT_SEMARY)
24215 +               VX_LIMIT_ARG(VLIMIT_NSEMS)
24216 +               VX_LIMIT_ARG(VLIMIT_DENTRY)
24217 +               );
24218 +}
24219 +
24220 +#endif /* _VX_LIMIT_PROC_H */
24221 +
24222 +
24223 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/monitor.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/monitor.c
24224 --- linux-2.6.16.20/kernel/vserver/monitor.c    1970-01-01 01:00:00 +0100
24225 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/monitor.c       2006-04-26 19:07:00 +0200
24226 @@ -0,0 +1,64 @@
24227 +/*
24228 + *  kernel/vserver/monitor.c
24229 + *
24230 + *  Virtual Context Scheduler Monitor
24231 + *
24232 + *  Copyright (C) 2006 Herbert Pötzl
24233 + *
24234 + *  V0.01  basic design
24235 + *
24236 + */
24237 +
24238 +#include <linux/config.h>
24239 +#include <linux/errno.h>
24240 +#include <linux/module.h>
24241 +#include <linux/types.h>
24242 +#include <linux/ctype.h>
24243 +
24244 +#include <asm/uaccess.h>
24245 +#include <asm/atomic.h>
24246 +#include <asm/unistd.h>
24247 +
24248 +#include <linux/vserver/monitor.h>
24249 +
24250 +/* VXM_SIZE: CONFIG_VSERVER_MONITOR_SIZE when CONFIG_VSERVER_MONITOR is set, otherwise 64. */
24251 +#ifdef CONFIG_VSERVER_MONITOR
24252 +#define VXM_SIZE       CONFIG_VSERVER_MONITOR_SIZE
24253 +#else
24254 +#define VXM_SIZE       64
24255 +#endif
24256 +/* One monitor buffer: a running counter plus VXM_SIZE entries and one extra entry at index VXM_SIZE (see vxm_advance()). */
24257 +struct _vx_monitor {
24258 +       unsigned int counter;   /* incremented by vxm_advance() for every entry handed out while active */
24259 +
24260 +       struct _vx_mon_entry entry[VXM_SIZE+1];
24261 +};
24262 +
24263 +/* Per-CPU monitor buffers, the global enable flag, and the sequence counter stamped into entries by vxm_advance(). */
24264 +DEFINE_PER_CPU(struct _vx_monitor, vx_monitor_buffer);
24265 +
24266 +unsigned volatile int vxm_active = 1;
24267 +
24268 +static atomic_t sequence = ATOMIC_INIT(0);
24269 +
24270 +
24271 +/*     vxm_advance()
24272 +       * returns the next entry of cpu's buffer, stamped with seq and jiffies
24273 +       * requires disabled preemption                          */
24274 +
24275 +struct _vx_mon_entry *vxm_advance(int cpu)
24276 +{
24277 +       struct _vx_monitor *mon = &per_cpu(vx_monitor_buffer, cpu);
24278 +       struct _vx_mon_entry *entry;
24279 +       unsigned int index;
24280 +
24281 +       index = vxm_active ? (mon->counter++ % VXM_SIZE) : VXM_SIZE;    /* wrap within 0..VXM_SIZE-1 when active, else the extra slot */
24282 +       entry = &mon->entry[index];
24283 +
24284 +       entry->ev.seq = atomic_inc_return(&sequence);   /* one sequence shared by all CPUs */
24285 +       entry->ev.jif = jiffies;
24286 +       return entry;
24287 +}
24288 +
24289 +EXPORT_SYMBOL_GPL(vxm_advance);
24290 +
24291 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/namespace.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/namespace.c
24292 --- linux-2.6.16.20/kernel/vserver/namespace.c  1970-01-01 01:00:00 +0100
24293 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/namespace.c     2006-04-26 19:07:00 +0200
24294 @@ -0,0 +1,120 @@
24295 +/*
24296 + *  linux/kernel/vserver/namespace.c
24297 + *
24298 + *  Virtual Server: Context Namespace Support
24299 + *
24300 + *  Copyright (C) 2003-2005  Herbert Pötzl
24301 + *
24302 + *  V0.01  broken out from context.c 0.07
24303 + *  V0.02  added task locking for namespace
24304 + *
24305 + */
24306 +
24307 +#include <linux/utsname.h>
24308 +#include <linux/sched.h>
24309 +#include <linux/vs_context.h>
24310 +#include <linux/vserver/namespace.h>
24311 +#include <linux/vserver/namespace_cmd.h>
24312 +#include <linux/dcache.h>
24313 +#include <linux/mount.h>
24314 +#include <linux/fs.h>
24315 +
24316 +#include <asm/errno.h>
24317 +#include <asm/uaccess.h>
24318 +
24319 +
24320 +/* namespace functions */
24321 +
24322 +#include <linux/namespace.h>
24323 +/* vx_set_namespace(): store ns (with a get_namespace() reference) and a copy of fs in vxi; fails with -EPERM if one is already set. */
24324 +int vx_set_namespace(struct vx_info *vxi, struct namespace *ns, struct fs_struct *fs)
24325 +{
24326 +       struct fs_struct *fs_copy;
24327 +
24328 +       if (vxi->vx_namespace)
24329 +               return -EPERM;          /* a namespace is already recorded for this context */
24330 +       if (!ns || !fs)
24331 +               return -EINVAL;
24332 +
24333 +       fs_copy = copy_fs_struct(fs);
24334 +       if (!fs_copy)
24335 +               return -ENOMEM;
24336 +
24337 +       get_namespace(ns);
24338 +       vxi->vx_namespace = ns;
24339 +       vxi->vx_fs = fs_copy;
24340 +       return 0;
24341 +}
24342 +/* vc_enter_namespace(): switch current to the namespace stored in context 'id' and to a fresh copy of its fs_struct; 'data' is unused. */
24343 +int vc_enter_namespace(uint32_t id, void __user *data)
24344 +{
24345 +       struct vx_info *vxi;
24346 +       struct fs_struct *old_fs, *fs;
24347 +       struct namespace *old_ns;
24348 +       int ret = 0;
24349 +
24350 +       vxi = lookup_vx_info(id);
24351 +       if (!vxi)
24352 +               return -ESRCH;
24353 +
24354 +       ret = -EINVAL;
24355 +       if (!vxi->vx_namespace)         /* context has no namespace recorded */
24356 +               goto out_put;
24357 +
24358 +       ret = -ENOMEM;
24359 +       fs = copy_fs_struct(vxi->vx_fs);
24360 +       if (!fs)
24361 +               goto out_put;
24362 +
24363 +       ret = 0;
24364 +       task_lock(current);             /* swap namespace and fs under the task lock */
24365 +       old_ns = current->namespace;
24366 +       old_fs = current->fs;
24367 +       get_namespace(vxi->vx_namespace);
24368 +       current->namespace = vxi->vx_namespace;
24369 +       current->fs = fs;
24370 +       task_unlock(current);
24371 +
24372 +       put_namespace(old_ns);          /* release the previous namespace and fs after unlocking */
24373 +       put_fs_struct(old_fs);
24374 +out_put:
24375 +       put_vx_info(vxi);
24376 +       return ret;
24377 +}
24378 +/* vc_cleanup_namespace(): call umount_unused() on current's namespace root under vfsmount_lock; 'id' and 'data' are unused; returns 0. */
24379 +int vc_cleanup_namespace(uint32_t id, void __user *data)
24380 +{
24381 +       // down_write(&current->namespace->sem);
24382 +       spin_lock(&vfsmount_lock);
24383 +       umount_unused(current->namespace->root, current->fs);
24384 +       spin_unlock(&vfsmount_lock);
24385 +       // up_write(&current->namespace->sem);
24386 +       return 0;
24387 +}
24388 +/* vc_set_namespace(): record current's namespace and fs in context 'id' via vx_set_namespace(); 'data' is unused. */
24389 +int vc_set_namespace(uint32_t id, void __user *data)
24390 +{
24391 +       struct fs_struct *fs;
24392 +       struct namespace *ns;
24393 +       struct vx_info *vxi;
24394 +       int ret;
24395 +
24396 +       vxi = lookup_vx_info(id);
24397 +       if (!vxi)
24398 +               return -ESRCH;
24399 +
24400 +       task_lock(current);             /* pin fs and namespace while the task lock is held */
24401 +       fs = current->fs;
24402 +       atomic_inc(&fs->count);
24403 +       ns = current->namespace;
24404 +       get_namespace(current->namespace);
24405 +       task_unlock(current);
24406 +
24407 +       ret = vx_set_namespace(vxi, ns, fs);
24408 +
24409 +       put_namespace(ns);              /* drop the temporary references taken above */
24410 +       put_fs_struct(fs);
24411 +       put_vx_info(vxi);
24412 +       return ret;
24413 +}
24414 +
24415 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/network.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/network.c
24416 --- linux-2.6.16.20/kernel/vserver/network.c    1970-01-01 01:00:00 +0100
24417 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/network.c       2006-04-27 20:28:48 +0200
24418 @@ -0,0 +1,781 @@
24419 +/*
24420 + *  linux/kernel/vserver/network.c
24421 + *
24422 + *  Virtual Server: Network Support
24423 + *
24424 + *  Copyright (C) 2003-2005  Herbert Pötzl
24425 + *
24426 + *  V0.01  broken out from vcontext V0.05
24427 + *  V0.02  cleaned up implementation
24428 + *  V0.03  added equiv nx commands
24429 + *  V0.04  switch to RCU based hash
24430 + *  V0.05  and back to locking again
24431 + *
24432 + */
24433 +
24434 +#include <linux/slab.h>
24435 +#include <linux/vserver/network_cmd.h>
24436 +#include <linux/rcupdate.h>
24437 +#include <net/tcp.h>
24438 +
24439 +#include <asm/errno.h>
24440 +
24441 +
24442 +/*     __alloc_nx_info()
24443 +       * returns NULL when kmalloc() fails
24444 +       * allocate an initialized nx_info struct
24445 +       * doesn't make it visible (hash)                        */
24446 +
24447 +static struct nx_info *__alloc_nx_info(nid_t nid)
24448 +{
24449 +       struct nx_info *new = NULL;
24450 +
24451 +       vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
24452 +
24453 +       /* would this benefit from a slab cache? */
24454 +       new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
24455 +       if (!new)
24456 +               return NULL;
24457 +
24458 +       memset (new, 0, sizeof(struct nx_info));
24459 +       new->nx_id = nid;
24460 +       INIT_HLIST_NODE(&new->nx_hlist);
24461 +       atomic_set(&new->nx_usecnt, 0);
24462 +       atomic_set(&new->nx_tasks, 0);
24463 +       new->nx_state = 0;
24464 +
24465 +       new->nx_flags = NXF_INIT_SET;
24466 +
24467 +       /* rest of init goes here */
24468 +
24469 +       vxdprintk(VXD_CBIT(nid, 0),
24470 +               "alloc_nx_info(%d) = %p", nid, new);
24471 +       return new;
24472 +}
24473 +
24474 +/*     __dealloc_nx_info()
24475 +       * BUG()s if the use or task count is non-zero
24476 +       * final disposal of nx_info                             */
24477 +
24478 +static void __dealloc_nx_info(struct nx_info *nxi)
24479 +{
24480 +       vxdprintk(VXD_CBIT(nid, 0),
24481 +               "dealloc_nx_info(%p)", nxi);
24482 +
24483 +       nxi->nx_hlist.next = LIST_POISON1;      /* poison the link and id before the memory is freed */
24484 +       nxi->nx_id = -1;
24485 +
24486 +       BUG_ON(atomic_read(&nxi->nx_usecnt));
24487 +       BUG_ON(atomic_read(&nxi->nx_tasks));
24488 +
24489 +       nxi->nx_state |= NXS_RELEASED;
24490 +       kfree(nxi);
24491 +}
24492 +/* __shutdown_nx_info(): set NXS_SHUTDOWN in nx_state and signal VSC_NETDOWN through vs_net_change(). */
24493 +static void __shutdown_nx_info(struct nx_info *nxi)
24494 +{
24495 +       nxi->nx_state |= NXS_SHUTDOWN;
24496 +       vs_net_change(nxi, VSC_NETDOWN);
24497 +}
24498 +
24499 +/*     exported stuff                                          */
24500 +/* free_nx_info(): sanity-check that nxi is shut down, unhashed and unreferenced, then dispose of it with __dealloc_nx_info(). */
24501 +void free_nx_info(struct nx_info *nxi)
24502 +{
24503 +       /* context shutdown is mandatory */
24504 +       BUG_ON(nxi->nx_state != NXS_SHUTDOWN);  /* exact match: any additional state bit also trips this */
24505 +
24506 +       /* context must not be hashed */
24507 +       BUG_ON(nxi->nx_state & NXS_HASHED);
24508 +
24509 +       BUG_ON(atomic_read(&nxi->nx_usecnt));
24510 +       BUG_ON(atomic_read(&nxi->nx_tasks));
24511 +
24512 +       __dealloc_nx_info(nxi);
24513 +}
24514 +
24515 +
24516 +/*     hash table for nx_info hash */
24517 +
24518 +#define NX_HASH_SIZE   13      /* number of buckets; __hashval() reduces a nid modulo this */
24519 +
24520 +struct hlist_head nx_info_hash[NX_HASH_SIZE];
24521 +
24522 +static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;      /* taken around every walk or update of nx_info_hash in this file */
24523 +
24524 +/* __hashval(): bucket index for nid, i.e. nid modulo NX_HASH_SIZE. */
24525 +static inline unsigned int __hashval(nid_t nid)
24526 +{
24527 +       return (nid % NX_HASH_SIZE);
24528 +}
24529 +
24530 +
24531 +
24532 +/*     __hash_nx_info()
24533 +       * BUG()s if the nxi is already marked NXS_HASHED
24534 +       * add the nxi to the global hash table
24535 +       * requires the hash_lock to be held                     */
24536 +
24537 +static inline void __hash_nx_info(struct nx_info *nxi)
24538 +{
24539 +       struct hlist_head *head;
24540 +
24541 +       vxd_assert_lock(&nx_info_hash_lock);
24542 +       vxdprintk(VXD_CBIT(nid, 4),
24543 +               "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
24544 +
24545 +       /* context must not be hashed */
24546 +       BUG_ON(nx_info_state(nxi, NXS_HASHED));
24547 +
24548 +       nxi->nx_state |= NXS_HASHED;
24549 +       head = &nx_info_hash[__hashval(nxi->nx_id)];    /* bucket chosen by nx_id */
24550 +       hlist_add_head(&nxi->nx_hlist, head);
24551 +}
24552 +
24553 +/*     __unhash_nx_info()
24554 +       * BUG()s if the nxi is not marked NXS_HASHED
24555 +       * remove the nxi from the global hash table
24556 +       * requires the hash_lock to be held                     */
24557 +
24558 +static inline void __unhash_nx_info(struct nx_info *nxi)
24559 +{
24560 +       vxd_assert_lock(&nx_info_hash_lock);
24561 +       vxdprintk(VXD_CBIT(nid, 4),
24562 +               "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id);
24563 +
24564 +       /* context must be hashed */
24565 +       BUG_ON(!nx_info_state(nxi, NXS_HASHED));
24566 +
24567 +       nxi->nx_state &= ~NXS_HASHED;
24568 +       hlist_del(&nxi->nx_hlist);
24569 +}
24570 +
24571 +
24572 +/*     __lookup_nx_info()
24573 +       * returns the nx_info hashed under nid, or NULL
24574 +       * requires the hash_lock to be held
24575 +       * doesn't take a reference on the result               */
24576 +
24577 +static inline struct nx_info *__lookup_nx_info(nid_t nid)
24578 +{
24579 +       struct hlist_head *head = &nx_info_hash[__hashval(nid)];
24580 +       struct hlist_node *pos;
24581 +       struct nx_info *nxi;
24582 +
24583 +       vxd_assert_lock(&nx_info_hash_lock);
24584 +       hlist_for_each(pos, head) {
24585 +               nxi = hlist_entry(pos, struct nx_info, nx_hlist);
24586 +
24587 +               if (nxi->nx_id == nid)
24588 +                       goto found;
24589 +       }
24590 +       nxi = NULL;     /* walked the whole bucket without a match */
24591 +found:
24592 +       vxdprintk(VXD_CBIT(nid, 0),
24593 +               "__lookup_nx_info(#%u): %p[#%u]",
24594 +               nid, nxi, nxi?nxi->nx_id:0);
24595 +       return nxi;
24596 +}
24597 +
24598 +
24599 +/*     __nx_dynamic_id()
24600 +       * returns 0 when every id in the dynamic range is taken
24601 +       * find unused dynamic nid
24602 +       * requires the hash_lock to be held                     */
24603 +
24604 +static inline nid_t __nx_dynamic_id(void)
24605 +{
24606 +       static nid_t seq = MAX_N_CONTEXT;       /* last id handed out; persists across calls */
24607 +       nid_t barrier = seq;                    /* stop once the scan is back where it started */
24608 +
24609 +       vxd_assert_lock(&nx_info_hash_lock);
24610 +       do {
24611 +               if (++seq > MAX_N_CONTEXT)
24612 +                       seq = MIN_D_CONTEXT;    /* wrap around to the start of the dynamic range */
24613 +               if (!__lookup_nx_info(seq)) {
24614 +                       vxdprintk(VXD_CBIT(nid, 4),
24615 +                               "__nx_dynamic_id: [#%d]", seq);
24616 +                       return seq;
24617 +               }
24618 +       } while (barrier != seq);
24619 +       return 0;
24620 +}
24621 +
24622 +/*     __create_nx_info()
24623 +       * returns the new nx_info, or an ERR_PTR() value on failure
24624 +       * create the requested context
24625 +       * get() and hash it                                     */
24626 +
24627 +static struct nx_info * __create_nx_info(int id)
24628 +{
24629 +       struct nx_info *new, *nxi = NULL;
24630 +
24631 +       vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);
24632 +
24633 +       if (!(new = __alloc_nx_info(id)))
24634 +               return ERR_PTR(-ENOMEM);
24635 +
24636 +       /* required to make dynamic nids unique */
24637 +       spin_lock(&nx_info_hash_lock);
24638 +
24639 +       /* dynamic context requested */
24640 +       if (id == NX_DYNAMIC_ID) {
24641 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
24642 +               id = __nx_dynamic_id();
24643 +               if (!id) {
24644 +                       printk(KERN_ERR "no dynamic context available.\n");
24645 +                       nxi = ERR_PTR(-EAGAIN);
24646 +                       goto out_unlock;
24647 +               }
24648 +               new->nx_id = id;
24649 +#else
24650 +               printk(KERN_ERR "dynamic contexts disabled.\n");
24651 +               nxi = ERR_PTR(-EINVAL);
24652 +               goto out_unlock;
24653 +#endif
24654 +       }
24655 +       /* static context requested */
24656 +       else if ((nxi = __lookup_nx_info(id))) {
24657 +               vxdprintk(VXD_CBIT(nid, 0),
24658 +                       "create_nx_info(%d) = %p (already there)", id, nxi);
24659 +               if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
24660 +                       nxi = ERR_PTR(-EBUSY);
24661 +               else
24662 +                       nxi = ERR_PTR(-EEXIST);
24663 +               goto out_unlock;
24664 +       }
24665 +       /* dynamic nid creation blocker */
24666 +       else if (id >= MIN_D_CONTEXT) {
24667 +               vxdprintk(VXD_CBIT(nid, 0),
24668 +                       "create_nx_info(%d) (dynamic rejected)", id);
24669 +               nxi = ERR_PTR(-EINVAL);
24670 +               goto out_unlock;
24671 +       }
24672 +
24673 +       /* new context */
24674 +       vxdprintk(VXD_CBIT(nid, 0),
24675 +               "create_nx_info(%d) = %p (new)", id, new);
24676 +       __hash_nx_info(get_nx_info(new));
24677 +       nxi = new, new = NULL;  /* ownership moved to the hash; nothing left to free below */
24678 +
24679 +out_unlock:
24680 +       spin_unlock(&nx_info_hash_lock);
24681 +       if (new)
24682 +               __dealloc_nx_info(new); /* error paths: discard the unused allocation */
24683 +       return nxi;
24684 +}
24685 +
24686 +
24687 +
24688 +/*     exported stuff                                          */
24689 +
24690 +/* unhash_nx_info(): mark nxi as shut down via __shutdown_nx_info(), then remove it from the hash under nx_info_hash_lock. */
24691 +void unhash_nx_info(struct nx_info *nxi)
24692 +{
24693 +       __shutdown_nx_info(nxi);
24694 +       spin_lock(&nx_info_hash_lock);
24695 +       __unhash_nx_info(nxi);
24696 +       spin_unlock(&nx_info_hash_lock);
24697 +}
24698 +
24699 +#ifdef  CONFIG_VSERVER_LEGACYNET
24700 +/* create_nx_info(): CONFIG_VSERVER_LEGACYNET helper; creates a context with a dynamic id via __create_nx_info(NX_DYNAMIC_ID). */
24701 +struct nx_info *create_nx_info(void)
24702 +{
24703 +       return __create_nx_info(NX_DYNAMIC_ID);
24704 +}
24705 +
24706 +#endif
24707 +
24708 +/*     lookup_nx_info()
24709 +       * ids 0 and 1 are never looked up and yield NULL
24710 +       * search for an nx_info and get() it
24711 +       * negative id means current                             */
24712 +
24713 +struct nx_info *lookup_nx_info(int id)
24714 +{
24715 +       struct nx_info *nxi = NULL;
24716 +
24717 +       if (id < 0) {
24718 +               nxi = get_nx_info(current->nx_info);
24719 +       } else if (id > 1) {
24720 +               spin_lock(&nx_info_hash_lock);
24721 +               nxi = get_nx_info(__lookup_nx_info(id));        /* get() is done while the hash lock is still held */
24722 +               spin_unlock(&nx_info_hash_lock);
24723 +       }
24724 +       return nxi;
24725 +}
24726 +
24727 +/*     nid_is_hashed()
24728 +       * returns 1 if __lookup_nx_info() finds nid, else 0
24729 +       * verify that nid is still hashed                       */
24730 +
24731 +int nid_is_hashed(nid_t nid)
24732 +{
24733 +       int hashed;
24734 +
24735 +       spin_lock(&nx_info_hash_lock);
24736 +       hashed = (__lookup_nx_info(nid) != NULL);
24737 +       spin_unlock(&nx_info_hash_lock);
24738 +       return hashed;
24739 +}
24740 +
24741 +
24742 +#ifdef CONFIG_PROC_FS
24743 +
24744 +/*     get_nid_list()
24745 +       * returns the number of nids stored into nids[]
24746 +       * get a subset of hashed nids for proc
24747 +       * assumes size is at least one                          */
24748 +
24749 +int get_nid_list(int index, unsigned int *nids, int size)
24750 +{
24751 +       int hindex, nr_nids = 0;
24752 +
24753 +       /* without VX_ADMIN|VX_WATCH only the caller's own nid is listed */
24754 +       if (!nx_check(0, VX_ADMIN|VX_WATCH)) {
24755 +               if (index > 0)
24756 +                       return 0;
24757 +               nids[nr_nids] = nx_current_nid();
24758 +               return 1;
24759 +       }
24760 +
24761 +       for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
24762 +               struct hlist_head *head = &nx_info_hash[hindex];
24763 +               struct hlist_node *pos;
24764 +
24765 +               spin_lock(&nx_info_hash_lock);
24766 +               hlist_for_each(pos, head) {
24767 +                       struct nx_info *nxi;
24768 +
24769 +                       if (--index > 0)        /* skip entries until the start index is consumed */
24770 +                               continue;
24771 +
24772 +                       nxi = hlist_entry(pos, struct nx_info, nx_hlist);
24773 +                       nids[nr_nids] = nxi->nx_id;
24774 +                       if (++nr_nids >= size) {        /* output array is full */
24775 +                               spin_unlock(&nx_info_hash_lock);
24776 +                               goto out;
24777 +                       }
24778 +               }
24779 +               /* keep the lock time short */
24780 +               spin_unlock(&nx_info_hash_lock);
24781 +       }
24782 +out:
24783 +       return nr_nids;
24784 +}
24785 +#endif
24786 +
24787 +
24788 +/*
24789 + *     migrate task to new network
24790 + *     gets nxi, puts old_nxi on change
24791 + */
24792 +
24793 +int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
24794 +{
24795 +       struct nx_info *old_nxi;
24796 +       int ret = 0;    /* never changed below, so the function always returns 0 */
24797 +
24798 +       if (!p || !nxi)
24799 +               BUG();
24800 +
24801 +       vxdprintk(VXD_CBIT(nid, 5),
24802 +               "nx_migrate_task(%p,%p[#%d.%d.%d])",
24803 +               p, nxi, nxi->nx_id,
24804 +               atomic_read(&nxi->nx_usecnt),
24805 +               atomic_read(&nxi->nx_tasks));
24806 +
24807 +       /* maybe disallow this completely? */
24808 +       old_nxi = task_get_nx_info(p);
24809 +       if (old_nxi == nxi)     /* already in the target context: nothing to move */
24810 +               goto out;
24811 +
24812 +       task_lock(p);           /* replace p's nx_info and nid under the task lock */
24813 +       if (old_nxi)
24814 +               clr_nx_info(&p->nx_info);
24815 +       claim_nx_info(nxi, p);
24816 +       set_nx_info(&p->nx_info, nxi);
24817 +       p->nid = nxi->nx_id;
24818 +       task_unlock(p);
24819 +
24820 +       vxdprintk(VXD_CBIT(nid, 5),
24821 +               "moved task %p into nxi:%p[#%d]",
24822 +               p, nxi, nxi->nx_id);
24823 +
24824 +       if (old_nxi)
24825 +               release_nx_info(old_nxi, p);
24826 +out:
24827 +       put_nx_info(old_nxi);   /* balances task_get_nx_info(); NOTE(review): old_nxi may be NULL here - confirm put_nx_info() accepts that */
24828 +       return ret;
24829 +}
24830 +
24831 +
24832 +#ifdef CONFIG_INET
24833 +
24834 +#include <linux/netdevice.h>
24835 +#include <linux/inetdevice.h>
24836 +/* ifa_in_nx_info(): 1 if nxi is NULL, 0 if ifa is NULL, otherwise addr_in_nx_info() applied to ifa->ifa_address. */
24837 +int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
24838 +{
24839 +       if (!nxi)
24840 +               return 1;
24841 +       if (!ifa)
24842 +               return 0;
24843 +       return addr_in_nx_info(nxi, ifa->ifa_address);
24844 +}
24845 +/* dev_in_nx_info(): 1 if nxi is NULL or any address on dev's in_device passes addr_in_nx_info(); 0 otherwise. */
24846 +int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
24847 +{
24848 +       struct in_device *in_dev;
24849 +       struct in_ifaddr **ifap;
24850 +       struct in_ifaddr *ifa;
24851 +       int ret = 0;
24852 +
24853 +       if (!nxi)
24854 +               return 1;
24855 +
24856 +       in_dev = in_dev_get(dev);
24857 +       if (!in_dev)            /* no in_device for dev: report no match */
24858 +               goto out;
24859 +
24860 +       for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
24861 +               ifap = &ifa->ifa_next) {
24862 +               if (addr_in_nx_info(nxi, ifa->ifa_address)) {
24863 +                       ret = 1;
24864 +                       break;
24865 +               }
24866 +       }
24867 +       in_dev_put(in_dev);     /* pairs with in_dev_get() above */
24868 +out:
24869 +       return ret;
24870 +}
24871 +
24872 +/*
24873 + *     check if address is covered by socket
24874 + *
24875 + *     sk:     the socket to check against
24876 + *     addr:   the address in question (must be != 0)
24877 + */
24878 +static inline int __addr_in_socket(struct sock *sk, uint32_t addr)
24879 +{
24880 +       struct nx_info *nxi = sk->sk_nx_info;
24881 +       uint32_t saddr = inet_rcv_saddr(sk);
24882 +
24883 +       vxdprintk(VXD_CBIT(net, 5),
24884 +               "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
24885 +               sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
24886 +               (sk->sk_socket?sk->sk_socket->flags:0));
24887 +
24888 +       if (saddr) {            /* socket has a non-zero receive address */
24889 +               /* direct address match */
24890 +               return (saddr == addr);
24891 +       } else if (nxi) {       /* zero receive address, socket belongs to a context */
24892 +               /* match against nx_info */
24893 +               return addr_in_nx_info(nxi, addr);
24894 +       } else {
24895 +               /* unrestricted any socket */
24896 +               return 1;
24897 +       }
24898 +}
24899 +
24900 +/* nx_addr_conflict(): non-zero if addr (or, when addr is 0, any of nxi's ipv4[] addresses) is covered by sk; 1 if both are unset. */
24901 +int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk)
24902 +{
24903 +       vxdprintk(VXD_CBIT(net, 2),
24904 +               "nx_addr_conflict(%p,%p) %d.%d.%d.%d",
24905 +               nxi, sk, VXD_QUAD(addr));
24906 +
24907 +       if (addr) {
24908 +               /* check real address */
24909 +               return __addr_in_socket(sk, addr);
24910 +       } else if (nxi) {
24911 +               /* check against nx_info */
24912 +               int i, n = nxi->nbipv4;
24913 +
24914 +               for (i=0; i<n; i++)
24915 +                       if (__addr_in_socket(sk, nxi->ipv4[i]))
24916 +                               return 1;
24917 +               return 0;
24918 +       } else {
24919 +               /* check against any */
24920 +               return 1;
24921 +       }
24922 +}
24923 +
24924 +#endif /* CONFIG_INET */
24925 +
24926 +void nx_set_persistent(struct nx_info *nxi)
24927 +{
24928 +       if (nx_info_flags(nxi, NXF_PERSISTENT, 0)) {
24929 +               get_nx_info(nxi);
24930 +               claim_nx_info(nxi, current);
24931 +       } else {
24932 +               release_nx_info(nxi, current);
24933 +               put_nx_info(nxi);
24934 +       }
24935 +}
24936 +
24937 +/* vserver syscall commands below here */
24938 +
24939 +/* task nid and nx_info functions */
24940 +
24941 +#include <asm/uaccess.h>
24942 +
24943 +
24944 +int vc_task_nid(uint32_t id, void __user *data)
24945 +{
24946 +       nid_t nid;
24947 +
24948 +       if (id) {
24949 +               struct task_struct *tsk;
24950 +
24951 +               if (!vx_check(0, VX_ADMIN|VX_WATCH))
24952 +                       return -EPERM;
24953 +
24954 +               read_lock(&tasklist_lock);
24955 +               tsk = find_task_by_real_pid(id);
24956 +               nid = (tsk) ? tsk->nid : -ESRCH;
24957 +               read_unlock(&tasklist_lock);
24958 +       }
24959 +       else
24960 +               nid = nx_current_nid();
24961 +       return nid;
24962 +}
24963 +
24964 +
24965 +int vc_nx_info(uint32_t id, void __user *data)
24966 +{
24967 +       struct nx_info *nxi;
24968 +       struct vcmd_nx_info_v0 vc_data;
24969 +
24970 +       if (!capable(CAP_SYS_RESOURCE))
24971 +               return -EPERM;
24972 +
24973 +       nxi = lookup_nx_info(id);
24974 +       if (!nxi)
24975 +               return -ESRCH;
24976 +
24977 +       vc_data.nid = nxi->nx_id;
24978 +       put_nx_info(nxi);
24979 +
24980 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
24981 +               return -EFAULT;
24982 +       return 0;
24983 +}
24984 +
24985 +
24986 +/* network functions */
24987 +
24988 +int vc_net_create(uint32_t nid, void __user *data)
24989 +{
24990 +       struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
24991 +       struct nx_info *new_nxi;
24992 +       int ret;
24993 +
24994 +       if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
24995 +               return -EFAULT;
24996 +
24997 +       if ((nid > MAX_S_CONTEXT) && (nid != VX_DYNAMIC_ID))
24998 +               return -EINVAL;
24999 +       if (nid < 2)
25000 +               return -EINVAL;
25001 +
25002 +       new_nxi = __create_nx_info(nid);
25003 +       if (IS_ERR(new_nxi))
25004 +               return PTR_ERR(new_nxi);
25005 +
25006 +       /* initial flags */
25007 +       new_nxi->nx_flags = vc_data.flagword;
25008 +
25009 +       /* get a reference for persistent contexts */
25010 +       if ((vc_data.flagword & NXF_PERSISTENT))
25011 +               nx_set_persistent(new_nxi);
25012 +
25013 +       vs_net_change(new_nxi, VSC_NETUP);
25014 +       ret = new_nxi->nx_id;
25015 +       nx_migrate_task(current, new_nxi);
25016 +       /* if this fails, we might end up with a hashed nx_info */
25017 +       put_nx_info(new_nxi);
25018 +       return ret;
25019 +}
25020 +
25021 +
25022 +int vc_net_migrate(uint32_t id, void __user *data)
25023 +{
25024 +       struct nx_info *nxi;
25025 +
25026 +       nxi = lookup_nx_info(id);
25027 +       if (!nxi)
25028 +               return -ESRCH;
25029 +       nx_migrate_task(current, nxi);
25030 +       put_nx_info(nxi);
25031 +       return 0;
25032 +}
25033 +
25034 +int vc_net_add(uint32_t nid, void __user *data)
25035 +{
25036 +       struct vcmd_net_addr_v0 vc_data;
25037 +       struct nx_info *nxi;
25038 +       int index, pos, ret = 0;
25039 +
25040 +       if (!data || copy_from_user (&vc_data, data, sizeof(vc_data)))
25041 +               return -EFAULT; /* never act on an uninitialized vc_data */
25042 +
25043 +       switch (vc_data.type) {
25044 +       case NXA_TYPE_IPV4:
25045 +               if ((vc_data.count < 1) || (vc_data.count > 4))
25046 +                       return -EINVAL;
25047 +               break;
25048 +
25049 +       default:
25050 +               break;
25051 +       }
25052 +
25053 +       nxi = lookup_nx_info(nid);
25054 +       if (!nxi)
25055 +               return -ESRCH;
25056 +
25057 +       switch (vc_data.type) {
25058 +       case NXA_TYPE_IPV4:
25059 +               index = 0;
25060 +               while ((index < vc_data.count) &&
25061 +                       ((pos = nxi->nbipv4) < NB_IPV4ROOT)) {
25062 +                       nxi->ipv4[pos] = vc_data.ip[index];
25063 +                       nxi->mask[pos] = vc_data.mask[index];
25064 +                       index++;
25065 +                       nxi->nbipv4++;
25066 +               }
25067 +               ret = index;    /* number of addresses actually stored */
25068 +               break;
25069 +
25070 +       case NXA_TYPE_IPV4|NXA_MOD_BCAST:
25071 +               nxi->v4_bcast = vc_data.ip[0];
25072 +               ret = 1;
25073 +               break;
25074 +
25075 +       default:
25076 +               ret = -EINVAL;
25077 +               break;
25078 +       }
25079 +
25080 +       put_nx_info(nxi);
25081 +       return ret;
25082 +}
25083 +
25084 +int vc_net_remove(uint32_t nid, void __user *data)
25085 +{
25086 +       struct vcmd_net_addr_v0 vc_data;
25087 +       struct nx_info *nxi;
25088 +       int ret = 0;
25089 +
25090 +       if (!data || copy_from_user (&vc_data, data, sizeof(vc_data)))
25091 +               return -EFAULT; /* vc_data.type must come from the caller */
25092 +
25093 +       nxi = lookup_nx_info(nid);
25094 +       if (!nxi)
25095 +               return -ESRCH;
25096 +
25097 +       switch ((unsigned)vc_data.type) {
25098 +       case NXA_TYPE_ANY:
25099 +               nxi->nbipv4 = 0;        /* drop all IPv4 entries at once */
25100 +               break;
25101 +
25102 +       default:
25103 +               ret = -EINVAL;
25104 +               break;
25105 +       }
25106 +
25107 +       put_nx_info(nxi);
25108 +       return ret;
25109 +}
25110 +
25111 +int vc_get_nflags(uint32_t id, void __user *data)
25112 +{
25113 +       struct nx_info *nxi;
25114 +       struct vcmd_net_flags_v0 vc_data;
25115 +
25116 +       nxi = lookup_nx_info(id);
25117 +       if (!nxi)
25118 +               return -ESRCH;
25119 +
25120 +       vc_data.flagword = nxi->nx_flags;
25121 +
25122 +       /* special STATE flag handling */
25123 +       vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, NXF_ONE_TIME);
25124 +
25125 +       put_nx_info(nxi);
25126 +
25127 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
25128 +               return -EFAULT;
25129 +       return 0;
25130 +}
25131 +
25132 +int vc_set_nflags(uint32_t id, void __user *data)
25133 +{
25134 +       struct nx_info *nxi;
25135 +       struct vcmd_net_flags_v0 vc_data;
25136 +       uint64_t mask, trigger;
25137 +
25138 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
25139 +               return -EFAULT;
25140 +
25141 +       nxi = lookup_nx_info(id);
25142 +       if (!nxi)
25143 +               return -ESRCH;
25144 +
25145 +       /* special STATE flag handling */
25146 +       mask = vx_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
25147 +       trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
25148 +
25149 +       nxi->nx_flags = vx_mask_flags(nxi->nx_flags,
25150 +               vc_data.flagword, mask);
25151 +       if (trigger & NXF_PERSISTENT)
25152 +               nx_set_persistent(nxi);
25153 +
25154 +       put_nx_info(nxi);
25155 +       return 0;
25156 +}
25157 +
25158 +int vc_get_ncaps(uint32_t id, void __user *data)
25159 +{
25160 +       struct nx_info *nxi;
25161 +       struct vcmd_net_caps_v0 vc_data;
25162 +
25163 +       nxi = lookup_nx_info(id);
25164 +       if (!nxi)
25165 +               return -ESRCH;
25166 +
25167 +       vc_data.ncaps = nxi->nx_ncaps;
25168 +       vc_data.cmask = ~0UL;
25169 +       put_nx_info(nxi);
25170 +
25171 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
25172 +               return -EFAULT;
25173 +       return 0;
25174 +}
25175 +
25176 +int vc_set_ncaps(uint32_t id, void __user *data)
25177 +{
25178 +       struct nx_info *nxi;
25179 +       struct vcmd_net_caps_v0 vc_data;
25180 +
25181 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
25182 +               return -EFAULT;
25183 +
25184 +       nxi = lookup_nx_info(id);
25185 +       if (!nxi)
25186 +               return -ESRCH;
25187 +
25188 +       nxi->nx_ncaps = vx_mask_flags(nxi->nx_ncaps,
25189 +               vc_data.ncaps, vc_data.cmask);
25190 +       put_nx_info(nxi);
25191 +       return 0;
25192 +}
25193 +
25194 +
25195 +#include <linux/module.h>
25196 +
25197 +EXPORT_SYMBOL_GPL(free_nx_info);
25198 +EXPORT_SYMBOL_GPL(unhash_nx_info);
25199 +
25200 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/proc.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/proc.c
25201 --- linux-2.6.16.20/kernel/vserver/proc.c       1970-01-01 01:00:00 +0100
25202 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/proc.c  2006-05-27 19:15:19 +0200
25203 @@ -0,0 +1,921 @@
25204 +/*
25205 + *  linux/kernel/vserver/proc.c
25206 + *
25207 + *  Virtual Context Support
25208 + *
25209 + *  Copyright (C) 2003-2005  Herbert Pötzl
25210 + *
25211 + *  V0.01  basic structure
25212 + *  V0.02  adaptation vs1.3.0
25213 + *  V0.03  proc permissions
25214 + *  V0.04  locking/generic
25215 + *  V0.05  next generation procfs
25216 + *  V0.06  inode validation
25217 + *  V0.07  generic rewrite vid
25218 + *
25219 + */
25220 +
25221 +#include <linux/errno.h>
25222 +#include <linux/proc_fs.h>
25223 +#include <linux/sched.h>
25224 +#include <linux/vs_context.h>
25225 +#include <linux/vs_network.h>
25226 +#include <linux/vs_cvirt.h>
25227 +
25228 +#include <linux/vserver/switch.h>
25229 +#include <linux/vserver/global.h>
25230 +
25231 +#include <asm/uaccess.h>
25232 +#include <asm/unistd.h>
25233 +
25234 +#include "cvirt_proc.h"
25235 +#include "limit_proc.h"
25236 +#include "sched_proc.h"
25237 +#include "vci_config.h"
25238 +
25239 +static struct proc_dir_entry *proc_virtual;
25240 +
25241 +static struct proc_dir_entry *proc_vnet;
25242 +
25243 +
25244 +enum vid_directory_inos {
25245 +       PROC_XID_INO = 32,
25246 +       PROC_XID_INFO,
25247 +       PROC_XID_STATUS,
25248 +       PROC_XID_LIMIT,
25249 +       PROC_XID_SCHED,
25250 +       PROC_XID_CVIRT,
25251 +       PROC_XID_CACCT,
25252 +
25253 +       PROC_NID_INO = 64,
25254 +       PROC_NID_INFO,
25255 +       PROC_NID_STATUS,
25256 +};
25257 +
25258 +#define PROC_VID_MASK  0x60
25259 +
25260 +
25261 +/* first the actual feeds */
25262 +
25263 +
25264 +static int proc_virtual_info(int vid, char *buffer)
25265 +{
25266 +       return sprintf(buffer,
25267 +               "VCIVersion:\t%04x:%04x\n"
25268 +               "VCISyscall:\t%d\n"
25269 +               "VCIKernel:\t%08x\n"
25270 +               ,VCI_VERSION >> 16
25271 +               ,VCI_VERSION & 0xFFFF
25272 +               ,__NR_vserver
25273 +               ,vci_kernel_config()
25274 +               );
25275 +}
25276 +
25277 +static int proc_virtual_status(int vid, char *buffer)
25278 +{
25279 +       return sprintf(buffer,
25280 +               "#CTotal:\t%d\n"
25281 +               "#CActive:\t%d\n"
25282 +               ,atomic_read(&vx_global_ctotal)
25283 +               ,atomic_read(&vx_global_cactive)
25284 +               );
25285 +}
25286 +
25287 +
25288 +int proc_xid_info (int vid, char *buffer)
25289 +{
25290 +       struct vx_info *vxi;
25291 +       int length;
25292 +
25293 +       vxi = lookup_vx_info(vid);
25294 +       if (!vxi)
25295 +               return 0;
25296 +       length = sprintf(buffer,
25297 +               "ID:\t%d\n"
25298 +               "Info:\t%p\n"
25299 +               "Init:\t%d\n"
25300 +               ,vxi->vx_id
25301 +               ,vxi
25302 +               ,vxi->vx_initpid
25303 +               );
25304 +       put_vx_info(vxi);
25305 +       return length;
25306 +}
25307 +
25308 +int proc_xid_status (int vid, char *buffer)
25309 +{
25310 +       struct vx_info *vxi;
25311 +       int length;
25312 +
25313 +       vxi = lookup_vx_info(vid);
25314 +       if (!vxi)
25315 +               return 0;
25316 +       length = sprintf(buffer,
25317 +               "UseCnt:\t%d\n"
25318 +               "Tasks:\t%d\n"
25319 +               "Flags:\t%016llx\n"
25320 +               "BCaps:\t%016llx\n"
25321 +               "CCaps:\t%016llx\n"
25322 +//             "Ticks:\t%d\n"
25323 +               ,atomic_read(&vxi->vx_usecnt)
25324 +               ,atomic_read(&vxi->vx_tasks)
25325 +               ,(unsigned long long)vxi->vx_flags
25326 +               ,(unsigned long long)vxi->vx_bcaps
25327 +               ,(unsigned long long)vxi->vx_ccaps
25328 +//             ,atomic_read(&vxi->limit.ticks)
25329 +               );
25330 +       put_vx_info(vxi);
25331 +       return length;
25332 +}
25333 +
25334 +int proc_xid_limit (int vid, char *buffer)
25335 +{
25336 +       struct vx_info *vxi;
25337 +       int length;
25338 +
25339 +       vxi = lookup_vx_info(vid);
25340 +       if (!vxi)
25341 +               return 0;
25342 +       length = vx_info_proc_limit(&vxi->limit, buffer);
25343 +       put_vx_info(vxi);
25344 +       return length;
25345 +}
25346 +
25347 +int proc_xid_sched (int vid, char *buffer)
25348 +{
25349 +       struct vx_info *vxi;
25350 +       int cpu, length;
25351 +
25352 +       vxi = lookup_vx_info(vid);
25353 +       if (!vxi)
25354 +               return 0;
25355 +       length = vx_info_proc_sched(&vxi->sched, buffer);
25356 +       for_each_online_cpu(cpu) {
25357 +               length += vx_info_proc_sched_pc(
25358 +                       &vx_per_cpu(vxi, sched_pc, cpu),
25359 +                       buffer + length, cpu);
25360 +       }
25361 +       put_vx_info(vxi);
25362 +       return length;
25363 +}
25364 +
25365 +int proc_xid_cvirt (int vid, char *buffer)
25366 +{
25367 +       struct vx_info *vxi;
25368 +       int cpu, length;
25369 +
25370 +       vxi = lookup_vx_info(vid);
25371 +       if (!vxi)
25372 +               return 0;
25373 +       vx_update_load(vxi);
25374 +       length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
25375 +       for_each_online_cpu(cpu) {
25376 +               length += vx_info_proc_cvirt_pc(
25377 +                       &vx_per_cpu(vxi, cvirt_pc, cpu),
25378 +                       buffer + length, cpu);
25379 +       }
25380 +       put_vx_info(vxi);
25381 +       return length;
25382 +}
25383 +
25384 +int proc_xid_cacct (int vid, char *buffer)
25385 +{
25386 +       struct vx_info *vxi;
25387 +       int length;
25388 +
25389 +       vxi = lookup_vx_info(vid);
25390 +       if (!vxi)
25391 +               return 0;
25392 +       length = vx_info_proc_cacct(&vxi->cacct, buffer);
25393 +       put_vx_info(vxi);
25394 +       return length;
25395 +}
25396 +
25397 +
25398 +static int proc_vnet_info(int vid, char *buffer)
25399 +{
25400 +       return sprintf(buffer,
25401 +               "VCIVersion:\t%04x:%04x\n"
25402 +               "VCISyscall:\t%d\n"
25403 +               ,VCI_VERSION >> 16
25404 +               ,VCI_VERSION & 0xFFFF
25405 +               ,__NR_vserver
25406 +               );
25407 +}
25408 +
25409 +
25410 +int proc_nid_info (int vid, char *buffer)
25411 +{
25412 +       struct nx_info *nxi;
25413 +       int length, i;
25414 +
25415 +       nxi = lookup_nx_info(vid);
25416 +       if (!nxi)
25417 +               return 0;
25418 +       length = sprintf(buffer,
25419 +               "ID:\t%d\n"
25420 +               "Info:\t%p\n"
25421 +               ,nxi->nx_id
25422 +               ,nxi
25423 +               );
25424 +       for (i=0; i<nxi->nbipv4; i++) {
25425 +               length += sprintf(buffer + length,
25426 +                       "%d:\t" NIPQUAD_FMT "/" NIPQUAD_FMT "\n", i,
25427 +                       NIPQUAD(nxi->ipv4[i]), NIPQUAD(nxi->mask[i]));
25428 +       }
25429 +       put_nx_info(nxi);
25430 +       return length;
25431 +}
25432 +
25433 +int proc_nid_status (int vid, char *buffer)
25434 +{
25435 +       struct nx_info *nxi;
25436 +       int length;
25437 +
25438 +       nxi = lookup_nx_info(vid);
25439 +       if (!nxi)
25440 +               return 0;
25441 +       length = sprintf(buffer,
25442 +               "UseCnt:\t%d\n"
25443 +               "Tasks:\t%d\n"
25444 +               ,atomic_read(&nxi->nx_usecnt)
25445 +               ,atomic_read(&nxi->nx_tasks)
25446 +               );
25447 +       put_nx_info(nxi);
25448 +       return length;
25449 +}
25450 +
25451 +/* here the inode helpers */
25452 +
25453 +
25454 +#define fake_ino(id,nr) (((nr) & 0xFFFF) | \
25455 +                       (((id) & 0xFFFF) << 16))
25456 +
25457 +#define inode_vid(i)   (((i)->i_ino >> 16) & 0xFFFF)
25458 +#define inode_type(i)  ((i)->i_ino & 0xFFFF)
25459 +
25460 +#define MAX_MULBY10    ((~0U-9)/10)
25461 +
25462 +
25463 +static struct inode *proc_vid_make_inode(struct super_block * sb,
25464 +       int vid, int ino)
25465 +{
25466 +       struct inode *inode = new_inode(sb);
25467 +
25468 +       if (!inode)
25469 +               goto out;
25470 +
25471 +       inode->i_mtime = inode->i_atime =
25472 +               inode->i_ctime = CURRENT_TIME;
25473 +       inode->i_ino = fake_ino(vid, ino);
25474 +
25475 +       inode->i_uid = 0;
25476 +       inode->i_gid = 0;
25477 +out:
25478 +       return inode;
25479 +}
25480 +
25481 +static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd)
25482 +{
25483 +       struct inode * inode = dentry->d_inode;
25484 +       int vid, hashed=0;
25485 +
25486 +       vid = inode_vid(inode);
25487 +       switch (inode_type(inode) & PROC_VID_MASK) {
25488 +               case PROC_XID_INO:
25489 +                       hashed = xid_is_hashed(vid);
25490 +                       break;
25491 +               case PROC_NID_INO:
25492 +                       hashed = nid_is_hashed(vid);
25493 +                       break;
25494 +       }
25495 +       if (hashed)
25496 +               return 1;
25497 +       d_drop(dentry);
25498 +       return 0;
25499 +}
25500 +
25501 +
25502 +#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
25503 +
25504 +static ssize_t proc_vid_info_read(struct file * file, char __user * buf,
25505 +                         size_t count, loff_t *ppos)
25506 +{
25507 +       struct inode * inode = file->f_dentry->d_inode;
25508 +       unsigned long page;
25509 +       ssize_t length;
25510 +       int vid;
25511 +
25512 +       if (count > PROC_BLOCK_SIZE)
25513 +               count = PROC_BLOCK_SIZE;
25514 +       if (!(page = __get_free_page(GFP_KERNEL)))
25515 +               return -ENOMEM;
25516 +
25517 +       vid = inode_vid(inode);
25518 +       length = PROC_I(inode)->op.proc_vid_read(vid, (char*)page);
25519 +
25520 +       if (length >= 0)
25521 +               length = simple_read_from_buffer(buf, count, ppos,
25522 +                       (char *)page, length);
25523 +       free_page(page);
25524 +       return length;
25525 +}
25526 +
25527 +
25528 +
25529 +
25530 +
25531 +/* here comes the lower level (vid) */
25532 +
25533 +static struct file_operations proc_vid_info_file_operations = {
25534 +       .read =         proc_vid_info_read,
25535 +};
25536 +
25537 +static struct dentry_operations proc_vid_dentry_operations = {
25538 +       .d_revalidate = proc_vid_revalidate,
25539 +};
25540 +
25541 +
25542 +struct vid_entry {
25543 +       int type;
25544 +       int len;
25545 +       char *name;
25546 +       mode_t mode;
25547 +};
25548 +
25549 +#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
25550 +
25551 +static struct vid_entry vx_base_stuff[] = {
25552 +       E(PROC_XID_INFO,        "info",         S_IFREG|S_IRUGO),
25553 +       E(PROC_XID_STATUS,      "status",       S_IFREG|S_IRUGO),
25554 +       E(PROC_XID_LIMIT,       "limit",        S_IFREG|S_IRUGO),
25555 +       E(PROC_XID_SCHED,       "sched",        S_IFREG|S_IRUGO),
25556 +       E(PROC_XID_CVIRT,       "cvirt",        S_IFREG|S_IRUGO),
25557 +       E(PROC_XID_CACCT,       "cacct",        S_IFREG|S_IRUGO),
25558 +       {0,0,NULL,0}
25559 +};
25560 +
25561 +static struct vid_entry vn_base_stuff[] = {
25562 +       E(PROC_NID_INFO,        "info",         S_IFREG|S_IRUGO),
25563 +       E(PROC_NID_STATUS,      "status",       S_IFREG|S_IRUGO),
25564 +       {0,0,NULL,0}
25565 +};
25566 +
25567 +
25568 +
25569 +static struct dentry *proc_vid_lookup(struct inode *dir,
25570 +       struct dentry *dentry, struct nameidata *nd)
25571 +{
25572 +       struct inode *inode;
25573 +       struct vid_entry *p;
25574 +       int error;
25575 +
25576 +       error = -ENOENT;
25577 +       inode = NULL;
25578 +
25579 +       switch (inode_type(dir)) {
25580 +               case PROC_XID_INO:
25581 +                       p = vx_base_stuff;
25582 +                       break;
25583 +               case PROC_NID_INO:
25584 +                       p = vn_base_stuff;
25585 +                       break;
25586 +               default:
25587 +                       goto out;
25588 +       }
25589 +
25590 +       for (; p->name; p++) {
25591 +               if (p->len != dentry->d_name.len)
25592 +                       continue;
25593 +               if (!memcmp(dentry->d_name.name, p->name, p->len))
25594 +                       break;
25595 +       }
25596 +       if (!p->name)
25597 +               goto out;
25598 +
25599 +       error = -EINVAL;
25600 +       inode = proc_vid_make_inode(dir->i_sb, inode_vid(dir), p->type);
25601 +       if (!inode)
25602 +               goto out;
25603 +
25604 +       switch(p->type) {
25605 +       case PROC_XID_INFO:
25606 +               PROC_I(inode)->op.proc_vid_read = proc_xid_info;
25607 +               break;
25608 +       case PROC_XID_STATUS:
25609 +               PROC_I(inode)->op.proc_vid_read = proc_xid_status;
25610 +               break;
25611 +       case PROC_XID_LIMIT:
25612 +               PROC_I(inode)->op.proc_vid_read = proc_xid_limit;
25613 +               break;
25614 +       case PROC_XID_SCHED:
25615 +               PROC_I(inode)->op.proc_vid_read = proc_xid_sched;
25616 +               break;
25617 +       case PROC_XID_CVIRT:
25618 +               PROC_I(inode)->op.proc_vid_read = proc_xid_cvirt;
25619 +               break;
25620 +       case PROC_XID_CACCT:
25621 +               PROC_I(inode)->op.proc_vid_read = proc_xid_cacct;
25622 +               break;
25623 +
25624 +       case PROC_NID_INFO:
25625 +               PROC_I(inode)->op.proc_vid_read = proc_nid_info;
25626 +               break;
25627 +       case PROC_NID_STATUS:
25628 +               PROC_I(inode)->op.proc_vid_read = proc_nid_status;
25629 +               break;
25630 +
25631 +       default:
25632 +               printk("procfs: impossible type (%d)",p->type);
25633 +               iput(inode);
25634 +               return ERR_PTR(-EINVAL);
25635 +       }
25636 +       inode->i_mode = p->mode;
25637 +       inode->i_fop = &proc_vid_info_file_operations;
25638 +       inode->i_nlink = 1;
25639 +       inode->i_flags|=S_IMMUTABLE;
25640 +
25641 +       dentry->d_op = &proc_vid_dentry_operations;
25642 +       d_add(dentry, inode);
25643 +       error = 0;
25644 +out:
25645 +       return ERR_PTR(error);
25646 +}
25647 +
25648 +
25649 +static int proc_vid_readdir(struct file * filp,
25650 +       void * dirent, filldir_t filldir)
25651 +{
25652 +       int i, size;
25653 +       struct inode *inode = filp->f_dentry->d_inode;
25654 +       struct vid_entry *p;
25655 +
25656 +       i = filp->f_pos;
25657 +       switch (i) {
25658 +       case 0:
25659 +               if (filldir(dirent, ".", 1, i,
25660 +                       inode->i_ino, DT_DIR) < 0)
25661 +                       return 0;
25662 +               i++;
25663 +               filp->f_pos++;
25664 +               /* fall through */
25665 +       case 1:
25666 +               if (filldir(dirent, "..", 2, i,
25667 +                       PROC_ROOT_INO, DT_DIR) < 0)
25668 +                       return 0;
25669 +               i++;
25670 +               filp->f_pos++;
25671 +               /* fall through */
25672 +       default:
25673 +               i -= 2;
25674 +               switch (inode_type(inode)) {
25675 +               case PROC_XID_INO:
25676 +                       size = sizeof(vx_base_stuff);
25677 +                       p = vx_base_stuff + i;
25678 +                       break;
25679 +               case PROC_NID_INO:
25680 +                       size = sizeof(vn_base_stuff);
25681 +                       p = vn_base_stuff + i;
25682 +                       break;
25683 +               default:
25684 +                       return 1;
25685 +               }
25686 +               if (i >= size/sizeof(struct vid_entry))
25687 +                       return 1;
25688 +               while (p->name) {
25689 +                       if (filldir(dirent, p->name, p->len,
25690 +                               filp->f_pos, fake_ino(inode_vid(inode),
25691 +                               p->type), p->mode >> 12) < 0)
25692 +                               return 0;
25693 +                       filp->f_pos++;
25694 +                       p++;
25695 +               }
25696 +       }
25697 +       return 1;
25698 +}
25699 +
25700 +
25701 +
25702 +
25703 +/* now the upper level (virtual) */
25704 +
25705 +static struct file_operations proc_vid_file_operations = {
25706 +       .read =         generic_read_dir,
25707 +       .readdir =      proc_vid_readdir,
25708 +};
25709 +
25710 +static struct inode_operations proc_vid_inode_operations = {
25711 +       .lookup =       proc_vid_lookup,
25712 +};
25713 +
25714 +
25715 +/* decimal string -> vid; negative on non-digit, leading zero or overflow */
25716 +static __inline__ int atovid(const char *str, int len)
25717 +{
25718 +       unsigned int vid = 0, c;
25719 +
25720 +       /* unsigned math: chars below '0' wrap around and fail c > 9 */
25721 +       while (len-- > 0) {
25722 +               c = *str - '0';
25723 +               str++;
25724 +               if (c > 9)
25725 +                       return -1;
25726 +               if (vid >= MAX_MULBY10)
25727 +                       return -1;
25728 +               vid *= 10;
25729 +               vid += c;
25730 +               if (!vid)
25731 +                       return -1;
25732 +       }
25733 +       return vid;
25734 +}
25735 +
25736 +static __inline__ unsigned long atoaddr(const char *str, int len)
25737 +{
25738 +       unsigned long addr = 0, c;
25739 +
25740 +       /* parse len hex digits (either case); returns -1 on bad input */
25741 +       while (len-- > 0) {
25742 +               c = *str - '0';
25743 +               if (c > 9) {
25744 +                       c = (*str | 0x20) - 'a';        /* fold case */
25745 +                       if (c > 5)
25746 +                               return -1;
25747 +                       c += 10;
25748 +               }
25749 +               str++;
25750 +               if (addr >= ((1 << 28) - 1))
25751 +                       return -1;
25752 +               addr = (addr << 4) | c;
25753 +               if (!addr)
25754 +                       return -1;
25755 +       }
25756 +       return addr;
25757 +}
25758 +
25759 +
25760 +struct dentry *proc_virtual_lookup(struct inode *dir,
25761 +       struct dentry * dentry, struct nameidata *nd)
25762 +{
25763 +       int xid, len, ret;
25764 +       struct vx_info *vxi;
25765 +       const char *name;
25766 +       struct inode *inode;
25767 +
25768 +       name = dentry->d_name.name;
25769 +       len = dentry->d_name.len;
25770 +       ret = -ENOMEM;
25771 +
25772 +#if 0
25773 +       if (len == 7 && !memcmp(name, "current", 7)) {
25774 +               inode = new_inode(dir->i_sb);
25775 +               if (!inode)
25776 +                       goto out;
25777 +               inode->i_mtime = inode->i_atime =
25778 +                       inode->i_ctime = CURRENT_TIME;
25779 +               inode->i_ino = fake_ino(1, PROC_XID_INO);
25780 +               inode->i_mode = S_IFLNK|S_IRWXUGO;
25781 +               inode->i_uid = inode->i_gid = 0;
25782 +               d_add(dentry, inode);
25783 +               return NULL;
25784 +       }
25785 +#endif
25786 +       if (len == 4 && !memcmp(name, "info", 4)) {
25787 +               inode = proc_vid_make_inode(dir->i_sb, 0, PROC_XID_INFO);
25788 +               if (!inode)
25789 +                       goto out;
25790 +               inode->i_fop = &proc_vid_info_file_operations;
25791 +               PROC_I(inode)->op.proc_vid_read = proc_virtual_info;
25792 +               inode->i_mode = S_IFREG|S_IRUGO;
25793 +               d_add(dentry, inode);
25794 +               return NULL;
25795 +       }
25796 +       if (len == 6 && !memcmp(name, "status", 6)) {
25797 +               inode = proc_vid_make_inode(dir->i_sb, 0, PROC_XID_STATUS);
25798 +               if (!inode)
25799 +                       goto out;
25800 +               inode->i_fop = &proc_vid_info_file_operations;
25801 +               PROC_I(inode)->op.proc_vid_read = proc_virtual_status;
25802 +               inode->i_mode = S_IFREG|S_IRUGO;
25803 +               d_add(dentry, inode);
25804 +               return NULL;
25805 +       }
25806 +
25807 +       ret = -ENOENT;
25808 +       xid = atovid(name, len);
25809 +       if (xid < 0)
25810 +               goto out;
25811 +       vxi = lookup_vx_info(xid);
25812 +       if (!vxi)
25813 +               goto out;
25814 +
25815 +       inode = NULL;
25816 +       if (vx_check(xid, VX_ADMIN|VX_WATCH|VX_IDENT))
25817 +               inode = proc_vid_make_inode(dir->i_sb,
25818 +                       vxi->vx_id, PROC_XID_INO);
25819 +       if (!inode)
25820 +               goto out_release;
25821 +
25822 +       inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
25823 +       inode->i_op = &proc_vid_inode_operations;
25824 +       inode->i_fop = &proc_vid_file_operations;
25825 +       inode->i_nlink = 2;
25826 +       inode->i_flags|=S_IMMUTABLE;
25827 +
25828 +       dentry->d_op = &proc_vid_dentry_operations;
25829 +       d_add(dentry, inode);
25830 +       ret = 0;
25831 +
25832 +out_release:
25833 +       put_vx_info(vxi);
25834 +out:
25835 +       return ERR_PTR(ret);
25836 +}
25837 +
25838 +
25839 +struct dentry *proc_vnet_lookup(struct inode *dir,
25840 +       struct dentry * dentry, struct nameidata *nd)
25841 +{
25842 +       int nid, len, ret;
25843 +       struct nx_info *nxi;
25844 +       const char *name;
25845 +       struct inode *inode;
25846 +
25847 +       name = dentry->d_name.name;
25848 +       len = dentry->d_name.len;
25849 +       ret = -ENOMEM;
25850 +#if 0
25851 +       if (len == 7 && !memcmp(name, "current", 7)) {
25852 +               inode = new_inode(dir->i_sb);
25853 +               if (!inode)
25854 +                       goto out;
25855 +               inode->i_mtime = inode->i_atime =
25856 +                       inode->i_ctime = CURRENT_TIME;
25857 +               inode->i_ino = fake_ino(1, PROC_NID_INO);
25858 +               inode->i_mode = S_IFLNK|S_IRWXUGO;
25859 +               inode->i_uid = inode->i_gid = 0;
25860 +               d_add(dentry, inode);
25861 +               return NULL;
25862 +       }
25863 +#endif
25864 +       if (len == 4 && !memcmp(name, "info", 4)) {
25865 +               inode = proc_vid_make_inode(dir->i_sb, 0, PROC_NID_INFO);
25866 +               if (!inode)
25867 +                       goto out;
25868 +               inode->i_fop = &proc_vid_info_file_operations;
25869 +               PROC_I(inode)->op.proc_vid_read = proc_vnet_info;
25870 +               inode->i_mode = S_IFREG|S_IRUGO;
25871 +               d_add(dentry, inode);
25872 +               return NULL;
25873 +       }
25874 +
25875 +       ret = -ENOENT;
25876 +       nid = atovid(name, len);
25877 +       if (nid < 0)
25878 +               goto out;
25879 +       nxi = lookup_nx_info(nid);
25880 +       if (!nxi)
25881 +               goto out;
25882 +
25883 +       inode = NULL;
25884 +       if (1)
25885 +               inode = proc_vid_make_inode(dir->i_sb,
25886 +                       nxi->nx_id, PROC_NID_INO);
25887 +       if (!inode)
25888 +               goto out_release;
25889 +
25890 +       inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
25891 +       inode->i_op = &proc_vid_inode_operations;
25892 +       inode->i_fop = &proc_vid_file_operations;
25893 +       inode->i_nlink = 2;
25894 +       inode->i_flags|=S_IMMUTABLE;
25895 +
25896 +       dentry->d_op = &proc_vid_dentry_operations;
25897 +       d_add(dentry, inode);
25898 +       ret = 0;
25899 +
25900 +out_release:
25901 +       put_nx_info(nxi);
25902 +out:
25903 +       return ERR_PTR(ret);
25904 +}
25905 +
25906 +
25907 +
25908 +
25909 +#define PROC_NUMBUF 10
25910 +#define PROC_MAXVIDS 32
25911 +
25912 +int proc_virtual_readdir(struct file * filp,
25913 +       void * dirent, filldir_t filldir)
25914 +{
25915 +       unsigned int xid_array[PROC_MAXVIDS];
25916 +       char buf[PROC_NUMBUF];
25917 +       unsigned int nr = filp->f_pos-3;
25918 +       unsigned int nr_xids, i;
25919 +       int visible = vx_check(0, VX_ADMIN|VX_WATCH);
25920 +       ino_t ino;
25921 +
25922 +       switch ((long)filp->f_pos) {
25923 +       case 0:
25924 +               ino = fake_ino(0, PROC_XID_INO);
25925 +               if (filldir(dirent, ".", 1,
25926 +                       filp->f_pos, ino, DT_DIR) < 0)
25927 +                       return 0;
25928 +               filp->f_pos++;
25929 +               /* fall through */
25930 +       case 1:
25931 +               ino = filp->f_dentry->d_parent->d_inode->i_ino;
25932 +               if (filldir(dirent, "..", 2,
25933 +                       filp->f_pos, ino, DT_DIR) < 0)
25934 +                       return 0;
25935 +               filp->f_pos++;
25936 +               /* fall through */
25937 +       case 2:
25938 +               if (visible) {
25939 +                       ino = fake_ino(0, PROC_XID_INFO);
25940 +                       if (filldir(dirent, "info", 4,
25941 +                               filp->f_pos, ino, DT_REG) < 0)
25942 +                               return 0;
25943 +               }
25944 +               filp->f_pos++;
25945 +               /* fall through */
25946 +       case 3:
25947 +               ino = fake_ino(0, PROC_XID_STATUS);
25948 +               if (filldir(dirent, "status", 6,
25949 +                       filp->f_pos, ino, DT_REG) < 0)
25950 +                       return 0;
25951 +               filp->f_pos++;
25952 +               /* fall through */
25953 +       }
25954 +
25955 +       nr_xids = get_xid_list(nr, xid_array, PROC_MAXVIDS);
25956 +       for (i = 0; i < nr_xids; i++) {
25957 +               int xid = xid_array[i];
25958 +               ino_t ino = fake_ino(xid, PROC_XID_INO);
25959 +               unsigned int j = PROC_NUMBUF;
25960 +
25961 +               do buf[--j] = '0' + (xid % 10); while (xid/=10);
25962 +
25963 +               if (filldir(dirent, buf+j, PROC_NUMBUF-j,
25964 +                       filp->f_pos, ino, DT_DIR) < 0)
25965 +                       break;
25966 +               filp->f_pos++;
25967 +       }
25968 +       return 0;
25969 +}
25970 +
25971 +
25972 +static struct file_operations proc_virtual_dir_operations = {
25973 +       .read =         generic_read_dir,
25974 +       .readdir =      proc_virtual_readdir,
25975 +};
25976 +
25977 +static struct inode_operations proc_virtual_dir_inode_operations = {
25978 +       .lookup =       proc_virtual_lookup,
25979 +};
25980 +
25981 +
25982 +int proc_vnet_readdir(struct file * filp,
25983 +       void * dirent, filldir_t filldir)
25984 +{
25985 +       unsigned int nid_array[PROC_MAXVIDS];
25986 +       char buf[PROC_NUMBUF];
25987 +       unsigned int nr = filp->f_pos-2;
25988 +       unsigned int nr_nids, i;
25989 +//     int visible = vx_check(0, VX_ADMIN|VX_WATCH);
25990 +       ino_t ino;
25991 +
25992 +       switch ((long)filp->f_pos) {
25993 +       case 0:
25994 +               ino = fake_ino(0, PROC_NID_INO);
25995 +               if (filldir(dirent, ".", 1,
25996 +                       filp->f_pos, ino, DT_DIR) < 0)
25997 +                       return 0;
25998 +               filp->f_pos++;
25999 +               /* fall through */
26000 +       case 1:
26001 +               ino = filp->f_dentry->d_parent->d_inode->i_ino;
26002 +               if (filldir(dirent, "..", 2,
26003 +                       filp->f_pos, ino, DT_DIR) < 0)
26004 +                       return 0;
26005 +               filp->f_pos++;
26006 +               /* fall through */
26007 +       case 2:
26008 +               ino = fake_ino(0, PROC_NID_INFO);
26009 +               if (filldir(dirent, "info", 4,
26010 +                       filp->f_pos, ino, DT_REG) < 0)
26011 +                       return 0;
26012 +               filp->f_pos++;
26013 +               /* fall through */
26014 +       }
26015 +
26016 +       nr_nids = get_nid_list(nr, nid_array, PROC_MAXVIDS);
26017 +       for (i = 0; i < nr_nids; i++) {
26018 +               int nid = nid_array[i];
26019 +               ino_t ino = fake_ino(nid, PROC_NID_INO);
26020 +               unsigned long j = PROC_NUMBUF;
26021 +
26022 +               do buf[--j] = '0' + (nid % 10); while (nid/=10);
26023 +
26024 +               if (filldir(dirent, buf+j, PROC_NUMBUF-j,
26025 +                       filp->f_pos, ino, DT_DIR) < 0)
26026 +                       break;
26027 +               filp->f_pos++;
26028 +       }
26029 +       return 0;
26030 +}
26031 +
26032 +
26033 +static struct file_operations proc_vnet_dir_operations = {
26034 +       .read =         generic_read_dir,
26035 +       .readdir =      proc_vnet_readdir,
26036 +};
26037 +
26038 +static struct inode_operations proc_vnet_dir_inode_operations = {
26039 +       .lookup =       proc_vnet_lookup,
26040 +};
26041 +
26042 +
26043 +
26044 +void proc_vx_init(void)
26045 +{
26046 +       struct proc_dir_entry *ent;
26047 +
26048 +       ent = proc_mkdir("virtual", 0);
26049 +       if (ent) {
26050 +               ent->proc_fops = &proc_virtual_dir_operations;
26051 +               ent->proc_iops = &proc_virtual_dir_inode_operations;
26052 +       }
26053 +       proc_virtual = ent;
26054 +
26055 +       ent = proc_mkdir("virtnet", 0);
26056 +       if (ent) {
26057 +               ent->proc_fops = &proc_vnet_dir_operations;
26058 +               ent->proc_iops = &proc_vnet_dir_inode_operations;
26059 +       }
26060 +       proc_vnet = ent;
26061 +}
26062 +
26063 +
26064 +
26065 +
26066 +/* per pid info */
26067 +
26068 +
26069 +int proc_pid_vx_info(struct task_struct *p, char *buffer)
26070 +{
26071 +       struct vx_info *vxi;
26072 +       char * orig = buffer;
26073 +
26074 +       buffer += sprintf (buffer,"XID:\t%d\n", vx_task_xid(p));
26075 +       if (vx_flags(VXF_INFO_HIDE, 0))
26076 +               goto out;
26077 +
26078 +       vxi = task_get_vx_info(p);
26079 +       if (!vxi)
26080 +               goto out;
26081 +
26082 +       buffer += sprintf (buffer,"BCaps:\t%016llx\n"
26083 +               ,(unsigned long long)vxi->vx_bcaps);
26084 +       buffer += sprintf (buffer,"CCaps:\t%016llx\n"
26085 +               ,(unsigned long long)vxi->vx_ccaps);
26086 +       buffer += sprintf (buffer,"CFlags:\t%016llx\n"
26087 +               ,(unsigned long long)vxi->vx_flags);
26088 +       buffer += sprintf (buffer,"CIPid:\t%d\n"
26089 +               ,vxi->vx_initpid);
26090 +
26091 +       put_vx_info(vxi);
26092 +out:
26093 +       return buffer - orig;
26094 +}
26095 +
26096 +
26097 +int proc_pid_nx_info(struct task_struct *p, char *buffer)
26098 +{
26099 +       struct nx_info *nxi;
26100 +       char * orig = buffer;
26101 +       int i;
26102 +
26103 +       buffer += sprintf (buffer,"NID:\t%d\n", nx_task_nid(p));
26104 +       if (vx_flags(VXF_INFO_HIDE, 0))
26105 +               goto out;
26106 +       nxi = task_get_nx_info(p);
26107 +       if (!nxi)
26108 +               goto out;
26109 +
26110 +       for (i=0; i<nxi->nbipv4; i++){
26111 +               buffer += sprintf (buffer,
26112 +                       "V4Root[%d]:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i
26113 +                       ,NIPQUAD(nxi->ipv4[i])
26114 +                       ,NIPQUAD(nxi->mask[i]));
26115 +       }
26116 +       buffer += sprintf (buffer,
26117 +               "V4Root[bcast]:\t%d.%d.%d.%d\n"
26118 +               ,NIPQUAD(nxi->v4_bcast));
26119 +
26120 +       put_nx_info(nxi);
26121 +out:
26122 +       return buffer - orig;
26123 +}
26124 +
26125 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/sched.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sched.c
26126 --- linux-2.6.16.20/kernel/vserver/sched.c      1970-01-01 01:00:00 +0100
26127 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sched.c 2006-04-29 02:58:07 +0200
26128 @@ -0,0 +1,334 @@
26129 +/*
26130 + *  linux/kernel/vserver/sched.c
26131 + *
26132 + *  Virtual Server: Scheduler Support
26133 + *
26134 + *  Copyright (C) 2004-2005  Herbert Pötzl
26135 + *
26136 + *  V0.01  adapted Sam Vilain's version to 2.6.3
26137 + *  V0.02  removed legacy interface
26138 + *
26139 + */
26140 +
26141 +#include <linux/sched.h>
26142 +#include <linux/vs_context.h>
26143 +#include <linux/vs_sched.h>
26144 +#include <linux/vserver/sched_cmd.h>
26145 +
26146 +#include <asm/errno.h>
26147 +#include <asm/uaccess.h>
26148 +
26149 +#define vxd_check_range(val, min, max) do {            \
26150 +       vxlprintk((val<min) || (val>max),               \
26151 +               "check_range(%ld,%ld,%ld)",             \
26152 +               (long)val, (long)min, (long)max,        \
26153 +               __FILE__, __LINE__);                    \
26154 +       } while (0)
26155 +
26156 +
26157 +void vx_update_sched_param(struct _vx_sched *sched,
26158 +       struct _vx_sched_pc *sched_pc)
26159 +{
26160 +       unsigned int set_mask = sched->update_mask;
26161 +
26162 +       if (set_mask & VXSM_FILL_RATE)
26163 +               sched_pc->fill_rate[0] = sched->fill_rate[0];
26164 +       if (set_mask & VXSM_INTERVAL)
26165 +               sched_pc->interval[0] = sched->interval[0];
26166 +       if (set_mask & VXSM_FILL_RATE2)
26167 +               sched_pc->fill_rate[1] = sched->fill_rate[1];
26168 +       if (set_mask & VXSM_INTERVAL2)
26169 +               sched_pc->interval[1] = sched->interval[1];
26170 +       if (set_mask & VXSM_TOKENS)
26171 +               sched_pc->tokens = sched->tokens;
26172 +       if (set_mask & VXSM_TOKENS_MIN)
26173 +               sched_pc->tokens_min = sched->tokens_min;
26174 +       if (set_mask & VXSM_TOKENS_MAX)
26175 +               sched_pc->tokens_max = sched->tokens_max;
26176 +
26177 +       if (set_mask & VXSM_IDLE_TIME)
26178 +               sched_pc->flags |= VXSF_IDLE_TIME;
26179 +       else
26180 +               sched_pc->flags &= ~VXSF_IDLE_TIME;
26181 +
26182 +       /* reset time */
26183 +       sched_pc->norm_time = jiffies;
26184 +}
26185 +
26186 +
26187 +/*
26188 + * recalculate the context's scheduling tokens
26189 + *
26190 + * ret > 0 : number of tokens available
26191 + * ret < 0 : on hold, check delta_min[]
26192 + *          -1 only jiffies
26193 + *          -2 also idle time
26194 + * ret = 0 : on hold, built without CONFIG_VSERVER_HARDCPU
26195 + */
26196 +int vx_tokens_recalc(struct _vx_sched_pc *sched_pc,
26197 +       unsigned long *norm_time, unsigned long *idle_time, int delta_min[2])
26198 +{
26199 +       long delta;
26200 +       long tokens = 0;
26201 +       int flags = sched_pc->flags;
26202 +
26203 +       /* how much time has passed? */
26204 +       delta = *norm_time - sched_pc->norm_time;
26205 +       vxd_check_range(delta, 0, INT_MAX);
26206 +
26207 +       if (delta >= sched_pc->interval[0]) {
26208 +               long tokens, integral;
26209 +
26210 +               /* calc integral token part */
26211 +               tokens = delta / sched_pc->interval[0];
26212 +               integral = tokens * sched_pc->interval[0];
26213 +               tokens *= sched_pc->fill_rate[0];
26214 +#ifdef CONFIG_VSERVER_HARDCPU
26215 +               delta_min[0] = delta - integral;
26216 +               vxd_check_range(delta_min[0], 0, sched_pc->interval[0]);
26217 +#endif
26218 +               /* advance time */
26219 +               sched_pc->norm_time += delta;
26220 +
26221 +               /* add tokens */
26222 +               sched_pc->tokens += tokens;
26223 +               sched_pc->token_time += tokens;
26224 +       }
26225 +       else
26226 +               delta_min[0] = delta;
26227 +
26228 +#ifdef CONFIG_VSERVER_IDLETIME
26229 +       if (!(flags & VXSF_IDLE_TIME))
26230 +               goto skip_idle;
26231 +
26232 +       /* how much was the idle skip? */
26233 +       delta = *idle_time - sched_pc->idle_time;
26234 +       vxd_check_range(delta, 0, INT_MAX);
26235 +
26236 +       if (delta >= sched_pc->interval[1]) {
26237 +               long tokens, integral;
26238 +
26239 +               /* calc fair share token part */
26240 +               tokens = delta / sched_pc->interval[1];
26241 +               integral = tokens * sched_pc->interval[1];
26242 +               tokens *= sched_pc->fill_rate[1];
26243 +               delta_min[1] = delta - integral;
26244 +               vxd_check_range(delta_min[1], 0, sched_pc->interval[1]);
26245 +
26246 +               /* advance idle time */
26247 +               sched_pc->idle_time += integral;
26248 +
26249 +               /* add tokens */
26250 +               sched_pc->tokens += tokens;
26251 +               sched_pc->token_time += tokens;
26252 +       }
26253 +       else
26254 +               delta_min[1] = delta;
26255 +skip_idle:
26256 +#endif
26257 +
26258 +       /* clip at maximum */
26259 +       if (sched_pc->tokens > sched_pc->tokens_max)
26260 +               sched_pc->tokens = sched_pc->tokens_max;
26261 +       tokens = sched_pc->tokens;
26262 +
26263 +       if ((flags & VXSF_ONHOLD)) {
26264 +               /* can we unhold? */
26265 +               if (tokens >= sched_pc->tokens_min) {
26266 +                       flags &= ~VXSF_ONHOLD;
26267 +                       sched_pc->hold_ticks +=
26268 +                               *norm_time - sched_pc->onhold;
26269 +               }
26270 +               else
26271 +                       goto on_hold;
26272 +       } else {
26273 +               /* put on hold? */
26274 +               if (tokens <= 0) {
26275 +                       flags |= VXSF_ONHOLD;
26276 +                       sched_pc->onhold = *norm_time;
26277 +                       goto on_hold;
26278 +               }
26279 +       }
26280 +       sched_pc->flags = flags;
26281 +       return tokens;
26282 +
26283 +on_hold:
26284 +       tokens = sched_pc->tokens_min - tokens;
26285 +       sched_pc->flags = flags;
26286 +       BUG_ON(tokens < 0);
26287 +
26288 +#ifdef CONFIG_VSERVER_HARDCPU
26289 +       /* next interval? */
26290 +       if (!sched_pc->fill_rate[0])
26291 +               delta_min[0] = HZ;
26292 +       else if (tokens > sched_pc->fill_rate[0])
26293 +               delta_min[0] += sched_pc->interval[0] *
26294 +                       tokens / sched_pc->fill_rate[0];
26295 +       vxd_check_range(delta_min[0], 0, INT_MAX);
26296 +
26297 +#ifdef CONFIG_VSERVER_IDLETIME
26298 +       if (!(flags & VXSF_IDLE_TIME))
26299 +               return -1;
26300 +
26301 +       /* next interval? */
26302 +       if (!sched_pc->fill_rate[1])
26303 +               delta_min[1] = HZ;
26304 +       else if (tokens > sched_pc->fill_rate[1])
26305 +               delta_min[1] += sched_pc->interval[1] *
26306 +                       tokens / sched_pc->fill_rate[1];
26307 +       vxd_check_range(delta_min[1], 0, INT_MAX);
26308 +
26309 +       return -2;
26310 +#else
26311 +       return -1;
26312 +#endif /* CONFIG_VSERVER_IDLETIME */
26313 +#else
26314 +       return 0;
26315 +#endif /* CONFIG_VSERVER_HARDCPU */
26316 +}
26317 +
26318 +
26319 +static int do_set_sched(struct vx_info *vxi, struct vcmd_set_sched_v4 *data)
26320 +{
26321 +       unsigned int set_mask = data->set_mask;
26322 +       unsigned int update_mask;
26323 +
26324 +       /* Sanity check data values */
26325 +       if (data->fill_rate < 0)
26326 +               data->fill_rate = 1;
26327 +       if (data->interval <= 0)
26328 +               data->interval = HZ;
26329 +       if (data->tokens_max <= 0)
26330 +               data->tokens_max = HZ;
26331 +       if (data->tokens_min < 0)
26332 +               data->tokens_min = data->fill_rate*3;
26333 +       if (data->tokens_min >= data->tokens_max)
26334 +               data->tokens_min = data->tokens_max;
26335 +
26336 +       if (data->prio_bias > MAX_PRIO_BIAS)
26337 +               data->prio_bias = MAX_PRIO_BIAS;
26338 +       if (data->prio_bias < MIN_PRIO_BIAS)
26339 +               data->prio_bias = MIN_PRIO_BIAS;
26340 +
26341 +       spin_lock(&vxi->sched.tokens_lock);
26342 +
26343 +       if (set_mask & VXSM_FILL_RATE)
26344 +               vxi->sched.fill_rate[0] = data->fill_rate;
26345 +       if (set_mask & VXSM_INTERVAL)
26346 +               vxi->sched.interval[0] = data->interval;
26347 +       if (set_mask & VXSM_FILL_RATE2)
26348 +               vxi->sched.fill_rate[1] = data->fill_rate;
26349 +       if (set_mask & VXSM_INTERVAL2)
26350 +               vxi->sched.interval[1] = data->interval;
26351 +       if (set_mask & VXSM_TOKENS)
26352 +               vxi->sched.tokens = data->tokens;
26353 +       if (set_mask & VXSM_TOKENS_MIN)
26354 +               vxi->sched.tokens_min = data->tokens_min;
26355 +       if (set_mask & VXSM_TOKENS_MAX)
26356 +               vxi->sched.tokens_max = data->tokens_max;
26357 +       if (set_mask & VXSM_PRIO_BIAS)
26358 +               vxi->sched.prio_bias = data->prio_bias;
26359 +
26360 +       update_mask = vxi->sched.update_mask & VXSM_SET_MASK;
26361 +       update_mask |= (set_mask & (VXSM_SET_MASK|VXSM_IDLE_TIME));
26362 +       vxi->sched.update_mask = update_mask;
26363 +#ifdef CONFIG_SMP
26364 +       rmb();
26365 +       if (set_mask & VXSM_CPU_ID)
26366 +               vxi->sched.update = cpumask_of_cpu(data->cpu_id);
26367 +       else
26368 +               vxi->sched.update = CPU_MASK_ALL;
26369 +       /* forced reload? */
26370 +       if (set_mask & VXSM_FORCE) {
26371 +               int cpu;
26372 +
26373 +               for_each_cpu(cpu)
26374 +                       vx_update_sched_param(&vxi->sched,
26375 +                               &vx_per_cpu(vxi, sched_pc, cpu));
26376 +       }
26377 +#else
26378 +       /* on UP we update immediately */
26379 +       vx_update_sched_param(&vxi->sched,
26380 +               &vx_per_cpu(vxi, sched_pc, 0));
26381 +#endif
26382 +
26383 +       spin_unlock(&vxi->sched.tokens_lock);
26384 +       return 0;
26385 +}
26386 +
26387 +
26388 +#ifdef CONFIG_VSERVER_LEGACY
26389 +
26390 +#define COPY_MASK_V2(name, mask)                       \
26391 +       if (vc_data.name != SCHED_KEEP) {               \
26392 +               vc_data_v4.name = vc_data.name;         \
26393 +               vc_data_v4.set_mask |= mask;            \
26394 +       }
26395 +
26396 +int vc_set_sched_v2(uint32_t xid, void __user *data)
26397 +{
26398 +       struct vcmd_set_sched_v2 vc_data;
26399 +       struct vcmd_set_sched_v4 vc_data_v4 = { .set_mask = 0 };
26400 +       struct vx_info *vxi;
26401 +
26402 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
26403 +               return -EFAULT;
26404 +
26405 +       vxi = lookup_vx_info(xid);
26406 +       if (!vxi)
26407 +               return -ESRCH;
26408 +
26409 +       COPY_MASK_V2(fill_rate,  VXSM_FILL_RATE);
26410 +       COPY_MASK_V2(interval,   VXSM_INTERVAL);
26411 +       COPY_MASK_V2(tokens,     VXSM_TOKENS);
26412 +       COPY_MASK_V2(tokens_min, VXSM_TOKENS_MIN);
26413 +       COPY_MASK_V2(tokens_max, VXSM_TOKENS_MAX);
26414 +       vc_data_v4.bucket_id = 0;
26415 +
26416 +       do_set_sched(vxi, &vc_data_v4);
26417 +       put_vx_info(vxi);
26418 +       return 0;
26419 +}
26420 +#endif
26421 +
26422 +int vc_set_sched_v3(uint32_t xid, void __user *data)
26423 +{
26424 +       struct vcmd_set_sched_v3 vc_data;
26425 +       struct vcmd_set_sched_v4 vc_data_v4;
26426 +       struct vx_info *vxi;
26427 +       int ret;
26428 +
26429 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
26430 +               return -EFAULT;
26431 +
26432 +       vxi = lookup_vx_info(xid);
26433 +       if (!vxi)
26434 +               return -ESRCH;
26435 +
26436 +       /* structures are binary compatible */
26437 +       memcpy(&vc_data_v4, &vc_data, sizeof(vc_data));
26438 +       vc_data_v4.set_mask &= VXSM_V3_MASK;
26439 +       vc_data_v4.bucket_id = 0;
26440 +       ret = do_set_sched(vxi, &vc_data_v4);
26441 +       put_vx_info(vxi);
26442 +       return ret;
26443 +}
26444 +
26445 +int vc_set_sched(uint32_t xid, void __user *data)
26446 +{
26447 +       struct vcmd_set_sched_v4 vc_data;
26448 +       struct vx_info *vxi;
26449 +       int ret;
26450 +
26451 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
26452 +               return -EFAULT;
26453 +
26454 +       vxi = lookup_vx_info(xid);
26455 +       if (!vxi)
26456 +               return -ESRCH;
26457 +
26458 +       ret = do_set_sched(vxi, &vc_data);
26459 +       put_vx_info(vxi);
26460 +       return ret;
26461 +}
26462 +
26463 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/sched_init.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sched_init.h
26464 --- linux-2.6.16.20/kernel/vserver/sched_init.h 1970-01-01 01:00:00 +0100
26465 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sched_init.h    2006-04-26 19:07:00 +0200
26466 @@ -0,0 +1,47 @@
26467 +
26468 +static inline void vx_info_init_sched(struct _vx_sched *sched)
26469 +{
26470 +       /* scheduling; hard code starting values as constants */
26471 +       sched->fill_rate[0]     = 1;
26472 +       sched->interval[0]      = 4;
26473 +       sched->fill_rate[1]     = 1;
26474 +       sched->interval[1]      = 8;
26475 +       sched->tokens           = HZ >> 2;
26476 +       sched->tokens_min       = HZ >> 4;
26477 +       sched->tokens_max       = HZ >> 1;
26478 +       sched->tokens_lock      = SPIN_LOCK_UNLOCKED;
26479 +       sched->prio_bias        = 0;
26480 +       sched->vavavoom         = 0;
26481 +}
26482 +
26483 +static inline
26484 +void vx_info_init_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
26485 +{
26486 +       sched_pc->fill_rate[0]  = 1;
26487 +       sched_pc->interval[0]   = 4;
26488 +       sched_pc->fill_rate[1]  = 1;
26489 +       sched_pc->interval[1]   = 8;
26490 +       sched_pc->tokens        = HZ >> 2;
26491 +       sched_pc->tokens_min    = HZ >> 4;
26492 +       sched_pc->tokens_max    = HZ >> 1;
26493 +       sched_pc->token_time    = 0;
26494 +       sched_pc->idle_time     = 0;
26495 +       sched_pc->norm_time     = jiffies;
26496 +
26497 +       sched_pc->user_ticks = 0;
26498 +       sched_pc->sys_ticks = 0;
26499 +       sched_pc->hold_ticks = 0;
26500 +}
26501 +
26502 +
26503 +static inline void vx_info_exit_sched(struct _vx_sched *sched)
26504 +{
26505 +       return;
26506 +}
26507 +
26508 +static inline
26509 +void vx_info_exit_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
26510 +{
26511 +       return;
26512 +}
26513 +
26514 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/sched_proc.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sched_proc.h
26515 --- linux-2.6.16.20/kernel/vserver/sched_proc.h 1970-01-01 01:00:00 +0100
26516 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sched_proc.h    2006-04-26 19:07:00 +0200
26517 @@ -0,0 +1,59 @@
26518 +#ifndef _VX_SCHED_PROC_H
26519 +#define _VX_SCHED_PROC_H
26520 +
26521 +
26522 +static inline
26523 +int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
26524 +{
26525 +       int length = 0;
26526 +
26527 +       length += sprintf(buffer,
26528 +               "FillRate:\t%8d,%d\n"
26529 +               "Interval:\t%8d,%d\n"
26530 +               "TokensMin:\t%8d\n"
26531 +               "TokensMax:\t%8d\n"
26532 +               "PrioBias:\t%8d\n"
26533 +               "VaVaVoom:\t%8d\n"
26534 +               ,sched->fill_rate[0]
26535 +               ,sched->fill_rate[1]
26536 +               ,sched->interval[0]
26537 +               ,sched->interval[1]
26538 +               ,sched->tokens_min
26539 +               ,sched->tokens_max
26540 +               ,sched->prio_bias
26541 +               ,sched->vavavoom
26542 +               );
26543 +       return length;
26544 +}
26545 +
26546 +static inline
26547 +int vx_info_proc_sched_pc(struct _vx_sched_pc *sched_pc,
26548 +       char *buffer, int cpu)
26549 +{
26550 +       int length = 0;
26551 +
26552 +       length += sprintf(buffer + length,
26553 +               "cpu %d: %lld %lld %lld %ld %ld"
26554 +               ,cpu
26555 +               ,(unsigned long long)sched_pc->user_ticks
26556 +               ,(unsigned long long)sched_pc->sys_ticks
26557 +               ,(unsigned long long)sched_pc->hold_ticks
26558 +               ,sched_pc->token_time
26559 +               ,sched_pc->idle_time
26560 +               );
26561 +       length += sprintf(buffer + length,
26562 +               " %c%c %d %d %d %d/%d %d/%d\n"
26563 +               ,(sched_pc->flags & VXSF_ONHOLD) ? 'H' : 'R'
26564 +               ,(sched_pc->flags & VXSF_IDLE_TIME) ? 'I' : '-'
26565 +               ,sched_pc->tokens
26566 +               ,sched_pc->tokens_min
26567 +               ,sched_pc->tokens_max
26568 +               ,sched_pc->fill_rate[0]
26569 +               ,sched_pc->interval[0]
26570 +               ,sched_pc->fill_rate[1]
26571 +               ,sched_pc->interval[1]
26572 +               );
26573 +       return length;
26574 +}
26575 +
26576 +#endif /* _VX_SCHED_PROC_H */
26577 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/signal.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/signal.c
26578 --- linux-2.6.16.20/kernel/vserver/signal.c     1970-01-01 01:00:00 +0100
26579 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/signal.c        2006-04-26 19:07:00 +0200
26580 @@ -0,0 +1,137 @@
26581 +/*
26582 + *  linux/kernel/vserver/signal.c
26583 + *
26584 + *  Virtual Server: Signal Support
26585 + *
26586 + *  Copyright (C) 2003-2005  Herbert Pötzl
26587 + *
26588 + *  V0.01  broken out from vcontext V0.05
26589 + *
26590 + */
26591 +
26592 +#include <linux/sched.h>
26593 +
26594 +#include <asm/errno.h>
26595 +#include <asm/uaccess.h>
26596 +
26597 +#include <linux/vs_context.h>
26598 +#include <linux/vserver/signal_cmd.h>
26599 +
26600 +
26601 +int vx_info_kill(struct vx_info *vxi, int pid, int sig)
26602 +{
26603 +       int retval, count=0;
26604 +       struct task_struct *p;
26605 +       unsigned long priv = 0;
26606 +
26607 +       retval = -ESRCH;
26608 +       vxdprintk(VXD_CBIT(misc, 4),
26609 +               "vx_info_kill(%p[#%d],%d,%d)*",
26610 +               vxi, vxi->vx_id, pid, sig);
26611 +       read_lock(&tasklist_lock);
26612 +       switch (pid) {
26613 +       case  0:
26614 +               priv = 1;
26615 +       case -1:
26616 +               for_each_process(p) {
26617 +                       int err = 0;
26618 +
26619 +                       if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 ||
26620 +                               (pid && vxi->vx_initpid == p->pid))
26621 +                               continue;
26622 +
26623 +                       err = group_send_sig_info(sig, (void*)priv, p);
26624 +                       ++count;
26625 +                       if (err != -EPERM)
26626 +                               retval = err;
26627 +               }
26628 +               break;
26629 +
26630 +       case 1:
26631 +               if (vxi->vx_initpid) {
26632 +                       pid = vxi->vx_initpid;
26633 +                       priv = 1;
26634 +               }
26635 +               /* fallthrough */
26636 +       default:
26637 +               p = find_task_by_real_pid(pid);
26638 +               if (p) {
26639 +                       if (vx_task_xid(p) == vxi->vx_id)
26640 +                               retval = group_send_sig_info(sig,
26641 +                                       (void*)priv, p);
26642 +               }
26643 +               break;
26644 +       }
26645 +       read_unlock(&tasklist_lock);
26646 +       vxdprintk(VXD_CBIT(misc, 4),
26647 +               "vx_info_kill(%p[#%d],%d,%d) = %d",
26648 +               vxi, vxi->vx_id, pid, sig, retval);
26649 +       return retval;
26650 +}
26651 +
26652 +int vc_ctx_kill(uint32_t id, void __user *data)
26653 +{
26654 +       int retval;
26655 +       struct vcmd_ctx_kill_v0 vc_data;
26656 +       struct vx_info *vxi;
26657 +
26658 +       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
26659 +               return -EFAULT;
26660 +
26661 +       vxi = lookup_vx_info(id);
26662 +       if (!vxi)
26663 +               return -ESRCH;
26664 +
26665 +       retval = vx_info_kill(vxi, vc_data.pid, vc_data.sig);
26666 +       put_vx_info(vxi);
26667 +       return retval;
26668 +}
26669 +
26670 +
26671 +static int __wait_exit(struct vx_info *vxi)
26672 +{
26673 +       DECLARE_WAITQUEUE(wait, current);
26674 +       int ret = 0;
26675 +
26676 +       add_wait_queue(&vxi->vx_wait, &wait);
26677 +       set_current_state(TASK_INTERRUPTIBLE);
26678 +
26679 +wait:
26680 +       if (vx_info_state(vxi,
26681 +               VXS_SHUTDOWN|VXS_HASHED|VXS_HELPER) == VXS_SHUTDOWN)
26682 +               goto out;
26683 +       if (signal_pending(current)) {
26684 +               ret = -ERESTARTSYS;
26685 +               goto out;
26686 +       }
26687 +       schedule();
26688 +       goto wait;
26689 +
26690 +out:
26691 +       set_current_state(TASK_RUNNING);
26692 +       remove_wait_queue(&vxi->vx_wait, &wait);
26693 +       return ret;
26694 +}
26695 +
26696 +
26697 +
26698 +int vc_wait_exit(uint32_t id, void __user *data)
26699 +{
26700 +       struct vx_info *vxi;
26701 +       struct vcmd_wait_exit_v0 vc_data;
26702 +       int ret;
26703 +
26704 +       vxi = lookup_vx_info(id);
26705 +       if (!vxi)
26706 +               return -ESRCH;
26707 +
26708 +       ret = __wait_exit(vxi);
26709 +       vc_data.reboot_cmd = vxi->reboot_cmd;
26710 +       vc_data.exit_code = vxi->exit_code;
26711 +       put_vx_info(vxi);
26712 +
26713 +       if (copy_to_user (data, &vc_data, sizeof(vc_data)))
26714 +               ret = -EFAULT;
26715 +       return ret;
26716 +}
26717 +
26718 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/switch.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/switch.c
26719 --- linux-2.6.16.20/kernel/vserver/switch.c     1970-01-01 01:00:00 +0100
26720 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/switch.c        2006-04-26 19:07:00 +0200
26721 @@ -0,0 +1,297 @@
26722 +/*
26723 + *  linux/kernel/vserver/switch.c
26724 + *
26725 + *  Virtual Server: Syscall Switch
26726 + *
26727 + *  Copyright (C) 2003-2005  Herbert Pötzl
26728 + *
26729 + *  V0.01  syscall switch
26730 + *  V0.02  added signal to context
26731 + *  V0.03  added rlimit functions
26732 + *  V0.04  added iattr, task/xid functions
26733 + *  V0.05  added debug/history stuff
26734 + *  V0.06  added compat32 layer
26735 + *
26736 + */
26737 +
26738 +#include <linux/linkage.h>
26739 +#include <linux/sched.h>
26740 +#include <linux/compat.h>
26741 +#include <asm/errno.h>
26742 +
26743 +#include <linux/vserver/network.h>
26744 +#include <linux/vserver/switch.h>
26745 +#include <linux/vserver/debug.h>
26746 +
26747 +static inline
26748 +int vc_get_version(uint32_t id)
26749 +{
26750 +#ifdef CONFIG_VSERVER_LEGACY_VERSION
26751 +       if (id == 63)   /* id 63 asks for the legacy version number */
26752 +               return VCI_LEGACY_VERSION;
26753 +#endif
26754 +       return VCI_VERSION;     /* every other id gets VCI_VERSION */
26755 +}
26756 +
26757 +#include "vci_config.h"
26758 +
26759 +static inline
26760 +int vc_get_vci(uint32_t id)
26761 +{
26762 +       return vci_kernel_config();     /* id is ignored */
26763 +}
26764 +
26765 +#include <linux/vserver/context_cmd.h>
26766 +#include <linux/vserver/cvirt_cmd.h>
26767 +#include <linux/vserver/limit_cmd.h>
26768 +#include <linux/vserver/network_cmd.h>
26769 +#include <linux/vserver/sched_cmd.h>
26770 +#include <linux/vserver/debug_cmd.h>
26771 +#include <linux/vserver/inode_cmd.h>
26772 +#include <linux/vserver/dlimit_cmd.h>
26773 +#include <linux/vserver/signal_cmd.h>
26774 +#include <linux/vserver/namespace_cmd.h>
26775 +
26776 +#include <linux/vserver/legacy.h>
26777 +#include <linux/vserver/inode.h>
26778 +#include <linux/vserver/dlimit.h>
26779 +
26780 +/* dispatch to name##_x32() when 'compat' is set (CONFIG_COMPAT only) */
26781 +#ifdef CONFIG_COMPAT
26782 +#define __COMPAT(name, id, data, compat)       \
26783 +       ((compat) ? name ## _x32 (id, data) : name (id, data))
26784 +#else
26785 +#define __COMPAT(name, id, data, compat)       \
26786 +       name (id, data)
26787 +#endif
26788 +
26789 +/* common backend of the vserver syscalls: permission checks, then cmd dispatch */
26790 +static inline
26791 +long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
26792 +{
26793 +       vxdprintk(VXD_CBIT(switch, 0),
26794 +               "vc: VCMD_%02d_%d[%d], %d,%p,%d",
26795 +               VC_CATEGORY(cmd), VC_COMMAND(cmd),
26796 +               VC_VERSION(cmd), id, data, compat);
26797 +
26798 +#ifdef CONFIG_VSERVER_LEGACY
26799 +       if (!capable(CAP_CONTEXT) &&
26800 +               /* dirty hack for capremove: new_s_context with id -2 skips this check */
26801 +               !(cmd==VCMD_new_s_context && id==-2))
26802 +               return -EPERM;
26803 +#else
26804 +       if (!capable(CAP_CONTEXT))
26805 +               return -EPERM;
26806 +#endif
26807 +       /* CAP_SYS_ADMIN check, moved here from the individual commands */
26808 +       if (!capable(CAP_SYS_ADMIN))
26809 +               return -EPERM;
26810 +
26811 +       switch (cmd) {
26812 +       case VCMD_get_version:
26813 +               return vc_get_version(id);
26814 +       case VCMD_get_vci:
26815 +               return vc_get_vci(id);
26816 +
26817 +       case VCMD_dump_history:
26818 +#ifdef CONFIG_VSERVER_HISTORY
26819 +               return vc_dump_history(id);
26820 +#else
26821 +               return -ENOSYS;
26822 +#endif
26823 +
26824 +#ifdef CONFIG_VSERVER_LEGACY
26825 +       case VCMD_new_s_context:
26826 +               return vc_new_s_context(id, data);
26827 +#endif
26828 +#ifdef CONFIG_VSERVER_LEGACYNET
26829 +       case VCMD_set_ipv4root:
26830 +               return vc_set_ipv4root(id, data);
26831 +#endif
26832 +
26833 +       case VCMD_task_xid:
26834 +               return vc_task_xid(id, data);
26835 +       case VCMD_vx_info:
26836 +               return vc_vx_info(id, data);
26837 +
26838 +       case VCMD_task_nid:
26839 +               return vc_task_nid(id, data);
26840 +       case VCMD_nx_info:
26841 +               return vc_nx_info(id, data);
26842 +
26843 +       case VCMD_set_namespace_v0:
26844 +               return vc_set_namespace(-1, data);
26845 +       /* this is version 1 */
26846 +       case VCMD_set_namespace:
26847 +               return vc_set_namespace(id, data);
26848 +       case VCMD_cleanup_namespace:
26849 +               return vc_cleanup_namespace(id, data);
26850 +       }
26851 +
26852 +       /* from here on: must pass the VX_ADMIN|VX_WATCH check or be in setup state */
26853 +       if (!vx_check(0, VX_ADMIN|VX_WATCH) &&
26854 +               !vx_flags(VXF_STATE_SETUP,0))
26855 +               return -EPERM;
26856 +
26857 +#ifdef CONFIG_VSERVER_LEGACY
26858 +       switch (cmd) {
26859 +       case VCMD_set_cflags:
26860 +       case VCMD_set_ccaps_v0:
26861 +               if (vx_check(0, VX_WATCH))
26862 +                       return 0;       /* reported as success, nothing is changed */
26863 +       }
26864 +#endif
26865 +
26866 +       switch (cmd) {
26867 +#ifdef CONFIG_IA32_EMULATION
26868 +       case VCMD_get_rlimit:
26869 +               return __COMPAT(vc_get_rlimit, id, data, compat);
26870 +       case VCMD_set_rlimit:
26871 +               return __COMPAT(vc_set_rlimit, id, data, compat);
26872 +#else
26873 +       case VCMD_get_rlimit:
26874 +               return vc_get_rlimit(id, data);
26875 +       case VCMD_set_rlimit:
26876 +               return vc_set_rlimit(id, data);
26877 +#endif
26878 +       case VCMD_get_rlimit_mask:
26879 +               return vc_get_rlimit_mask(id, data);
26880 +
26881 +       case VCMD_get_vhi_name:
26882 +               return vc_get_vhi_name(id, data);
26883 +       case VCMD_set_vhi_name:
26884 +               return vc_set_vhi_name(id, data);
26885 +
26886 +       case VCMD_set_cflags:
26887 +               return vc_set_cflags(id, data);
26888 +       case VCMD_get_cflags:
26889 +               return vc_get_cflags(id, data);
26890 +
26891 +       case VCMD_set_ccaps_v0:
26892 +               return vc_set_ccaps_v0(id, data);
26893 +       /* this is version 1 */
26894 +       case VCMD_set_ccaps:
26895 +               return vc_set_ccaps(id, data);
26896 +       case VCMD_get_ccaps_v0:
26897 +               return vc_get_ccaps_v0(id, data);
26898 +       /* this is version 1 */
26899 +       case VCMD_get_ccaps:
26900 +               return vc_get_ccaps(id, data);
26901 +       case VCMD_set_bcaps:
26902 +               return vc_set_bcaps(id, data);
26903 +       case VCMD_get_bcaps:
26904 +               return vc_get_bcaps(id, data);
26905 +
26906 +       case VCMD_set_nflags:
26907 +               return vc_set_nflags(id, data);
26908 +       case VCMD_get_nflags:
26909 +               return vc_get_nflags(id, data);
26910 +
26911 +       case VCMD_set_ncaps:
26912 +               return vc_set_ncaps(id, data);
26913 +       case VCMD_get_ncaps:
26914 +               return vc_get_ncaps(id, data);
26915 +
26916 +#ifdef CONFIG_VSERVER_LEGACY
26917 +       case VCMD_set_sched_v2:
26918 +               return vc_set_sched_v2(id, data);
26919 +#endif
26920 +       case VCMD_set_sched_v3:
26921 +               return vc_set_sched_v3(id, data);
26922 +       /* this is version 4 */
26923 +       case VCMD_set_sched:
26924 +               return vc_set_sched(id, data);
26925 +
26926 +       case VCMD_add_dlimit:
26927 +               return __COMPAT(vc_add_dlimit, id, data, compat);
26928 +       case VCMD_rem_dlimit:
26929 +               return __COMPAT(vc_rem_dlimit, id, data, compat);
26930 +       case VCMD_set_dlimit:
26931 +               return __COMPAT(vc_set_dlimit, id, data, compat);
26932 +       case VCMD_get_dlimit:
26933 +               return __COMPAT(vc_get_dlimit, id, data, compat);
26934 +       }
26935 +
26936 +       /* below here only with VX_ADMIN or VX_WATCH (setup state is not enough) */
26937 +       if (!vx_check(0, VX_ADMIN|VX_WATCH))
26938 +               return -EPERM;
26939 +
26940 +       switch (cmd) {
26941 +       case VCMD_ctx_kill:
26942 +               return vc_ctx_kill(id, data);
26943 +
26944 +       case VCMD_wait_exit:
26945 +               return vc_wait_exit(id, data);
26946 +
26947 +       case VCMD_create_context:
26948 +#ifdef CONFIG_VSERVER_LEGACY
26949 +               return vc_ctx_create(id, NULL);
26950 +#else
26951 +               return -ENOSYS;
26952 +#endif
26953 +
26954 +       case VCMD_get_iattr:
26955 +               return __COMPAT(vc_get_iattr, id, data, compat);
26956 +       case VCMD_set_iattr:
26957 +               return __COMPAT(vc_set_iattr, id, data, compat);
26958 +
26959 +       case VCMD_enter_namespace:
26960 +               return vc_enter_namespace(id, data);
26961 +
26962 +       case VCMD_ctx_create_v0:
26963 +#ifdef CONFIG_VSERVER_LEGACY
26964 +               if (id == 1) {
26965 +                       current->xid = 1;
26966 +                       return 1;
26967 +               }
26968 +#endif
26969 +               return vc_ctx_create(id, NULL);
26970 +       case VCMD_ctx_create:
26971 +               return vc_ctx_create(id, data);
26972 +       case VCMD_ctx_migrate_v0:
26973 +               return vc_ctx_migrate(id, NULL);
26974 +       case VCMD_ctx_migrate:
26975 +               return vc_ctx_migrate(id, data);
26976 +
26977 +       case VCMD_net_create_v0:
26978 +               return vc_net_create(id, NULL);
26979 +       case VCMD_net_create:
26980 +               return vc_net_create(id, data);
26981 +       case VCMD_net_migrate:
26982 +               return vc_net_migrate(id, data);
26983 +       case VCMD_net_add:
26984 +               return vc_net_add(id, data);
26985 +       case VCMD_net_remove:
26986 +               return vc_net_remove(id, data);
26987 +
26988 +       }
26989 +       return -ENOSYS;
26990 +}
26991 +
26992 +extern asmlinkage long
26993 +sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
26994 +{
26995 +       long rc;
26996 +
26997 +       /* native entry point: run the command with the compat flag cleared */
26998 +       rc = do_vserver(cmd, id, data, 0);
26999 +       vxdprintk(VXD_CBIT(switch, 1), "vc: VCMD_%02d_%d[%d] = %08lx(%ld)",
27000 +               VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd), rc, rc);
27001 +       return rc;
27002 +}
27003 +
27004 +#ifdef CONFIG_COMPAT
27005 +
27006 +extern asmlinkage long
27007 +sys32_vserver(uint32_t cmd, uint32_t id, void __user *data)
27008 +{
27009 +       long rc;
27010 +
27011 +       /* 32-bit entry point: run the command with the compat flag set */
27012 +       rc = do_vserver(cmd, id, data, 1);
27013 +       vxdprintk(VXD_CBIT(switch, 1), "vc: VCMD_%02d_%d[%d] = %08lx(%ld)",
27014 +               VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd), rc, rc);
27015 +       return rc;
27016 +}
27017 +
27018 +#endif /* CONFIG_COMPAT */
27019 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/sysctl.c linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sysctl.c
27020 --- linux-2.6.16.20/kernel/vserver/sysctl.c     1970-01-01 01:00:00 +0100
27021 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/sysctl.c        2006-04-26 19:07:00 +0200
27022 @@ -0,0 +1,241 @@
27023 +/*
27024 + *  kernel/vserver/sysctl.c
27025 + *
27026 + *  Virtual Context Support
27027 + *
27028 + *  Copyright (C) 2004-2005  Herbert Pötzl
27029 + *
27030 + *  V0.01  basic structure
27031 + *
27032 + */
27033 +
27034 +#include <linux/errno.h>
27035 +#include <linux/module.h>
27036 +#include <linux/types.h>
27037 +#include <linux/ctype.h>
27038 +#include <linux/sysctl.h>
27039 +#include <linux/parser.h>
27040 +#include <linux/fs.h>
27041 +
27042 +#include <asm/uaccess.h>
27043 +#include <asm/unistd.h>
27044 +
27045 +
27046 +#define CTL_VSERVER    4242    /* unused? */
27047 +
27048 +enum {
27049 +       CTL_DEBUG_ERROR         = 0,
27050 +       CTL_DEBUG_SWITCH        = 1,
27051 +       CTL_DEBUG_XID,
27052 +       CTL_DEBUG_NID,
27053 +       CTL_DEBUG_TAG,
27054 +       CTL_DEBUG_NET,
27055 +       CTL_DEBUG_LIMIT,
27056 +       CTL_DEBUG_CRES,
27057 +       CTL_DEBUG_DLIM,
27058 +       CTL_DEBUG_QUOTA,
27059 +       CTL_DEBUG_CVIRT,
27060 +       CTL_DEBUG_MISC,
27061 +};
27062 +
27063 +
27064 +unsigned int vx_debug_switch   = 0;
27065 +unsigned int vx_debug_xid      = 0;
27066 +unsigned int vx_debug_nid      = 0;
27067 +unsigned int vx_debug_tag      = 0;
27068 +unsigned int vx_debug_net      = 0;
27069 +unsigned int vx_debug_limit    = 0;
27070 +unsigned int vx_debug_cres     = 0;
27071 +unsigned int vx_debug_dlim     = 0;
27072 +unsigned int vx_debug_quota    = 0;
27073 +unsigned int vx_debug_cvirt    = 0;
27074 +unsigned int vx_debug_misc     = 0;
27075 +
27076 +
27077 +static struct ctl_table_header *vserver_table_header;
27078 +static ctl_table vserver_table[];
27079 +
27080 +
27081 +void vserver_register_sysctl(void)
27082 +{
27083 +       /* register the table once; calls while it is registered change nothing */
27084 +       if (vserver_table_header)
27085 +               return;
27086 +       vserver_table_header = register_sysctl_table(vserver_table, 1);
27087 +}
27088 +
27089 +void vserver_unregister_sysctl(void)
27090 +{
27091 +       /* nothing to do unless a table is currently registered */
27092 +       if (!vserver_table_header)
27093 +               return;
27094 +       unregister_sysctl_table(vserver_table_header);
27095 +       vserver_table_header = NULL;
27096 +}
27096 +
27097 +/* sysctl handler: read or write one unsigned debug mask as decimal text */
27098 +static int proc_dodebug(ctl_table *table, int write,
27099 +       struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
27100 +{
27101 +       char            tmpbuf[20], *p, c;
27102 +       unsigned int    value;
27103 +       size_t          left, len;
27104 +
27105 +       if ((*ppos && !write) || !*lenp) {
27106 +               *lenp = 0;      /* repeated read or empty request: no data */
27107 +               return 0;
27108 +       }
27109 +
27110 +       left = *lenp;
27111 +
27112 +       if (write) {
27113 +               if (!access_ok(VERIFY_READ, buffer, left))
27114 +                       return -EFAULT;
27115 +               p = (char *) buffer;
27116 +               while (left && __get_user(c, p) >= 0 && isspace(c))
27117 +                       left--, p++;    /* skip leading whitespace */
27118 +               if (!left)
27119 +                       goto done;
27120 +
27121 +               if (left > sizeof(tmpbuf) - 1)  /* keep room for the NUL */
27122 +                       return -EINVAL;
27123 +               if (copy_from_user(tmpbuf, p, left))
27124 +                       return -EFAULT;
27125 +               tmpbuf[left] = '\0';
27126 +
27127 +               for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--)
27128 +                       value = 10 * value + (*p - '0');
27129 +               if (*p && !isspace(*p)) /* only whitespace may follow the digits */
27130 +                       return -EINVAL;
27131 +               while (left && isspace(*p))
27132 +                       left--, p++;
27133 +               *(unsigned int *) table->data = value;
27134 +       } else {
27135 +               if (!access_ok(VERIFY_WRITE, buffer, left))
27136 +                       return -EFAULT;
27137 +               len = sprintf(tmpbuf, "%u", *(unsigned int *) table->data);
27138 +               if (len > left)
27139 +                       len = left;
27140 +               if (__copy_to_user(buffer, tmpbuf, len))
27141 +                       return -EFAULT;
27142 +               if ((left -= len) > 0) {        /* room left: append a newline */
27143 +                       if (put_user('\n', (char *)buffer + len))
27144 +                               return -EFAULT;
27145 +                       left--;
27146 +               }
27147 +       }
27148 +
27149 +done:
27150 +       *lenp -= left;  /* bytes actually consumed or produced */
27151 +       *ppos += *lenp;
27152 +       return 0;
27153 +}
27154 +
27155 +
27156 +#define        CTL_ENTRY(ctl, name)                            \
27157 +       {                                               \
27158 +               .ctl_name       = ctl,                  \
27159 +               .procname       = #name,                \
27160 +               .data           = &vx_##name,           \
27161 +               .maxlen         = sizeof(int),          \
27162 +               .mode           = 0644,                 \
27163 +               .proc_handler   = &proc_dodebug         \
27164 +       }
27165 +
27166 +static ctl_table debug_table[] = {
27167 +       CTL_ENTRY (CTL_DEBUG_SWITCH,    debug_switch),
27168 +       CTL_ENTRY (CTL_DEBUG_XID,       debug_xid),
27169 +       CTL_ENTRY (CTL_DEBUG_NID,       debug_nid),
27170 +       CTL_ENTRY (CTL_DEBUG_TAG,       debug_tag),
27171 +       CTL_ENTRY (CTL_DEBUG_NET,       debug_net),
27172 +       CTL_ENTRY (CTL_DEBUG_LIMIT,     debug_limit),
27173 +       CTL_ENTRY (CTL_DEBUG_CRES,      debug_cres),
27174 +       CTL_ENTRY (CTL_DEBUG_DLIM,      debug_dlim),
27175 +       CTL_ENTRY (CTL_DEBUG_QUOTA,     debug_quota),
27176 +       CTL_ENTRY (CTL_DEBUG_CVIRT,     debug_cvirt),
27177 +       CTL_ENTRY (CTL_DEBUG_MISC,      debug_misc),
27178 +       { .ctl_name = 0 }
27179 +};
27180 +
27181 +static ctl_table vserver_table[] = {
27182 +       {
27183 +               .ctl_name       = CTL_VSERVER,
27184 +               .procname       = "vserver",
27185 +               .mode           = 0555,
27186 +               .child          = debug_table
27187 +       },
27188 +       { .ctl_name = 0 }
27189 +};
27190 +
27191 +
27192 +static match_table_t tokens = {
27193 +       { CTL_DEBUG_SWITCH,     "switch=%x"     },
27194 +       { CTL_DEBUG_XID,        "xid=%x"        },
27195 +       { CTL_DEBUG_NID,        "nid=%x"        },
27196 +       { CTL_DEBUG_TAG,        "tag=%x"        },
27197 +       { CTL_DEBUG_NET,        "net=%x"        },
27198 +       { CTL_DEBUG_LIMIT,      "limit=%x"      },
27199 +       { CTL_DEBUG_CRES,       "cres=%x"       },
27200 +       { CTL_DEBUG_DLIM,       "dlim=%x"       },
27201 +       { CTL_DEBUG_QUOTA,      "quota=%x"      },
27202 +       { CTL_DEBUG_CVIRT,      "cvirt=%x"      },
27203 +       { CTL_DEBUG_MISC,       "misc=%x"       },
27204 +       { CTL_DEBUG_ERROR,      NULL            }
27205 +};
27206 +
27207 +#define        HANDLE_CASE(id, name, val)                              \
27208 +       case CTL_DEBUG_ ## id:                                  \
27209 +               vx_debug_ ## name = val;                        \
27210 +               printk("vs_debug_" #name "=0x%x\n", val);       \
27211 +               break
27212 +
27213 +/* parse the "vsdebug=" boot option: comma separated name=value pairs */
27214 +static int __init vs_debug_setup(char *str)
27215 +{
27216 +       char *p;
27217 +       int token;
27218 +
27219 +       printk("vs_debug_setup(%s)\n", str);
27220 +       while ((p = strsep(&str, ",")) != NULL) {
27221 +               substring_t args[MAX_OPT_ARGS];
27222 +               unsigned int value;
27223 +
27224 +               if (!*p)
27225 +                       continue;
27226 +
27227 +               token = match_token(p, tokens, args);
27228 +               value = (token>0)?simple_strtoul(args[0].from, NULL, 0):0;
27229 +
27230 +               switch (token) {
27231 +               HANDLE_CASE(SWITCH, switch, value);
27232 +               HANDLE_CASE(XID,    xid,    value);
27233 +               HANDLE_CASE(NID,    nid,    value);
27234 +               HANDLE_CASE(TAG,    tag,    value);
27235 +               HANDLE_CASE(NET,    net,    value);
27236 +               HANDLE_CASE(LIMIT,  limit,  value);
27237 +               HANDLE_CASE(CRES,   cres,   value);
27238 +               HANDLE_CASE(DLIM,   dlim,   value);
27239 +               HANDLE_CASE(QUOTA,  quota,  value);
27240 +               HANDLE_CASE(CVIRT,  cvirt,  value);
27241 +               HANDLE_CASE(MISC,   misc,   value);
27242 +               default:        /* CTL_DEBUG_ERROR: no pattern in tokens matched */
27243 +                       return -EINVAL;
27244 +               }
27245 +       }
27246 +       return 1;
27247 +}
27248 +
27249 +__setup("vsdebug=", vs_debug_setup);
27250 +
27251 +
27252 +
27253 +EXPORT_SYMBOL_GPL(vx_debug_switch);
27254 +EXPORT_SYMBOL_GPL(vx_debug_xid);
27255 +EXPORT_SYMBOL_GPL(vx_debug_nid);
27256 +EXPORT_SYMBOL_GPL(vx_debug_net);
27257 +EXPORT_SYMBOL_GPL(vx_debug_limit);
27258 +EXPORT_SYMBOL_GPL(vx_debug_cres);
27259 +EXPORT_SYMBOL_GPL(vx_debug_dlim);
27260 +EXPORT_SYMBOL_GPL(vx_debug_quota);
27261 +EXPORT_SYMBOL_GPL(vx_debug_cvirt);
27262 +EXPORT_SYMBOL_GPL(vx_debug_misc);
27263 +
27264 diff -NurpP --minimal linux-2.6.16.20/kernel/vserver/vci_config.h linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/vci_config.h
27265 --- linux-2.6.16.20/kernel/vserver/vci_config.h 1970-01-01 01:00:00 +0100
27266 +++ linux-2.6.16.20-vs2.1.1-rc22/kernel/vserver/vci_config.h    2006-04-26 19:07:00 +0200
27267 @@ -0,0 +1,78 @@
27268 +
27269 +enum {
27270 +       VCI_KCBIT_LEGACY = 1,
27271 +       VCI_KCBIT_LEGACYNET,
27272 +       VCI_KCBIT_NGNET,
27273 +
27274 +       VCI_KCBIT_PROC_SECURE,
27275 +       VCI_KCBIT_HARDCPU,
27276 +       VCI_KCBIT_IDLELIMIT,
27277 +       VCI_KCBIT_IDLETIME,
27278 +
27279 +       VCI_KCBIT_COWBL,
27280 +
27281 +       VCI_KCBIT_LEGACY_VERSION = 15,
27282 +       VCI_KCBIT_DEBUG = 16,
27283 +       VCI_KCBIT_HISTORY = 20,
27284 +       VCI_KCBIT_TAGGED = 24,
27285 +};
27286 +
27287 +/* build the bitmask of compiled-in vserver options (bit numbers: VCI_KCBIT_*) */
27288 +static inline uint32_t vci_kernel_config(void)
27289 +{
27290 +       return
27291 +       /* various legacy options */
27292 +#ifdef CONFIG_VSERVER_LEGACY
27293 +       (1 << VCI_KCBIT_LEGACY) |
27294 +#endif
27295 +#ifdef CONFIG_VSERVER_LEGACYNET
27296 +       (1 << VCI_KCBIT_LEGACYNET) |
27297 +#endif
27298 +#ifdef CONFIG_VSERVER_LEGACY_VERSION
27299 +       (1 << VCI_KCBIT_LEGACY_VERSION) |
27300 +#endif
27301 +
27302 +       /* configured features */
27303 +#ifdef CONFIG_VSERVER_PROC_SECURE
27304 +       (1 << VCI_KCBIT_PROC_SECURE) |
27305 +#endif
27306 +#ifdef CONFIG_VSERVER_HARDCPU
27307 +       (1 << VCI_KCBIT_HARDCPU) |
27308 +#endif
27309 +#ifdef CONFIG_VSERVER_IDLELIMIT
27310 +       (1 << VCI_KCBIT_IDLELIMIT) |
27311 +#endif
27312 +#ifdef CONFIG_VSERVER_IDLETIME
27313 +       (1 << VCI_KCBIT_IDLETIME) |
27314 +#endif
27315 +#ifdef CONFIG_VSERVER_COWBL
27316 +       (1 << VCI_KCBIT_COWBL) |
27317 +#endif
27318 +
27319 +       /* debug options */
27320 +#ifdef CONFIG_VSERVER_DEBUG
27321 +       (1 << VCI_KCBIT_DEBUG) |
27322 +#endif
27323 +#ifdef CONFIG_VSERVER_HISTORY
27324 +       (1 << VCI_KCBIT_HISTORY) |
27325 +#endif
27326 +
27327 +       /* inode context tagging: a mode number (0-5, else 7) shifted to VCI_KCBIT_TAGGED */
27328 +#if    defined(CONFIG_TAGGING_NONE)
27329 +       (0 << VCI_KCBIT_TAGGED) |
27330 +#elif  defined(CONFIG_TAGGING_UID16)
27331 +       (1 << VCI_KCBIT_TAGGED) |
27332 +#elif  defined(CONFIG_TAGGING_GID16)
27333 +       (2 << VCI_KCBIT_TAGGED) |
27334 +#elif  defined(CONFIG_TAGGING_ID24)
27335 +       (3 << VCI_KCBIT_TAGGED) |
27336 +#elif  defined(CONFIG_TAGGING_INTERN)
27337 +       (4 << VCI_KCBIT_TAGGED) |
27338 +#elif  defined(CONFIG_TAGGING_RUNTIME)
27339 +       (5 << VCI_KCBIT_TAGGED) |
27340 +#else
27341 +       (7 << VCI_KCBIT_TAGGED) |
27342 +#endif
27343 +       0;      /* closes the chain of '|' operators above */
27344 +}
27345 +
27346 diff -NurpP --minimal linux-2.6.16.20/mm/filemap.c linux-2.6.16.20-vs2.1.1-rc22/mm/filemap.c
27347 --- linux-2.6.16.20/mm/filemap.c        2006-04-09 13:49:58 +0200
27348 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/filemap.c   2006-05-04 03:22:57 +0200
27349 @@ -1111,6 +1111,31 @@ int file_send_actor(read_descriptor_t * 
27350         return written;
27351  }
27352  
27353 +/* FIXME: It would be as simple as this, if we had a (void __user*) to write.
27354 + * We already have a kernel buffer, so it should be even simpler, right? ;)
27355 + *
27356 + * Yes, sorta.  After duplicating the complete path of generic_file_write(),
27357 + * at least some special cases could be removed, so the copy is simpler than
27358 + * the original.  But it remains a copy, so overall complexity increases.
27359 + */
27360 +static ssize_t
27361 +generic_kernel_file_write(struct file *, const char *, size_t, loff_t *);
27362 +
27363 +ssize_t generic_file_sendpage(struct file *file, struct page *page,
27364 +               int offset, size_t size, loff_t *ppos, int more)
27365 +{
27366 +       char *src = kmap(page);
27367 +       ssize_t nwritten;
27368 +
27369 +       /* write 'size' bytes of the mapped page to the file; 'more' is ignored */
27370 +       nwritten = generic_kernel_file_write(file, src + offset, size, ppos);
27371 +       kunmap(page);
27372 +
27373 +       return nwritten;
27374 +}
27375 +
27376 +EXPORT_SYMBOL(generic_file_sendpage);
27377 +
27378  ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos,
27379                          size_t count, read_actor_t actor, void *target)
27380  {
27381 @@ -1761,6 +1786,19 @@ int remove_suid(struct dentry *dentry)
27382  }
27383  EXPORT_SYMBOL(remove_suid);
27384  
27385 +static inline size_t
27386 +filemap_copy_from_kernel(struct page *page, unsigned long offset,
27387 +                        const char *buf, unsigned bytes)
27388 +{
27389 +       char *dst = kmap(page);
27390 +
27391 +       /* memcpy from the kernel buffer; always reports all 'bytes' as copied */
27392 +       memcpy(dst + offset, buf, bytes);
27393 +       kunmap(page);
27394 +
27395 +       return bytes;
27396 +}
27397 +
27398  size_t
27399  __filemap_copy_from_user_iovec(char *vaddr, 
27400                         const struct iovec *iov, size_t base, size_t bytes)
27401 @@ -2134,6 +2172,175 @@ out:
27402  }
27403  EXPORT_SYMBOL(generic_file_aio_write_nolock);
27404  
27405 +static inline void
27406 +filemap_set_next_kvec(const struct kvec **iovp, size_t *basep, size_t bytes)
27407 +{
27408 +       const struct kvec *iov = *iovp;
27409 +       size_t base = *basep;
27410 +       /* advance the (*iovp, *basep) position by 'bytes', segment by segment */
27411 +       while (bytes) {
27412 +               size_t copy = min(bytes, iov->iov_len - base);
27413 +
27414 +               bytes -= copy;
27415 +               base += copy;
27416 +               if (iov->iov_len == base) {     /* segment used up: next one */
27417 +                       iov++;
27418 +                       base = 0;
27419 +               }
27420 +       }
27421 +       *iovp = iov;
27422 +       *basep = base;
27423 +}
27424 +
27425 +/*
27426 + * TODO:
27427 + * This largely tries to copy generic_file_aio_write_nolock(), although it
27428 + * doesn't have to be nearly as generic.  A real cleanup should either
27429 + * merge this into generic_file_aio_write_nolock() as well or keep it special
27430 + * and remove as much code as possible.
27431 + */
27432 +static ssize_t
27433 +generic_kernel_file_aio_write_nolock(struct kiocb *iocb, const struct kvec*iov,
27434 +                                    unsigned long nr_segs, loff_t *ppos)
27435 +{
27436 +       struct file *file = iocb->ki_filp;
27437 +       struct address_space * mapping = file->f_mapping;
27438 +       struct address_space_operations *a_ops = mapping->a_ops;
27439 +       size_t ocount;          /* original count */
27440 +       size_t count;           /* after file limit checks */
27441 +       struct inode    *inode = mapping->host;
27442 +       long            status = 0;
27443 +       loff_t          pos;
27444 +       struct page     *page;
27445 +       struct page     *cached_page = NULL;
27446 +       const int       isblk = S_ISBLK(inode->i_mode);
27447 +       ssize_t         written;
27448 +       ssize_t         err;
27449 +       size_t          bytes;
27450 +       struct pagevec  lru_pvec;
27451 +       const struct kvec *cur_iov = iov; /* current kvec */
27452 +       size_t          iov_base = 0;      /* offset in the current kvec */
27453 +       unsigned long   seg;
27454 +       char            *buf;
27455 +
27456 +       ocount = 0;
27457 +       for (seg = 0; seg < nr_segs; seg++) {
27458 +               const struct kvec *iv = &iov[seg];
27459 +
27460 +               /*
27461 +                * If any segment has a negative length, or the cumulative
27462 +                * length ever wraps negative then return -EINVAL.
27463 +                */
27464 +               ocount += iv->iov_len;
27465 +               if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
27466 +                       return -EINVAL;
27467 +       }
27468 +
27469 +       count = ocount;
27470 +       pos = *ppos;
27471 +       pagevec_init(&lru_pvec, 0);
27472 +
27473 +       /* We can write back this queue in page reclaim */
27474 +       current->backing_dev_info = mapping->backing_dev_info;
27475 +       written = 0;
27476 +
27477 +       err = generic_write_checks(file, &pos, &count, isblk);
27478 +       if (err)
27479 +               goto out;
27480 +
27481 +
27482 +       if (count == 0)
27483 +               goto out;
27484 +
27485 +       remove_suid(file->f_dentry);
27486 +       file_update_time(file);
27487 +
27488 +       /* There is no sane reason to use O_DIRECT */
27489 +       BUG_ON(file->f_flags & O_DIRECT);
27490 +
27491 +       buf = iov->iov_base;    /* only the first segment is ever copied from */
27492 +       do {
27493 +               unsigned long index;
27494 +               unsigned long offset;
27495 +               size_t copied;
27496 +
27497 +               offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
27498 +               index = pos >> PAGE_CACHE_SHIFT;
27499 +               bytes = PAGE_CACHE_SIZE - offset;
27500 +               if (bytes > count)
27501 +                       bytes = count;
27502 +
27503 +               page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
27504 +               if (!page) {
27505 +                       status = -ENOMEM;
27506 +                       break;
27507 +               }
27508 +
27509 +               status = a_ops->prepare_write(file, page, offset, offset+bytes);
27510 +               if (unlikely(status)) {
27511 +                       loff_t isize = i_size_read(inode);
27512 +                       /*
27513 +                        * prepare_write() may have instantiated a few blocks
27514 +                        * outside i_size.  Trim these off again.
27515 +                        */
27516 +                       unlock_page(page);
27517 +                       page_cache_release(page);
27518 +                       if (pos + bytes > isize)
27519 +                               vmtruncate(inode, isize);
27520 +                       break;
27521 +               }
27522 +
27523 +               BUG_ON(nr_segs != 1);   /* multi-segment writes are not supported */
27524 +               copied = filemap_copy_from_kernel(page, offset, buf, bytes);
27525 +
27526 +               flush_dcache_page(page);
27527 +               status = a_ops->commit_write(file, page, offset, offset+bytes);
27528 +               if (likely(copied > 0)) {
27529 +                       if (!status)
27530 +                               status = copied;
27531 +
27532 +                       if (status >= 0) {
27533 +                               written += status;
27534 +                               count -= status;
27535 +                               pos += status;
27536 +                               buf += status;
27537 +                               if (unlikely(nr_segs > 1)) /* never true after the BUG_ON */
27538 +                                       filemap_set_next_kvec(&cur_iov,
27539 +                                                       &iov_base, status);
27540 +                       }
27541 +               }
27542 +               if (unlikely(copied != bytes))  /* the kernel copy returns 'bytes' */
27543 +                       if (status >= 0)
27544 +                               status = -EFAULT;
27545 +               unlock_page(page);
27546 +               mark_page_accessed(page);
27547 +               page_cache_release(page);
27548 +               if (status < 0)
27549 +                       break;
27550 +               balance_dirty_pages_ratelimited(mapping);
27551 +               cond_resched();
27552 +       } while (count);
27553 +       *ppos = pos;
27554 +
27555 +       if (cached_page)
27556 +               page_cache_release(cached_page);
27557 +
27558 +       /*
27559 +        * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
27560 +        */
27561 +       if (status >= 0) {
27562 +               if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
27563 +                       status = generic_osync_inode(inode, mapping,
27564 +                                       OSYNC_METADATA|OSYNC_DATA);
27565 +       }
27566 +
27567 +       err = written ? written : status;       /* partial progress wins over an error */
27568 +out:
27569 +       pagevec_lru_add(&lru_pvec);
27570 +       current->backing_dev_info = NULL;
27571 +       return err;
27572 +}
27573 +
27574  ssize_t
27575  generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
27576                                 unsigned long nr_segs, loff_t *ppos)
27577 @@ -2183,6 +2390,21 @@ generic_file_write_nolock(struct file *f
27578                 ret = wait_on_sync_kiocb(&kiocb);
27579         return ret;
27580  }
27581 +
27582 +static ssize_t
27583 +generic_kernel_file_write_nolock(struct file *file, const struct kvec *iov,
27584 +                                unsigned long nr_segs, loff_t *ppos)
27585 +{
27586 +       struct kiocb sync_iocb;
27587 +       ssize_t nbytes;
27588 +
27589 +       init_sync_kiocb(&sync_iocb, file);
27590 +       nbytes = generic_kernel_file_aio_write_nolock(&sync_iocb, iov, nr_segs, ppos);
27591 +       if (nbytes != -EIOCBQUEUED)     /* finished without being queued */
27592 +               return nbytes;
27593 +       return wait_on_sync_kiocb(&sync_iocb);
27594 +}
27595 +
27596  EXPORT_SYMBOL(generic_file_write_nolock);
27597  
27598  ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
27599 @@ -2237,6 +2459,21 @@ ssize_t generic_file_write(struct file *
27600  }
27601  EXPORT_SYMBOL(generic_file_write);
27602  
27603 +static ssize_t generic_kernel_file_write(struct file *file, const char *buf,
27604 +                                        size_t count, loff_t *ppos)
27605 +{
27606 +       struct kvec seg = { .iov_base = (char *) buf, .iov_len = count };
27607 +       struct inode *host = file->f_mapping->host;
27608 +       ssize_t nwritten;
27609 +
27610 +       /* single-segment write of a kernel buffer, done under the inode mutex */
27611 +       mutex_lock(&host->i_mutex);
27612 +       nwritten = generic_kernel_file_write_nolock(file, &seg, 1, ppos);
27613 +       mutex_unlock(&host->i_mutex);
27614 +
27615 +       return nwritten;
27616 +}
27617 +
27618  ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
27619                         unsigned long nr_segs, loff_t *ppos)
27620  {
27621 diff -NurpP --minimal linux-2.6.16.20/mm/filemap_xip.c linux-2.6.16.20-vs2.1.1-rc22/mm/filemap_xip.c
27622 --- linux-2.6.16.20/mm/filemap_xip.c    2006-04-09 13:49:58 +0200
27623 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/filemap_xip.c       2006-04-26 19:07:00 +0200
27624 @@ -13,6 +13,7 @@
27625  #include <linux/module.h>
27626  #include <linux/uio.h>
27627  #include <linux/rmap.h>
27628 +#include <linux/vs_memory.h>
27629  #include <asm/tlbflush.h>
27630  #include "filemap.h"
27631  
27632 diff -NurpP --minimal linux-2.6.16.20/mm/fremap.c linux-2.6.16.20-vs2.1.1-rc22/mm/fremap.c
27633 --- linux-2.6.16.20/mm/fremap.c 2006-01-03 17:30:13 +0100
27634 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/fremap.c    2006-04-26 19:07:00 +0200
27635 @@ -15,6 +15,7 @@
27636  #include <linux/rmap.h>
27637  #include <linux/module.h>
27638  #include <linux/syscalls.h>
27639 +#include <linux/vs_memory.h>
27640  
27641  #include <asm/mmu_context.h>
27642  #include <asm/cacheflush.h>
27643 @@ -35,6 +36,7 @@ static int zap_pte(struct mm_struct *mm,
27644                                 set_page_dirty(page);
27645                         page_remove_rmap(page);
27646                         page_cache_release(page);
27647 +                       // dec_mm_counter(mm, file_rss);
27648                 }
27649         } else {
27650                 if (!pte_file(pte))
27651 @@ -74,6 +76,8 @@ int install_page(struct mm_struct *mm, s
27652         err = -ENOMEM;
27653         if (page_mapcount(page) > INT_MAX/2)
27654                 goto unlock;
27655 +       if (!vx_rsspages_avail(mm, 1))
27656 +               goto unlock;
27657  
27658         if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
27659                 inc_mm_counter(mm, file_rss);
27660 diff -NurpP --minimal linux-2.6.16.20/mm/hugetlb.c linux-2.6.16.20-vs2.1.1-rc22/mm/hugetlb.c
27661 --- linux-2.6.16.20/mm/hugetlb.c        2006-02-18 14:40:38 +0100
27662 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/hugetlb.c   2006-04-26 19:07:00 +0200
27663 @@ -18,6 +18,7 @@
27664  #include <asm/pgtable.h>
27665  
27666  #include <linux/hugetlb.h>
27667 +#include <linux/vs_memory.h>
27668  
27669  const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
27670  static unsigned long nr_huge_pages, free_huge_pages;
27671 diff -NurpP --minimal linux-2.6.16.20/mm/memory.c linux-2.6.16.20-vs2.1.1-rc22/mm/memory.c
27672 --- linux-2.6.16.20/mm/memory.c 2006-04-09 13:49:58 +0200
27673 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/memory.c    2006-04-26 19:07:00 +0200
27674 @@ -1904,6 +1904,10 @@ again:
27675                 grab_swap_token();
27676         }
27677  
27678 +       if (!vx_rsspages_avail(mm, 1)) {
27679 +               ret = VM_FAULT_OOM;
27680 +               goto out;
27681 +       }
27682         mark_page_accessed(page);
27683         lock_page(page);
27684         if (!PageSwapCache(page)) {
27685 @@ -1981,6 +1985,8 @@ static int do_anonymous_page(struct mm_s
27686                 /* Allocate our own private page. */
27687                 pte_unmap(page_table);
27688  
27689 +               if (!vx_rsspages_avail(mm, 1))
27690 +                       goto oom;
27691                 if (unlikely(anon_vma_prepare(vma)))
27692                         goto oom;
27693                 page = alloc_zeroed_user_highpage(vma, address);
27694 @@ -2059,6 +2065,9 @@ static int do_no_page(struct mm_struct *
27695                 smp_rmb(); /* serializes i_size against truncate_count */
27696         }
27697  retry:
27698 +       /* FIXME: is that check useful here? */
27699 +       if (!vx_rsspages_avail(mm, 1))
27700 +               return VM_FAULT_OOM;
27701         new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
27702         /*
27703          * No smp_rmb is needed here as long as there's a full
27704 @@ -2206,21 +2215,32 @@ static inline int handle_pte_fault(struc
27705         pte_t entry;
27706         pte_t old_entry;
27707         spinlock_t *ptl;
27708 +       int ret, type = VXPT_UNKNOWN;
27709  
27710         old_entry = entry = *pte;
27711         if (!pte_present(entry)) {
27712                 if (pte_none(entry)) {
27713 -                       if (!vma->vm_ops || !vma->vm_ops->nopage)
27714 -                               return do_anonymous_page(mm, vma, address,
27715 +                       if (!vma->vm_ops || !vma->vm_ops->nopage) {
27716 +                               ret = do_anonymous_page(mm, vma, address,
27717                                         pte, pmd, write_access);
27718 -                       return do_no_page(mm, vma, address,
27719 +                               type = VXPT_ANON;
27720 +                               goto out;
27721 +                       }
27722 +                       ret = do_no_page(mm, vma, address,
27723                                         pte, pmd, write_access);
27724 +                       type = VXPT_NONE;
27725 +                       goto out;
27726                 }
27727 -               if (pte_file(entry))
27728 -                       return do_file_page(mm, vma, address,
27729 +               if (pte_file(entry)) {
27730 +                       ret = do_file_page(mm, vma, address,
27731                                         pte, pmd, write_access, entry);
27732 -               return do_swap_page(mm, vma, address,
27733 +                       type = VXPT_FILE;
27734 +                       goto out;
27735 +               }
27736 +               ret = do_swap_page(mm, vma, address,
27737                                         pte, pmd, write_access, entry);
27738 +               type = VXPT_SWAP;
27739 +               goto out;
27740         }
27741  
27742         ptl = pte_lockptr(mm, pmd);
27743 @@ -2228,9 +2248,12 @@ static inline int handle_pte_fault(struc
27744         if (unlikely(!pte_same(*pte, entry)))
27745                 goto unlock;
27746         if (write_access) {
27747 -               if (!pte_write(entry))
27748 -                       return do_wp_page(mm, vma, address,
27749 +               if (!pte_write(entry)) {
27750 +                       ret = do_wp_page(mm, vma, address,
27751                                         pte, pmd, ptl, entry);
27752 +                       type = VXPT_WRITE;
27753 +                       goto out;
27754 +               }
27755                 entry = pte_mkdirty(entry);
27756         }
27757         entry = pte_mkyoung(entry);
27758 @@ -2250,7 +2273,10 @@ static inline int handle_pte_fault(struc
27759         }
27760  unlock:
27761         pte_unmap_unlock(pte, ptl);
27762 -       return VM_FAULT_MINOR;
27763 +       ret = VM_FAULT_MINOR;
27764 +out:
27765 +       vx_page_fault(mm, vma, type, ret);
27766 +       return ret;
27767  }
27768  
27769  /*
27770 diff -NurpP --minimal linux-2.6.16.20/mm/mempolicy.c linux-2.6.16.20-vs2.1.1-rc22/mm/mempolicy.c
27771 --- linux-2.6.16.20/mm/mempolicy.c      2006-05-22 16:25:40 +0200
27772 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/mempolicy.c 2006-05-22 06:19:46 +0200
27773 @@ -86,6 +86,7 @@
27774  #include <linux/swap.h>
27775  #include <linux/seq_file.h>
27776  #include <linux/proc_fs.h>
27777 +#include <linux/vs_pid.h>
27778  
27779  #include <asm/tlbflush.h>
27780  #include <asm/uaccess.h>
27781 diff -NurpP --minimal linux-2.6.16.20/mm/mlock.c linux-2.6.16.20-vs2.1.1-rc22/mm/mlock.c
27782 --- linux-2.6.16.20/mm/mlock.c  2006-04-09 13:49:58 +0200
27783 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/mlock.c     2006-04-26 19:07:00 +0200
27784 @@ -10,6 +10,7 @@
27785  #include <linux/mm.h>
27786  #include <linux/mempolicy.h>
27787  #include <linux/syscalls.h>
27788 +#include <linux/vs_memory.h>
27789  
27790  
27791  static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
27792 @@ -65,7 +66,7 @@ success:
27793                         ret = make_pages_present(start, end);
27794         }
27795  
27796 -       vma->vm_mm->locked_vm -= pages;
27797 +       vx_vmlocked_sub(vma->vm_mm, pages);
27798  out:
27799         if (ret == -ENOMEM)
27800                 ret = -EAGAIN;
27801 @@ -123,7 +124,7 @@ static int do_mlock(unsigned long start,
27802  
27803  asmlinkage long sys_mlock(unsigned long start, size_t len)
27804  {
27805 -       unsigned long locked;
27806 +       unsigned long locked, grow;
27807         unsigned long lock_limit;
27808         int error = -ENOMEM;
27809  
27810 @@ -134,8 +135,10 @@ asmlinkage long sys_mlock(unsigned long 
27811         len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
27812         start &= PAGE_MASK;
27813  
27814 -       locked = len >> PAGE_SHIFT;
27815 -       locked += current->mm->locked_vm;
27816 +       grow = len >> PAGE_SHIFT;
27817 +       if (!vx_vmlocked_avail(current->mm, grow))
27818 +               goto out;
27819 +       locked = current->mm->locked_vm + grow;
27820  
27821         lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
27822         lock_limit >>= PAGE_SHIFT;
27823 @@ -143,6 +146,7 @@ asmlinkage long sys_mlock(unsigned long 
27824         /* check against resource limits */
27825         if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
27826                 error = do_mlock(start, len, 1);
27827 +out:
27828         up_write(&current->mm->mmap_sem);
27829         return error;
27830  }
27831 @@ -202,6 +206,8 @@ asmlinkage long sys_mlockall(int flags)
27832         lock_limit >>= PAGE_SHIFT;
27833  
27834         ret = -ENOMEM;
27835 +       if (!vx_vmlocked_avail(current->mm, current->mm->total_vm))
27836 +               goto out;
27837         if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
27838             capable(CAP_IPC_LOCK))
27839                 ret = do_mlockall(flags);
27840 diff -NurpP --minimal linux-2.6.16.20/mm/mmap.c linux-2.6.16.20-vs2.1.1-rc22/mm/mmap.c
27841 --- linux-2.6.16.20/mm/mmap.c   2006-02-18 14:40:38 +0100
27842 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/mmap.c      2006-04-26 19:07:00 +0200
27843 @@ -1116,10 +1116,10 @@ munmap_back:
27844                 kmem_cache_free(vm_area_cachep, vma);
27845         }
27846  out:   
27847 -       mm->total_vm += len >> PAGE_SHIFT;
27848 +       vx_vmpages_add(mm, len >> PAGE_SHIFT);
27849         vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
27850         if (vm_flags & VM_LOCKED) {
27851 -               mm->locked_vm += len >> PAGE_SHIFT;
27852 +               vx_vmlocked_add(mm, len >> PAGE_SHIFT);
27853                 make_pages_present(addr, addr + len);
27854         }
27855         if (flags & MAP_POPULATE) {
27856 @@ -1479,9 +1479,9 @@ static int acct_stack_growth(struct vm_a
27857                 return -ENOMEM;
27858  
27859         /* Ok, everything looks good - let it rip */
27860 -       mm->total_vm += grow;
27861 +       vx_vmpages_add(mm, grow);
27862         if (vma->vm_flags & VM_LOCKED)
27863 -               mm->locked_vm += grow;
27864 +               vx_vmlocked_add(mm, grow);
27865         vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
27866         return 0;
27867  }
27868 @@ -1634,9 +1634,9 @@ static void remove_vma_list(struct mm_st
27869         do {
27870                 long nrpages = vma_pages(vma);
27871  
27872 -               mm->total_vm -= nrpages;
27873 +               vx_vmpages_sub(mm, nrpages);
27874                 if (vma->vm_flags & VM_LOCKED)
27875 -                       mm->locked_vm -= nrpages;
27876 +                       vx_vmlocked_sub(mm, nrpages);
27877                 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
27878                 vma = remove_vma(vma);
27879         } while (vma);
27880 @@ -1865,6 +1865,8 @@ unsigned long do_brk(unsigned long addr,
27881                 lock_limit >>= PAGE_SHIFT;
27882                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
27883                         return -EAGAIN;
27884 +               if (!vx_vmlocked_avail(mm, len >> PAGE_SHIFT))
27885 +                       return -ENOMEM;
27886         }
27887  
27888         /*
27889 @@ -1891,7 +1893,8 @@ unsigned long do_brk(unsigned long addr,
27890         if (mm->map_count > sysctl_max_map_count)
27891                 return -ENOMEM;
27892  
27893 -       if (security_vm_enough_memory(len >> PAGE_SHIFT))
27894 +       if (security_vm_enough_memory(len >> PAGE_SHIFT) ||
27895 +               !vx_vmpages_avail(mm, len >> PAGE_SHIFT))
27896                 return -ENOMEM;
27897  
27898         flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
27899 @@ -1919,9 +1922,9 @@ unsigned long do_brk(unsigned long addr,
27900         vma->vm_page_prot = protection_map[flags & 0x0f];
27901         vma_link(mm, vma, prev, rb_link, rb_parent);
27902  out:
27903 -       mm->total_vm += len >> PAGE_SHIFT;
27904 +       vx_vmpages_add(mm, len >> PAGE_SHIFT);
27905         if (flags & VM_LOCKED) {
27906 -               mm->locked_vm += len >> PAGE_SHIFT;
27907 +               vx_vmlocked_add(mm, len >> PAGE_SHIFT);
27908                 make_pages_present(addr, addr + len);
27909         }
27910         return addr;
27911 @@ -1947,6 +1950,11 @@ void exit_mmap(struct mm_struct *mm)
27912         free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
27913         tlb_finish_mmu(tlb, 0, end);
27914  
27915 +       set_mm_counter(mm, file_rss, 0);
27916 +       set_mm_counter(mm, anon_rss, 0);
27917 +       vx_vmpages_sub(mm, mm->total_vm);
27918 +       vx_vmlocked_sub(mm, mm->locked_vm);
27919 +
27920         /*
27921          * Walk the list again, actually closing and freeing it,
27922          * with preemption enabled, without holding any MM locks.
27923 @@ -1986,7 +1994,8 @@ int insert_vm_struct(struct mm_struct * 
27924         if (__vma && __vma->vm_start < vma->vm_end)
27925                 return -ENOMEM;
27926         if ((vma->vm_flags & VM_ACCOUNT) &&
27927 -            security_vm_enough_memory(vma_pages(vma)))
27928 +               (security_vm_enough_memory(vma_pages(vma)) ||
27929 +               !vx_vmpages_avail(mm, vma_pages(vma))))
27930                 return -ENOMEM;
27931         vma_link(mm, vma, prev, rb_link, rb_parent);
27932         return 0;
27933 @@ -2059,5 +2068,7 @@ int may_expand_vm(struct mm_struct *mm, 
27934  
27935         if (cur + npages > lim)
27936                 return 0;
27937 +       if (!vx_vmpages_avail(mm, npages))
27938 +               return 0;
27939         return 1;
27940  }
27941 diff -NurpP --minimal linux-2.6.16.20/mm/mremap.c linux-2.6.16.20-vs2.1.1-rc22/mm/mremap.c
27942 --- linux-2.6.16.20/mm/mremap.c 2006-04-09 13:49:58 +0200
27943 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/mremap.c    2006-04-26 19:07:00 +0200
27944 @@ -18,6 +18,7 @@
27945  #include <linux/highmem.h>
27946  #include <linux/security.h>
27947  #include <linux/syscalls.h>
27948 +#include <linux/vs_memory.h>
27949  
27950  #include <asm/uaccess.h>
27951  #include <asm/cacheflush.h>
27952 @@ -211,7 +212,7 @@ static unsigned long move_vma(struct vm_
27953          * If this were a serious issue, we'd add a flag to do_munmap().
27954          */
27955         hiwater_vm = mm->hiwater_vm;
27956 -       mm->total_vm += new_len >> PAGE_SHIFT;
27957 +       vx_vmpages_add(mm, new_len >> PAGE_SHIFT);
27958         vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
27959  
27960         if (do_munmap(mm, old_addr, old_len) < 0) {
27961 @@ -229,7 +230,7 @@ static unsigned long move_vma(struct vm_
27962         }
27963  
27964         if (vm_flags & VM_LOCKED) {
27965 -               mm->locked_vm += new_len >> PAGE_SHIFT;
27966 +               vx_vmlocked_add(mm, new_len >> PAGE_SHIFT);
27967                 if (new_len > old_len)
27968                         make_pages_present(new_addr + old_len,
27969                                            new_addr + new_len);
27970 @@ -336,6 +337,9 @@ unsigned long do_mremap(unsigned long ad
27971                 ret = -EAGAIN;
27972                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
27973                         goto out;
27974 +               if (!vx_vmlocked_avail(current->mm,
27975 +                       (new_len - old_len) >> PAGE_SHIFT))
27976 +                       goto out;
27977         }
27978         if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT)) {
27979                 ret = -ENOMEM;
27980 @@ -364,10 +368,10 @@ unsigned long do_mremap(unsigned long ad
27981                         vma_adjust(vma, vma->vm_start,
27982                                 addr + new_len, vma->vm_pgoff, NULL);
27983  
27984 -                       mm->total_vm += pages;
27985 +                       vx_vmpages_add(mm, pages);
27986                         vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
27987                         if (vma->vm_flags & VM_LOCKED) {
27988 -                               mm->locked_vm += pages;
27989 +                               vx_vmlocked_add(mm, pages);
27990                                 make_pages_present(addr + old_len,
27991                                                    addr + new_len);
27992                         }
27993 diff -NurpP --minimal linux-2.6.16.20/mm/nommu.c linux-2.6.16.20-vs2.1.1-rc22/mm/nommu.c
27994 --- linux-2.6.16.20/mm/nommu.c  2006-04-09 13:49:58 +0200
27995 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/nommu.c     2006-04-26 19:07:00 +0200
27996 @@ -820,7 +820,7 @@ unsigned long do_mmap_pgoff(struct file 
27997         realalloc += kobjsize(vma);
27998         askedalloc += sizeof(*vma);
27999  
28000 -       current->mm->total_vm += len >> PAGE_SHIFT;
28001 +       vx_vmpages_add(current->mm, len >> PAGE_SHIFT);
28002  
28003         add_nommu_vma(vma);
28004  
28005 @@ -937,7 +937,7 @@ int do_munmap(struct mm_struct *mm, unsi
28006         kfree(vml);
28007  
28008         update_hiwater_vm(mm);
28009 -       mm->total_vm -= len >> PAGE_SHIFT;
28010 +       vx_vmpages_sub(mm, len >> PAGE_SHIFT);
28011  
28012  #ifdef DEBUG
28013         show_process_blocks();
28014 @@ -956,7 +956,7 @@ void exit_mmap(struct mm_struct * mm)
28015                 printk("Exit_mmap:\n");
28016  #endif
28017  
28018 -               mm->total_vm = 0;
28019 +               vx_vmpages_sub(mm, mm->total_vm);
28020  
28021                 while ((tmp = mm->context.vmlist)) {
28022                         mm->context.vmlist = tmp->next;
28023 diff -NurpP --minimal linux-2.6.16.20/mm/oom_kill.c linux-2.6.16.20-vs2.1.1-rc22/mm/oom_kill.c
28024 --- linux-2.6.16.20/mm/oom_kill.c       2006-04-09 13:49:58 +0200
28025 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/oom_kill.c  2006-04-26 19:07:00 +0200
28026 @@ -55,6 +55,7 @@ unsigned long badness(struct task_struct
28027          * The memory size of the process is the basis for the badness.
28028          */
28029         points = p->mm->total_vm;
28030 +       /* FIXME: add vserver badness ;) */
28031  
28032         /*
28033          * Processes which fork a lot of child processes are likely
28034 @@ -230,8 +231,8 @@ static void __oom_kill_task(task_t *p, c
28035                 return;
28036         }
28037         task_unlock(p);
28038 -       printk(KERN_ERR "%s: Killed process %d (%s).\n",
28039 -                               message, p->pid, p->comm);
28040 +       printk(KERN_ERR "%s: Killed process %d[#%u] (%s).\n",
28041 +               message, p->pid, p->xid, p->comm);
28042  
28043         /*
28044          * We give our sacrificial lamb high priority and access to
28045 diff -NurpP --minimal linux-2.6.16.20/mm/page_alloc.c linux-2.6.16.20-vs2.1.1-rc22/mm/page_alloc.c
28046 --- linux-2.6.16.20/mm/page_alloc.c     2006-06-06 15:37:20 +0200
28047 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/page_alloc.c        2006-06-06 15:31:32 +0200
28048 @@ -37,6 +37,7 @@
28049  #include <linux/cpu.h>
28050  #include <linux/cpuset.h>
28051  #include <linux/memory_hotplug.h>
28052 +#include <linux/vs_limit.h>
28053  #include <linux/nodemask.h>
28054  #include <linux/vmalloc.h>
28055  #include <linux/mempolicy.h>
28056 @@ -1356,6 +1357,8 @@ void si_meminfo(struct sysinfo *val)
28057         val->freehigh = 0;
28058  #endif
28059         val->mem_unit = PAGE_SIZE;
28060 +       if (vx_flags(VXF_VIRT_MEM, 0))
28061 +               vx_vsi_meminfo(val);
28062  }
28063  
28064  EXPORT_SYMBOL(si_meminfo);
28065 @@ -1370,6 +1373,8 @@ void si_meminfo_node(struct sysinfo *val
28066         val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
28067         val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages;
28068         val->mem_unit = PAGE_SIZE;
28069 +       if (vx_flags(VXF_VIRT_MEM, 0))
28070 +               vx_vsi_meminfo(val);
28071  }
28072  #endif
28073  
28074 diff -NurpP --minimal linux-2.6.16.20/mm/rmap.c linux-2.6.16.20-vs2.1.1-rc22/mm/rmap.c
28075 --- linux-2.6.16.20/mm/rmap.c   2006-04-09 13:49:58 +0200
28076 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/rmap.c      2006-04-26 19:07:00 +0200
28077 @@ -53,6 +53,7 @@
28078  #include <linux/rmap.h>
28079  #include <linux/rcupdate.h>
28080  #include <linux/module.h>
28081 +#include <linux/vs_memory.h>
28082  
28083  #include <asm/tlbflush.h>
28084  
28085 diff -NurpP --minimal linux-2.6.16.20/mm/shmem.c linux-2.6.16.20-vs2.1.1-rc22/mm/shmem.c
28086 --- linux-2.6.16.20/mm/shmem.c  2006-05-22 16:25:40 +0200
28087 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/shmem.c     2006-05-22 06:19:46 +0200
28088 @@ -51,7 +51,6 @@
28089  #include <asm/pgtable.h>
28090  
28091  /* This magic number is used in glibc for posix shared memory */
28092 -#define TMPFS_MAGIC    0x01021994
28093  
28094  #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
28095  #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
28096 @@ -1656,7 +1655,7 @@ static int shmem_statfs(struct super_blo
28097  {
28098         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
28099  
28100 -       buf->f_type = TMPFS_MAGIC;
28101 +       buf->f_type = TMPFS_SUPER_MAGIC;
28102         buf->f_bsize = PAGE_CACHE_SIZE;
28103         buf->f_namelen = NAME_MAX;
28104         spin_lock(&sbinfo->stat_lock);
28105 @@ -2098,7 +2097,7 @@ static int shmem_fill_super(struct super
28106         sb->s_maxbytes = SHMEM_MAX_BYTES;
28107         sb->s_blocksize = PAGE_CACHE_SIZE;
28108         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
28109 -       sb->s_magic = TMPFS_MAGIC;
28110 +       sb->s_magic = TMPFS_SUPER_MAGIC;
28111         sb->s_op = &shmem_ops;
28112  
28113         inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
28114 diff -NurpP --minimal linux-2.6.16.20/mm/slab.c linux-2.6.16.20-vs2.1.1-rc22/mm/slab.c
28115 --- linux-2.6.16.20/mm/slab.c   2006-04-09 13:49:58 +0200
28116 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/slab.c      2006-04-26 19:07:00 +0200
28117 @@ -487,6 +487,8 @@ struct kmem_cache {
28118  #define STATS_INC_FREEMISS(x)  do { } while (0)
28119  #endif
28120  
28121 +#include "slab_vs.h"
28122 +
28123  #if DEBUG
28124  /* Magic nums for obj red zoning.
28125   * Placed in the first word before and the first word after an obj.
28126 @@ -2819,6 +2821,7 @@ __cache_alloc(struct kmem_cache *cachep,
28127  
28128         local_irq_save(save_flags);
28129         objp = ____cache_alloc(cachep, flags);
28130 +       vx_slab_alloc(cachep, flags);
28131         local_irq_restore(save_flags);
28132         objp = cache_alloc_debugcheck_after(cachep, flags, objp,
28133                                             caller);
28134 @@ -2864,6 +2867,7 @@ static void *__cache_alloc_node(struct k
28135  
28136         obj = slab_get_obj(cachep, slabp, nodeid);
28137         check_slabp(cachep, slabp);
28138 +       vx_slab_alloc(cachep, flags);
28139         l3->free_objects--;
28140         /* move slabp to correct slabp list: */
28141         list_del(&slabp->list);
28142 @@ -2899,6 +2903,7 @@ static void free_block(struct kmem_cache
28143         int i;
28144         struct kmem_list3 *l3;
28145  
28146 +       // printk("·· free_block(%x) = %dx%x\n", cachep->gfpflags, nr_objects, cachep->objsize);
28147         for (i = 0; i < nr_objects; i++) {
28148                 void *objp = objpp[i];
28149                 struct slab *slabp;
28150 @@ -2996,6 +3001,7 @@ static inline void __cache_free(struct k
28151  
28152         check_irq_off();
28153         objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
28154 +       vx_slab_free(cachep);
28155  
28156         /* Make sure we are not freeing a object from another
28157          * node to the array cache on this cpu.
28158 diff -NurpP --minimal linux-2.6.16.20/mm/slab_vs.h linux-2.6.16.20-vs2.1.1-rc22/mm/slab_vs.h
28159 --- linux-2.6.16.20/mm/slab_vs.h        1970-01-01 01:00:00 +0100
28160 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/slab_vs.h   2006-04-29 02:58:07 +0200
28161 @@ -0,0 +1,23 @@
28162 +
28163 +/* Add @cachep's object size to the current context's slab[] accounting. */
28164 +static inline void vx_slab_alloc(struct kmem_cache *cachep, gfp_t flags)
28165 +{
28166 +       int zone = gfp_zone(cachep->gfpflags);
28167 +
28168 +       /* tasks that carry no vx_info are not accounted */
28169 +       if (current->vx_info)
28170 +               atomic_add(cachep->buffer_size,
28171 +                       &current->vx_info->cacct.slab[zone]);
28172 +}
28173 +
28174 +/* Subtract @cachep's object size from the current context's slab[] accounting. */
28175 +static inline void vx_slab_free(struct kmem_cache *cachep)
28176 +{
28177 +       int zone = gfp_zone(cachep->gfpflags);
28178 +
28179 +       /* tasks that carry no vx_info are not accounted */
28180 +       if (current->vx_info)
28181 +               atomic_sub(cachep->buffer_size,
28182 +                       &current->vx_info->cacct.slab[zone]);
28183 +}
28184 +
28185 diff -NurpP --minimal linux-2.6.16.20/mm/swapfile.c linux-2.6.16.20-vs2.1.1-rc22/mm/swapfile.c
28186 --- linux-2.6.16.20/mm/swapfile.c       2006-02-18 14:40:38 +0100
28187 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/swapfile.c  2006-04-26 19:07:00 +0200
28188 @@ -32,6 +32,7 @@
28189  #include <asm/pgtable.h>
28190  #include <asm/tlbflush.h>
28191  #include <linux/swapops.h>
28192 +#include <linux/vs_memory.h>
28193  
28194  DEFINE_SPINLOCK(swap_lock);
28195  unsigned int nr_swapfiles;
28196 @@ -1634,6 +1635,8 @@ void si_swapinfo(struct sysinfo *val)
28197         val->freeswap = nr_swap_pages + nr_to_be_unused;
28198         val->totalswap = total_swap_pages + nr_to_be_unused;
28199         spin_unlock(&swap_lock);
28200 +       if (vx_flags(VXF_VIRT_MEM, 0))
28201 +               vx_vsi_swapinfo(val);
28202  }
28203  
28204  /*
28205 diff -NurpP --minimal linux-2.6.16.20/mm/vmscan.c linux-2.6.16.20-vs2.1.1-rc22/mm/vmscan.c
28206 --- linux-2.6.16.20/mm/vmscan.c 2006-05-22 16:25:40 +0200
28207 +++ linux-2.6.16.20-vs2.1.1-rc22/mm/vmscan.c    2006-05-22 06:19:46 +0200
28208 @@ -1843,7 +1843,7 @@ static int __init kswapd_init(void)
28209         swap_setup();
28210         for_each_pgdat(pgdat)
28211                 pgdat->kswapd
28212 -               = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
28213 +               = find_task_by_real_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
28214         total_memory = nr_free_pagecache_pages();
28215         hotcpu_notifier(cpu_callback, 0);
28216         return 0;
28217 diff -NurpP --minimal linux-2.6.16.20/net/core/dev.c linux-2.6.16.20-vs2.1.1-rc22/net/core/dev.c
28218 --- linux-2.6.16.20/net/core/dev.c      2006-05-11 21:25:37 +0200
28219 +++ linux-2.6.16.20-vs2.1.1-rc22/net/core/dev.c 2006-04-26 19:07:00 +0200
28220 @@ -114,6 +114,7 @@
28221  #include <linux/wireless.h>            /* Note : will define WIRELESS_EXT */
28222  #include <net/iw_handler.h>
28223  #endif /* CONFIG_NET_RADIO */
28224 +#include <linux/vs_network.h>
28225  #include <asm/current.h>
28226  
28227  /*
28228 @@ -1848,6 +1849,9 @@ static int dev_ifconf(char __user *arg)
28229  
28230         total = 0;
28231         for (dev = dev_base; dev; dev = dev->next) {
28232 +               if (vx_flags(VXF_HIDE_NETIF, 0) &&
28233 +                       !dev_in_nx_info(dev, current->nx_info))
28234 +                       continue;
28235                 for (i = 0; i < NPROTO; i++) {
28236                         if (gifconf_list[i]) {
28237                                 int done;
28238 @@ -1908,6 +1912,10 @@ void dev_seq_stop(struct seq_file *seq, 
28239  
28240  static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
28241  {
28242 +       struct nx_info *nxi = current->nx_info;
28243 +
28244 +       if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi))
28245 +               return;
28246         if (dev->get_stats) {
28247                 struct net_device_stats *stats = dev->get_stats(dev);
28248  
28249 diff -NurpP --minimal linux-2.6.16.20/net/core/rtnetlink.c linux-2.6.16.20-vs2.1.1-rc22/net/core/rtnetlink.c
28250 --- linux-2.6.16.20/net/core/rtnetlink.c        2006-02-18 14:40:39 +0100
28251 +++ linux-2.6.16.20-vs2.1.1-rc22/net/core/rtnetlink.c   2006-04-26 19:07:00 +0200
28252 @@ -278,6 +278,9 @@ static int rtnetlink_dump_ifinfo(struct 
28253         for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
28254                 if (idx < s_idx)
28255                         continue;
28256 +               if (vx_info_flags(skb->sk->sk_vx_info, VXF_HIDE_NETIF, 0) &&
28257 +                       !dev_in_nx_info(dev, skb->sk->sk_nx_info))
28258 +                       continue;
28259                 if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
28260                                           NETLINK_CB(cb->skb).pid,
28261                                           cb->nlh->nlmsg_seq, 0,
28262 @@ -451,6 +454,9 @@ void rtmsg_ifinfo(int type, struct net_d
28263                                sizeof(struct rtnl_link_ifmap) +
28264                                sizeof(struct rtnl_link_stats) + 128);
28265  
28266 +       if (vx_flags(VXF_HIDE_NETIF, 0) &&
28267 +               !dev_in_nx_info(dev, current->nx_info))
28268 +               return;
28269         skb = alloc_skb(size, GFP_KERNEL);
28270         if (!skb)
28271                 return;
28272 diff -NurpP --minimal linux-2.6.16.20/net/core/sock.c linux-2.6.16.20-vs2.1.1-rc22/net/core/sock.c
28273 --- linux-2.6.16.20/net/core/sock.c     2006-05-11 21:25:37 +0200
28274 +++ linux-2.6.16.20-vs2.1.1-rc22/net/core/sock.c        2006-04-26 19:07:00 +0200
28275 @@ -125,6 +125,9 @@
28276  #include <linux/ipsec.h>
28277  
28278  #include <linux/filter.h>
28279 +#include <linux/vs_socket.h>
28280 +#include <linux/vs_limit.h>
28281 +#include <linux/vs_context.h>
28282  
28283  #ifdef CONFIG_INET
28284  #include <net/tcp.h>
28285 @@ -661,6 +664,8 @@ struct sock *sk_alloc(int family, gfp_t 
28286                         sk->sk_prot = sk->sk_prot_creator = prot;
28287                         sock_lock_init(sk);
28288                 }
28289 +               sock_vx_init(sk);
28290 +               sock_nx_init(sk);
28291                 
28292                 if (security_sk_alloc(sk, family, priority))
28293                         goto out_free;
28294 @@ -699,6 +704,11 @@ void sk_free(struct sock *sk)
28295                        __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
28296  
28297         security_sk_free(sk);
28298 +       vx_sock_dec(sk);
28299 +       clr_vx_info(&sk->sk_vx_info);
28300 +       sk->sk_xid = -1;
28301 +       clr_nx_info(&sk->sk_nx_info);
28302 +       sk->sk_nid = -1;
28303         if (sk->sk_prot_creator->slab != NULL)
28304                 kmem_cache_free(sk->sk_prot_creator->slab, sk);
28305         else
28306 @@ -716,6 +726,8 @@ struct sock *sk_clone(const struct sock 
28307                 memcpy(newsk, sk, sk->sk_prot->obj_size);
28308  
28309                 /* SANITY */
28310 +               sock_vx_init(newsk);
28311 +               sock_nx_init(newsk);
28312                 sk_node_init(&newsk->sk_node);
28313                 sock_lock_init(newsk);
28314                 bh_lock_sock(newsk);
28315 @@ -756,6 +768,12 @@ struct sock *sk_clone(const struct sock 
28316                 newsk->sk_priority = 0;
28317                 atomic_set(&newsk->sk_refcnt, 2);
28318  
28319 +               set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info);
28320 +               newsk->sk_xid = sk->sk_xid;
28321 +               vx_sock_inc(newsk);
28322 +               set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info);
28323 +               newsk->sk_nid = sk->sk_nid;
28324 +
28325                 /*
28326                  * Increment the counter in the same struct proto as the master
28327                  * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
28328 @@ -1320,6 +1338,11 @@ void sock_init_data(struct socket *sock,
28329         sk->sk_stamp.tv_sec     = -1L;
28330         sk->sk_stamp.tv_usec    = -1L;
28331  
28332 +       set_vx_info(&sk->sk_vx_info, current->vx_info);
28333 +       sk->sk_xid = vx_current_xid();
28334 +       vx_sock_inc(sk);
28335 +       set_nx_info(&sk->sk_nx_info, current->nx_info);
28336 +       sk->sk_nid = nx_current_nid();
28337         atomic_set(&sk->sk_refcnt, 1);
28338  }
28339  
28340 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/af_inet.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/af_inet.c
28341 --- linux-2.6.16.20/net/ipv4/af_inet.c  2006-02-18 14:40:39 +0100
28342 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/af_inet.c     2006-04-26 19:07:00 +0200
28343 @@ -114,6 +114,7 @@
28344  #ifdef CONFIG_IP_MROUTE
28345  #include <linux/mroute.h>
28346  #endif
28347 +#include <linux/vs_limit.h>
28348  
28349  DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;
28350  
28351 @@ -282,9 +283,11 @@ lookup_protocol:
28352         }
28353  
28354         err = -EPERM;
28355 +       if ((protocol == IPPROTO_ICMP) && vx_ccaps(VXC_RAW_ICMP))
28356 +               goto override;
28357         if (answer->capability > 0 && !capable(answer->capability))
28358                 goto out_rcu_unlock;
28359 -
28360 +override:
28361         sock->ops = answer->ops;
28362         answer_prot = answer->prot;
28363         answer_no_check = answer->no_check;
28364 @@ -401,6 +404,10 @@ int inet_bind(struct socket *sock, struc
28365         unsigned short snum;
28366         int chk_addr_ret;
28367         int err;
28368 +       __u32 s_addr;   /* Address used for validation */
28369 +       __u32 s_addr1;  /* Address used for socket */
28370 +       __u32 s_addr2;  /* Broadcast address for the socket */
28371 +       struct nx_info *nxi = sk->sk_nx_info;
28372  
28373         /* If the socket has its own bind function then use it. (RAW) */
28374         if (sk->sk_prot->bind) {
28375 @@ -411,7 +418,40 @@ int inet_bind(struct socket *sock, struc
28376         if (addr_len < sizeof(struct sockaddr_in))
28377                 goto out;
28378  
28379 -       chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
28380 +       s_addr = addr->sin_addr.s_addr;
28381 +       s_addr1 = s_addr;
28382 +       s_addr2 = 0xffffffffl;
28383 +
28384 +       vxdprintk(VXD_CBIT(net, 3),
28385 +               "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d",
28386 +               sk, sk->sk_nx_info, sk->sk_socket,
28387 +               (sk->sk_socket?sk->sk_socket->flags:0),
28388 +               VXD_QUAD(s_addr));
28389 +       if (nxi) {
28390 +               __u32 v4_bcast = nxi->v4_bcast;
28391 +               __u32 ipv4root = nxi->ipv4[0];
28392 +               int nbipv4 = nxi->nbipv4;
28393 +
28394 +               if (s_addr == 0) {
28395 +                       /* bind to any for 1-n */
28396 +                       s_addr = ipv4root;
28397 +                       s_addr1 = (nbipv4 > 1) ? 0 : s_addr;
28398 +                       s_addr2 = v4_bcast;
28399 +               } else if (s_addr == IPI_LOOPBACK) {
28400 +                       /* rewrite localhost to ipv4root */
28401 +                       s_addr = ipv4root;
28402 +                       s_addr1 = ipv4root;
28403 +               } else if (s_addr != v4_bcast) {
28404 +                       /* normal address bind */
28405 +                       if (!addr_in_nx_info(nxi, s_addr))
28406 +                               return -EADDRNOTAVAIL;
28407 +               }
28408 +       }
28409 +       chk_addr_ret = inet_addr_type(s_addr);
28410 +
28411 +       vxdprintk(VXD_CBIT(net, 3),
28412 +               "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d",
28413 +               sk, VXD_QUAD(s_addr), VXD_QUAD(s_addr1), VXD_QUAD(s_addr2));
28414  
28415         /* Not specified by any standard per-se, however it breaks too
28416          * many applications when removed.  It is unfortunate since
28417 @@ -423,7 +463,7 @@ int inet_bind(struct socket *sock, struc
28418         err = -EADDRNOTAVAIL;
28419         if (!sysctl_ip_nonlocal_bind &&
28420             !inet->freebind &&
28421 -           addr->sin_addr.s_addr != INADDR_ANY &&
28422 +           s_addr != INADDR_ANY &&
28423             chk_addr_ret != RTN_LOCAL &&
28424             chk_addr_ret != RTN_MULTICAST &&
28425             chk_addr_ret != RTN_BROADCAST)
28426 @@ -448,7 +488,8 @@ int inet_bind(struct socket *sock, struc
28427         if (sk->sk_state != TCP_CLOSE || inet->num)
28428                 goto out_release_sock;
28429  
28430 -       inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
28431 +       inet->rcv_saddr = inet->saddr = s_addr1;
28432 +       inet->rcv_saddr2 = s_addr2;
28433         if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
28434                 inet->saddr = 0;  /* Use device */
28435  
28436 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/devinet.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/devinet.c
28437 --- linux-2.6.16.20/net/ipv4/devinet.c  2006-02-18 14:40:39 +0100
28438 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/devinet.c     2006-04-26 19:07:00 +0200
28439 @@ -607,6 +607,9 @@ int devinet_ioctl(unsigned int cmd, void
28440                 *colon = ':';
28441  
28442         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
28443 +               struct nx_info *nxi = current->nx_info;
28444 +               int hide_netif = vx_flags(VXF_HIDE_NETIF, 0);
28445 +
28446                 if (tryaddrmatch) {
28447                         /* Matthias Andree */
28448                         /* compare label and address (4.4BSD style) */
28449 @@ -615,6 +618,8 @@ int devinet_ioctl(unsigned int cmd, void
28450                            This is checked above. */
28451                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
28452                              ifap = &ifa->ifa_next) {
28453 +                               if (hide_netif && !ifa_in_nx_info(ifa, nxi))
28454 +                                       continue;
28455                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
28456                                     sin_orig.sin_addr.s_addr ==
28457                                                         ifa->ifa_address) {
28458 @@ -627,9 +632,12 @@ int devinet_ioctl(unsigned int cmd, void
28459                    comparing just the label */
28460                 if (!ifa) {
28461                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
28462 -                            ifap = &ifa->ifa_next)
28463 +                            ifap = &ifa->ifa_next) {
28464 +                               if (hide_netif && !ifa_in_nx_info(ifa, nxi))
28465 +                                       continue;
28466                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
28467                                         break;
28468 +                       }
28469                 }
28470         }
28471  
28472 @@ -780,6 +788,9 @@ static int inet_gifconf(struct net_devic
28473                 goto out;
28474  
28475         for (; ifa; ifa = ifa->ifa_next) {
28476 +               if (vx_flags(VXF_HIDE_NETIF, 0) &&
28477 +                       !ifa_in_nx_info(ifa, current->nx_info))
28478 +                       continue;
28479                 if (!buf) {
28480                         done += sizeof(ifr);
28481                         continue;
28482 @@ -1091,6 +1102,7 @@ static int inet_dump_ifaddr(struct sk_bu
28483         struct net_device *dev;
28484         struct in_device *in_dev;
28485         struct in_ifaddr *ifa;
28486 +       struct sock *sk = skb->sk;
28487         int s_ip_idx, s_idx = cb->args[0];
28488  
28489         s_ip_idx = ip_idx = cb->args[1];
28490 @@ -1108,6 +1120,9 @@ static int inet_dump_ifaddr(struct sk_bu
28491  
28492                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
28493                      ifa = ifa->ifa_next, ip_idx++) {
28494 +                       if (sk && vx_info_flags(sk->sk_vx_info, VXF_HIDE_NETIF, 0) &&
28495 +                               !ifa_in_nx_info(ifa, sk->sk_nx_info))
28496 +                               continue;
28497                         if (ip_idx < s_ip_idx)
28498                                 continue;
28499                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
28500 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/fib_hash.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/fib_hash.c
28501 --- linux-2.6.16.20/net/ipv4/fib_hash.c 2006-04-09 13:49:59 +0200
28502 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/fib_hash.c    2006-04-26 19:07:00 +0200
28503 @@ -989,6 +989,8 @@ static unsigned fib_flag_trans(int type,
28504         return flags;
28505  }
28506  
28507 +extern int dev_in_nx_info(struct net_device *, struct nx_info *);
28508 +
28509  /* 
28510   *     This outputs /proc/net/route.
28511   *
28512 @@ -1019,7 +1021,8 @@ static int fib_seq_show(struct seq_file 
28513         prefix  = f->fn_key;
28514         mask    = FZ_MASK(iter->zone);
28515         flags   = fib_flag_trans(fa->fa_type, mask, fi);
28516 -       if (fi)
28517 +       if (fi && (!vx_flags(VXF_HIDE_NETIF, 0) ||
28518 +               dev_in_nx_info(fi->fib_dev, current->nx_info)))
28519                 snprintf(bf, sizeof(bf),
28520                          "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
28521                          fi->fib_dev ? fi->fib_dev->name : "*", prefix,
28522 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/inet_connection_sock.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/inet_connection_sock.c
28523 --- linux-2.6.16.20/net/ipv4/inet_connection_sock.c     2006-02-18 14:40:39 +0100
28524 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/inet_connection_sock.c        2006-04-26 19:07:00 +0200
28525 @@ -40,7 +40,6 @@ int sysctl_local_port_range[2] = { 1024,
28526  int inet_csk_bind_conflict(const struct sock *sk,
28527                            const struct inet_bind_bucket *tb)
28528  {
28529 -       const u32 sk_rcv_saddr = inet_rcv_saddr(sk);
28530         struct sock *sk2;
28531         struct hlist_node *node;
28532         int reuse = sk->sk_reuse;
28533 @@ -53,9 +52,8 @@ int inet_csk_bind_conflict(const struct 
28534                      sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
28535                         if (!reuse || !sk2->sk_reuse ||
28536                             sk2->sk_state == TCP_LISTEN) {
28537 -                               const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
28538 -                               if (!sk2_rcv_saddr || !sk_rcv_saddr ||
28539 -                                   sk2_rcv_saddr == sk_rcv_saddr)
28540 +                               if (nx_addr_conflict(sk->sk_nx_info,
28541 +                                       inet_rcv_saddr(sk), sk2))
28542                                         break;
28543                         }
28544                 }
28545 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/inet_diag.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/inet_diag.c
28546 --- linux-2.6.16.20/net/ipv4/inet_diag.c        2006-04-09 13:49:59 +0200
28547 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/inet_diag.c   2006-04-26 19:07:00 +0200
28548 @@ -694,6 +694,8 @@ static int inet_diag_dump(struct sk_buff
28549                         sk_for_each(sk, node, &hashinfo->listening_hash[i]) {
28550                                 struct inet_sock *inet = inet_sk(sk);
28551  
28552 +                               if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28553 +                                       continue;
28554                                 if (num < s_num) {
28555                                         num++;
28556                                         continue;
28557 @@ -754,6 +756,8 @@ skip_listen_ht:
28558                 sk_for_each(sk, node, &head->chain) {
28559                         struct inet_sock *inet = inet_sk(sk);
28560  
28561 +                       if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28562 +                               continue;
28563                         if (num < s_num)
28564                                 goto next_normal;
28565                         if (!(r->idiag_states & (1 << sk->sk_state)))
28566 @@ -778,6 +782,8 @@ next_normal:
28567                         inet_twsk_for_each(tw, node,
28568                                     &hashinfo->ehash[i + hashinfo->ehash_size].chain) {
28569  
28570 +                               if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))
28571 +                                       continue;
28572                                 if (num < s_num)
28573                                         goto next_dying;
28574                                 if (r->id.idiag_sport != tw->tw_sport &&
28575 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/inet_hashtables.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/inet_hashtables.c
28576 --- linux-2.6.16.20/net/ipv4/inet_hashtables.c  2006-02-18 14:40:39 +0100
28577 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/inet_hashtables.c     2006-04-26 19:07:00 +0200
28578 @@ -143,11 +143,10 @@ struct sock *__inet_lookup_listener(cons
28579                         const __u32 rcv_saddr = inet->rcv_saddr;
28580                         int score = sk->sk_family == PF_INET ? 1 : 0;
28581  
28582 -                       if (rcv_saddr) {
28583 -                               if (rcv_saddr != daddr)
28584 -                                       continue;
28585 +                       if (inet_addr_match(sk->sk_nx_info, daddr, rcv_saddr))
28586                                 score += 2;
28587 -                       }
28588 +                       else
28589 +                               continue;
28590                         if (sk->sk_bound_dev_if) {
28591                                 if (sk->sk_bound_dev_if != dif)
28592                                         continue;
28593 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/raw.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/raw.c
28594 --- linux-2.6.16.20/net/ipv4/raw.c      2006-02-18 14:40:41 +0100
28595 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/raw.c 2006-04-26 19:07:00 +0200
28596 @@ -102,6 +102,27 @@ static void raw_v4_unhash(struct sock *s
28597         write_unlock_bh(&raw_v4_lock);
28598  }
28599  
28600 +
28601 +/*
28602 + *     Decide whether an address is acceptable for a raw socket
28603 + *
28604 + *     nxi:            nx_info attached to the socket, may be absent
28605 + *     addr:           the address to be verified
28606 + *     saddr/baddr:    the two socket addresses addr is compared with
28607 + */
28608 +static inline int raw_addr_match(struct nx_info *nxi, uint32_t addr,
28609 +                                 uint32_t saddr, uint32_t baddr)
28610 +{
28611 +       /* a non-zero addr equal to either socket address is a hit */
28612 +       if (addr != 0 && (addr == saddr || addr == baddr))
28613 +               return 1;
28614 +       /* a socket with a specific saddr accepts nothing else */
28615 +       if (saddr != 0)
28616 +               return 0;
28617 +       /* no saddr set: leave the verdict to addr_in_nx_info() */
28618 +       return addr_in_nx_info(nxi, addr);
28619 +}
28620 +
28621  struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
28622                              unsigned long raddr, unsigned long laddr,
28623                              int dif)
28624 @@ -113,7 +134,8 @@ struct sock *__raw_v4_lookup(struct sock
28625  
28626                 if (inet->num == num                                    &&
28627                     !(inet->daddr && inet->daddr != raddr)              &&
28628 -                   !(inet->rcv_saddr && inet->rcv_saddr != laddr)      &&
28629 +                   raw_addr_match(sk->sk_nx_info, laddr,
28630 +                       inet->rcv_saddr, inet->rcv_saddr2)              &&
28631                     !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
28632                         goto found; /* gotcha */
28633         }
28634 @@ -313,6 +335,11 @@ static int raw_send_hdrinc(struct sock *
28635                 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
28636         }
28637  
28638 +       err = -EPERM;
28639 +       if (!vx_check(0, VX_ADMIN) && !capable(CAP_NET_RAW)
28640 +               && (!addr_in_nx_info(sk->sk_nx_info, iph->saddr)))
28641 +               goto error_free;
28642 +
28643         err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
28644                       dst_output);
28645         if (err > 0)
28646 @@ -324,6 +351,7 @@ out:
28647  
28648  error_fault:
28649         err = -EFAULT;
28650 +error_free:
28651         kfree_skb(skb);
28652  error:
28653         IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
28654 @@ -484,6 +512,12 @@ static int raw_sendmsg(struct kiocb *ioc
28655                 if (!inet->hdrincl)
28656                         raw_probe_proto_opt(&fl, msg);
28657  
28658 +               if (sk->sk_nx_info) {
28659 +                       err = ip_find_src(sk->sk_nx_info, &rt, &fl);
28660 +
28661 +                       if (err)
28662 +                               goto done;
28663 +               }
28664                 err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
28665         }
28666         if (err)
28667 @@ -753,7 +787,8 @@ static struct sock *raw_get_first(struct
28668                 struct hlist_node *node;
28669  
28670                 sk_for_each(sk, node, &raw_v4_htable[state->bucket])
28671 -                       if (sk->sk_family == PF_INET)
28672 +                       if (sk->sk_family == PF_INET &&
28673 +                               vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28674                                 goto found;
28675         }
28676         sk = NULL;
28677 @@ -769,7 +804,8 @@ static struct sock *raw_get_next(struct 
28678                 sk = sk_next(sk);
28679  try_again:
28680                 ;
28681 -       } while (sk && sk->sk_family != PF_INET);
28682 +       } while (sk && (sk->sk_family != PF_INET ||
28683 +               !vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)));
28684  
28685         if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
28686                 sk = sk_head(&raw_v4_htable[state->bucket]);
28687 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/tcp.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/tcp.c
28688 --- linux-2.6.16.20/net/ipv4/tcp.c      2006-04-09 13:49:59 +0200
28689 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/tcp.c 2006-04-26 19:07:00 +0200
28690 @@ -257,6 +257,7 @@
28691  #include <linux/fs.h>
28692  #include <linux/random.h>
28693  #include <linux/bootmem.h>
28694 +#include <linux/in.h>
28695  
28696  #include <net/icmp.h>
28697  #include <net/tcp.h>
28698 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/tcp_ipv4.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/tcp_ipv4.c
28699 --- linux-2.6.16.20/net/ipv4/tcp_ipv4.c 2006-02-18 14:40:42 +0100
28700 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/tcp_ipv4.c    2006-04-26 19:07:00 +0200
28701 @@ -77,6 +77,7 @@
28702  #include <linux/stddef.h>
28703  #include <linux/proc_fs.h>
28704  #include <linux/seq_file.h>
28705 +#include <linux/vserver/debug.h>
28706  
28707  int sysctl_tcp_tw_reuse;
28708  int sysctl_tcp_low_latency;
28709 @@ -1350,6 +1351,12 @@ static void *listening_get_next(struct s
28710                 req = req->dl_next;
28711                 while (1) {
28712                         while (req) {
28713 +                               vxdprintk(VXD_CBIT(net, 6),
28714 +                                       "sk,req: %p [#%d] (from %d)", req->sk,
28715 +                                       (req->sk)?req->sk->sk_xid:0, vx_current_xid());
28716 +                               if (req->sk &&  /* rejected by vx_check: step past it, a bare continue would spin */
28717 +                                       !vx_check(req->sk->sk_xid, VX_IDENT|VX_WATCH)) {
28718 +                                       req = req->dl_next; continue; }
28719                                 if (req->rsk_ops->family == st->family) {
28720                                         cur = req;
28721                                         goto out;
28722 @@ -1374,6 +1381,10 @@ get_req:
28723         }
28724  get_sk:
28725         sk_for_each_from(sk, node) {
28726 +               vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
28727 +                       sk, sk->sk_xid, vx_current_xid());
28728 +               if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28729 +                       continue;
28730                 if (sk->sk_family == st->family) {
28731                         cur = sk;
28732                         goto out;
28733 @@ -1425,18 +1436,26 @@ static void *established_get_first(struc
28734  
28735                 read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
28736                 sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
28737 -                       if (sk->sk_family != st->family) {
28738 +                       vxdprintk(VXD_CBIT(net, 6),
28739 +                               "sk,egf: %p [#%d] (from %d)",
28740 +                               sk, sk->sk_xid, vx_current_xid());
28741 +                       if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28742 +                               continue;
28743 +                       if (sk->sk_family != st->family)
28744                                 continue;
28745 -                       }
28746                         rc = sk;
28747                         goto out;
28748                 }
28749                 st->state = TCP_SEQ_STATE_TIME_WAIT;
28750                 inet_twsk_for_each(tw, node,
28751                                    &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
28752 -                       if (tw->tw_family != st->family) {
28753 +                       vxdprintk(VXD_CBIT(net, 6),
28754 +                               "tw: %p [#%d] (from %d)",
28755 +                               tw, tw->tw_xid, vx_current_xid());
28756 +                       if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))
28757 +                               continue;
28758 +                       if (tw->tw_family != st->family)
28759                                 continue;
28760 -                       }
28761                         rc = tw;
28762                         goto out;
28763                 }
28764 @@ -1460,7 +1479,8 @@ static void *established_get_next(struct
28765                 tw = cur;
28766                 tw = tw_next(tw);
28767  get_tw:
28768 -               while (tw && tw->tw_family != st->family) {
28769 +               while (tw && (tw->tw_family != st->family ||
28770 +                       !vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))) {
28771                         tw = tw_next(tw);
28772                 }
28773                 if (tw) {
28774 @@ -1484,6 +1504,11 @@ get_tw:
28775                 sk = sk_next(sk);
28776  
28777         sk_for_each_from(sk, node) {
28778 +               vxdprintk(VXD_CBIT(net, 6),
28779 +                       "sk,egn: %p [#%d] (from %d)",
28780 +                       sk, sk->sk_xid, vx_current_xid());
28781 +               if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28782 +                       continue;
28783                 if (sk->sk_family == st->family)
28784                         goto found;
28785         }
28786 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/tcp_minisocks.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/tcp_minisocks.c
28787 --- linux-2.6.16.20/net/ipv4/tcp_minisocks.c    2006-04-09 13:49:59 +0200
28788 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/tcp_minisocks.c       2006-04-26 19:07:00 +0200
28789 @@ -29,6 +29,10 @@
28790  #include <net/inet_common.h>
28791  #include <net/xfrm.h>
28792  
28793 +#include <linux/vs_limit.h>
28794 +#include <linux/vs_socket.h>
28795 +#include <linux/vs_context.h>
28796 +
28797  #ifdef CONFIG_SYSCTL
28798  #define SYNC_INIT 0 /* let the user enable it */
28799  #else
28800 @@ -295,6 +299,11 @@ void tcp_time_wait(struct sock *sk, int 
28801                 tcptw->tw_ts_recent     = tp->rx_opt.ts_recent;
28802                 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
28803  
28804 +               tw->tw_xid              = sk->sk_xid;
28805 +               tw->tw_vx_info          = NULL;
28806 +               tw->tw_nid              = sk->sk_nid;
28807 +               tw->tw_nx_info          = NULL;
28808 +
28809  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
28810                 if (tw->tw_family == PF_INET6) {
28811                         struct ipv6_pinfo *np = inet6_sk(sk);
28812 diff -NurpP --minimal linux-2.6.16.20/net/ipv4/udp.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/udp.c
28813 --- linux-2.6.16.20/net/ipv4/udp.c      2006-02-18 14:40:42 +0100
28814 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv4/udp.c 2006-05-21 23:37:20 +0200
28815 @@ -176,14 +176,12 @@ gotit:
28816                         struct inet_sock *inet2 = inet_sk(sk2);
28817  
28818                         if (inet2->num == snum &&
28819 -                           sk2 != sk &&
28820 -                           !ipv6_only_sock(sk2) &&
28821 +                           sk2 != sk && !ipv6_only_sock(sk2) &&
28822                             (!sk2->sk_bound_dev_if ||
28823                              !sk->sk_bound_dev_if ||
28824                              sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
28825 -                           (!inet2->rcv_saddr ||
28826 -                            !inet->rcv_saddr ||
28827 -                            inet2->rcv_saddr == inet->rcv_saddr) &&
28828 +                           nx_addr_conflict(sk->sk_nx_info,
28829 +                            inet_rcv_saddr(sk), sk2) &&
28830                             (!sk2->sk_reuse || !sk->sk_reuse))
28831                                 goto fail;
28832                 }
28833 @@ -218,6 +216,7 @@ static void udp_v4_unhash(struct sock *s
28834         write_unlock_bh(&udp_hash_lock);
28835  }
28836  
28837 +
28838  /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
28839   * harder than this. -DaveM
28840   */
28841 @@ -238,6 +237,11 @@ static struct sock *udp_v4_lookup_longwa
28842                                 if (inet->rcv_saddr != daddr)
28843                                         continue;
28844                                 score+=2;
28845 +                       } else if (sk->sk_nx_info) {
28846 +                               if (addr_in_nx_info(sk->sk_nx_info, daddr))
28847 +                                       score+=2;
28848 +                               else
28849 +                                       continue;
28850                         }
28851                         if (inet->daddr) {
28852                                 if (inet->daddr != saddr)
28853 @@ -294,7 +298,8 @@ static inline struct sock *udp_v4_mcast_
28854                 if (inet->num != hnum                                   ||
28855                     (inet->daddr && inet->daddr != rmt_addr)            ||
28856                     (inet->dport != rmt_port && inet->dport)            ||
28857 -                   (inet->rcv_saddr && inet->rcv_saddr != loc_addr)    ||
28858 +                   (inet->rcv_saddr && inet->rcv_saddr != loc_addr &&
28859 +                    inet->rcv_saddr2 && inet->rcv_saddr2 != loc_addr)  ||
28860                     ipv6_only_sock(s)                                   ||
28861                     (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
28862                         continue;
28863 @@ -604,6 +609,19 @@ int udp_sendmsg(struct kiocb *iocb, stru
28864                                     .uli_u = { .ports =
28865                                                { .sport = inet->sport,
28866                                                  .dport = dport } } };
28867 +               struct nx_info *nxi = sk->sk_nx_info;
28868 +
28869 +               if (nxi) {
28870 +                       err = ip_find_src(nxi, &rt, &fl);
28871 +                       if (err)
28872 +                               goto out;
28873 +                       if (daddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
28874 +                               daddr = fl.fl4_dst = nxi->ipv4[0];
28875 +#ifdef CONFIG_VSERVER_REMAP_SADDR
28876 +                       if (saddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
28877 +                               saddr = fl.fl4_src = nxi->ipv4[0];
28878 +#endif
28879 +               }
28880                 err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
28881                 if (err)
28882                         goto out;
28883 @@ -1370,8 +1388,10 @@ static struct sock *udp_get_first(struct
28884  
28885         for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
28886                 struct hlist_node *node;
28887 +
28888                 sk_for_each(sk, node, &udp_hash[state->bucket]) {
28889 -                       if (sk->sk_family == state->family)
28890 +                       if (sk->sk_family == state->family &&
28891 +                               vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
28892                                 goto found;
28893                 }
28894         }
28895 @@ -1388,7 +1408,8 @@ static struct sock *udp_get_next(struct 
28896                 sk = sk_next(sk);
28897  try_again:
28898                 ;
28899 -       } while (sk && sk->sk_family != state->family);
28900 +       } while (sk && (sk->sk_family != state->family ||
28901 +               !vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)));
28902  
28903         if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
28904                 sk = sk_head(&udp_hash[state->bucket]);
28905 diff -NurpP --minimal linux-2.6.16.20/net/ipv6/addrconf.c linux-2.6.16.20-vs2.1.1-rc22/net/ipv6/addrconf.c
28906 --- linux-2.6.16.20/net/ipv6/addrconf.c 2006-04-09 13:49:59 +0200
28907 +++ linux-2.6.16.20-vs2.1.1-rc22/net/ipv6/addrconf.c    2006-04-26 19:07:00 +0200
28908 @@ -2646,7 +2646,10 @@ static void if6_seq_stop(struct seq_file
28909  static int if6_seq_show(struct seq_file *seq, void *v)
28910  {
28911         struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
28912 -       seq_printf(seq,
28913 +
28914 +       /* no ipv6 inside a vserver for now */
28915 +       if (vx_check(0, VX_ADMIN|VX_WATCH))
28916 +               seq_printf(seq,
28917                    NIP6_SEQFMT " %02x %02x %02x %02x %8s\n",
28918                    NIP6(ifp->addr),
28919                    ifp->idev->dev->ifindex,
28920 @@ -3002,6 +3005,10 @@ static int inet6_dump_addr(struct sk_buf
28921         struct ifmcaddr6 *ifmca;
28922         struct ifacaddr6 *ifaca;
28923  
28924 +       /* no ipv6 inside a vserver for now */
28925 +       if (skb->sk && skb->sk->sk_vx_info)
28926 +               return skb->len;
28927 +
28928         s_idx = cb->args[0];
28929         s_ip_idx = ip_idx = cb->args[1];
28930         read_lock(&dev_base_lock);
28931 @@ -3207,6 +3214,10 @@ static int inet6_dump_ifinfo(struct sk_b
28932         struct net_device *dev;
28933         struct inet6_dev *idev;
28934  
28935 +       /* no ipv6 inside a vserver for now */
28936 +       if (skb->sk && skb->sk->sk_vx_info)
28937 +               return skb->len;
28938 +
28939         read_lock(&dev_base_lock);
28940         for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
28941                 if (idx < s_idx)
28942 diff -NurpP --minimal linux-2.6.16.20/net/netlink/af_netlink.c linux-2.6.16.20-vs2.1.1-rc22/net/netlink/af_netlink.c
28943 --- linux-2.6.16.20/net/netlink/af_netlink.c    2006-04-09 13:49:59 +0200
28944 +++ linux-2.6.16.20-vs2.1.1-rc22/net/netlink/af_netlink.c       2006-04-26 19:07:00 +0200
28945 @@ -56,6 +56,9 @@
28946  #include <linux/mm.h>
28947  #include <linux/types.h>
28948  #include <linux/audit.h>
28949 +#include <linux/vs_context.h>
28950 +#include <linux/vs_network.h>
28951 +#include <linux/vs_limit.h>
28952  
28953  #include <net/sock.h>
28954  #include <net/scm.h>
28955 diff -NurpP --minimal linux-2.6.16.20/net/socket.c linux-2.6.16.20-vs2.1.1-rc22/net/socket.c
28956 --- linux-2.6.16.20/net/socket.c        2006-02-18 14:40:43 +0100
28957 +++ linux-2.6.16.20-vs2.1.1-rc22/net/socket.c   2006-04-26 19:07:00 +0200
28958 @@ -96,6 +96,7 @@
28959  
28960  #include <net/sock.h>
28961  #include <linux/netfilter.h>
28962 +#include <linux/vs_socket.h>
28963  
28964  static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
28965  static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
28966 @@ -536,7 +537,7 @@ static inline int __sock_sendmsg(struct 
28967                                  struct msghdr *msg, size_t size)
28968  {
28969         struct sock_iocb *si = kiocb_to_siocb(iocb);
28970 -       int err;
28971 +       int err, len;
28972  
28973         si->sock = sock;
28974         si->scm = NULL;
28975 @@ -547,7 +548,21 @@ static inline int __sock_sendmsg(struct 
28976         if (err)
28977                 return err;
28978  
28979 -       return sock->ops->sendmsg(iocb, sock, msg, size);
28980 +       len = sock->ops->sendmsg(iocb, sock, msg, size);
28981 +       if (sock->sk) {
28982 +               if (len == size)
28983 +                       vx_sock_send(sock->sk, size);
28984 +               else
28985 +                       vx_sock_fail(sock->sk, size);
28986 +       }
28987 +       vxdprintk(VXD_CBIT(net, 7),
28988 +               "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d",
28989 +               sock, sock->sk,
28990 +               (sock->sk)?sock->sk->sk_nx_info:0,
28991 +               (sock->sk)?sock->sk->sk_vx_info:0,
28992 +               (sock->sk)?sock->sk->sk_xid:0,
28993 +               (unsigned int)size, len);
28994 +       return len;
28995  }
28996  
28997  int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
28998 @@ -585,7 +600,7 @@ int kernel_sendmsg(struct socket *sock, 
28999  static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 
29000                                  struct msghdr *msg, size_t size, int flags)
29001  {
29002 -       int err;
29003 +       int err, len;
29004         struct sock_iocb *si = kiocb_to_siocb(iocb);
29005  
29006         si->sock = sock;
29007 @@ -598,7 +613,17 @@ static inline int __sock_recvmsg(struct 
29008         if (err)
29009                 return err;
29010  
29011 -       return sock->ops->recvmsg(iocb, sock, msg, size, flags);
29012 +       len = sock->ops->recvmsg(iocb, sock, msg, size, flags);
29013 +       if ((len >= 0) && sock->sk)
29014 +               vx_sock_recv(sock->sk, len);
29015 +       vxdprintk(VXD_CBIT(net, 7),
29016 +               "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d",
29017 +               sock, sock->sk,
29018 +               (sock->sk)?sock->sk->sk_nx_info:0,
29019 +               (sock->sk)?sock->sk->sk_vx_info:0,
29020 +               (sock->sk)?sock->sk->sk_xid:0,
29021 +               (unsigned int)size, len);
29022 +       return len;
29023  }
29024  
29025  int sock_recvmsg(struct socket *sock, struct msghdr *msg, 
29026 @@ -1088,6 +1113,10 @@ static int __sock_create(int family, int
29027         if (type < 0 || type >= SOCK_MAX)
29028                 return -EINVAL;
29029  
29030 +       /* disable IPv6 inside vservers for now */
29031 +       if (family == PF_INET6 && !vx_check(0, VX_ADMIN))
29032 +               return -EAFNOSUPPORT;
29033 +
29034         /* Compatibility.
29035  
29036            This uglymoron is moved from INET layer to here to avoid
29037 @@ -1198,6 +1227,7 @@ asmlinkage long sys_socket(int family, i
29038         if (retval < 0)
29039                 goto out;
29040  
29041 +       set_bit(SOCK_USER_SOCKET, &sock->flags);
29042         retval = sock_map_fd(sock);
29043         if (retval < 0)
29044                 goto out_release;
29045 @@ -1228,10 +1258,12 @@ asmlinkage long sys_socketpair(int famil
29046         err = sock_create(family, type, protocol, &sock1);
29047         if (err < 0)
29048                 goto out;
29049 +       set_bit(SOCK_USER_SOCKET, &sock1->flags);
29050  
29051         err = sock_create(family, type, protocol, &sock2);
29052         if (err < 0)
29053                 goto out_release_1;
29054 +       set_bit(SOCK_USER_SOCKET, &sock2->flags);
29055  
29056         err = sock1->ops->socketpair(sock1, sock2);
29057         if (err < 0) 
29058 diff -NurpP --minimal linux-2.6.16.20/net/sunrpc/auth.c linux-2.6.16.20-vs2.1.1-rc22/net/sunrpc/auth.c
29059 --- linux-2.6.16.20/net/sunrpc/auth.c   2006-02-18 14:40:43 +0100
29060 +++ linux-2.6.16.20-vs2.1.1-rc22/net/sunrpc/auth.c      2006-04-26 19:07:00 +0200
29061 @@ -13,6 +13,7 @@
29062  #include <linux/errno.h>
29063  #include <linux/sunrpc/clnt.h>
29064  #include <linux/spinlock.h>
29065 +#include <linux/vs_tag.h>
29066  
29067  #ifdef RPC_DEBUG
29068  # define RPCDBG_FACILITY       RPCDBG_AUTH
29069 @@ -251,6 +252,7 @@ rpcauth_lookupcred(struct rpc_auth *auth
29070         struct auth_cred acred = {
29071                 .uid = current->fsuid,
29072                 .gid = current->fsgid,
29073 +               .tag = dx_current_tag(),
29074                 .group_info = current->group_info,
29075         };
29076         struct rpc_cred *ret;
29077 @@ -270,6 +272,7 @@ rpcauth_bindcred(struct rpc_task *task)
29078         struct auth_cred acred = {
29079                 .uid = current->fsuid,
29080                 .gid = current->fsgid,
29081 +               .tag = dx_current_tag(),
29082                 .group_info = current->group_info,
29083         };
29084         struct rpc_cred *ret;
29085 diff -NurpP --minimal linux-2.6.16.20/net/sunrpc/auth_unix.c linux-2.6.16.20-vs2.1.1-rc22/net/sunrpc/auth_unix.c
29086 --- linux-2.6.16.20/net/sunrpc/auth_unix.c      2006-02-18 14:40:43 +0100
29087 +++ linux-2.6.16.20-vs2.1.1-rc22/net/sunrpc/auth_unix.c 2006-04-26 19:07:00 +0200
29088 @@ -11,12 +11,14 @@
29089  #include <linux/module.h>
29090  #include <linux/sunrpc/clnt.h>
29091  #include <linux/sunrpc/auth.h>
29092 +#include <linux/vs_tag.h>
29093  
29094  #define NFS_NGROUPS    16
29095  
29096  struct unx_cred {
29097         struct rpc_cred         uc_base;
29098         gid_t                   uc_gid;
29099 +       tag_t                   uc_tag;
29100         gid_t                   uc_gids[NFS_NGROUPS];
29101  };
29102  #define uc_uid                 uc_base.cr_uid
29103 @@ -78,6 +80,7 @@ unx_create_cred(struct rpc_auth *auth, s
29104         if (flags & RPCAUTH_LOOKUP_ROOTCREDS) {
29105                 cred->uc_uid = 0;
29106                 cred->uc_gid = 0;
29107 +               cred->uc_tag = dx_current_tag();
29108                 cred->uc_gids[0] = NOGROUP;
29109         } else {
29110                 int groups = acred->group_info->ngroups;
29111 @@ -86,6 +89,7 @@ unx_create_cred(struct rpc_auth *auth, s
29112  
29113                 cred->uc_uid = acred->uid;
29114                 cred->uc_gid = acred->gid;
29115 +               cred->uc_tag = acred->tag;
29116                 for (i = 0; i < groups; i++)
29117                         cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
29118                 if (i < NFS_NGROUPS)
29119 @@ -117,7 +121,8 @@ unx_match(struct auth_cred *acred, struc
29120                 int groups;
29121  
29122                 if (cred->uc_uid != acred->uid
29123 -                || cred->uc_gid != acred->gid)
29124 +                || cred->uc_gid != acred->gid
29125 +                || cred->uc_tag != acred->tag)
29126                         return 0;
29127  
29128                 groups = acred->group_info->ngroups;
29129 @@ -143,7 +148,7 @@ unx_marshal(struct rpc_task *task, u32 *
29130         struct rpc_clnt *clnt = task->tk_client;
29131         struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred;
29132         u32             *base, *hold;
29133 -       int             i;
29134 +       int             i, tag;
29135  
29136         *p++ = htonl(RPC_AUTH_UNIX);
29137         base = p++;
29138 @@ -153,9 +158,12 @@ unx_marshal(struct rpc_task *task, u32 *
29139          * Copy the UTS nodename captured when the client was created.
29140          */
29141         p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
29142 +       tag = task->tk_client->cl_tag;
29143  
29144 -       *p++ = htonl((u32) cred->uc_uid);
29145 -       *p++ = htonl((u32) cred->uc_gid);
29146 +       *p++ = htonl((u32) TAGINO_UID(tag,
29147 +               cred->uc_uid, cred->uc_tag));
29148 +       *p++ = htonl((u32) TAGINO_GID(tag,
29149 +               cred->uc_gid, cred->uc_tag));
29150         hold = p++;
29151         for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++)
29152                 *p++ = htonl((u32) cred->uc_gids[i]);
29153 diff -NurpP --minimal linux-2.6.16.20/net/sunrpc/clnt.c linux-2.6.16.20-vs2.1.1-rc22/net/sunrpc/clnt.c
29154 --- linux-2.6.16.20/net/sunrpc/clnt.c   2006-04-09 13:50:00 +0200
29155 +++ linux-2.6.16.20-vs2.1.1-rc22/net/sunrpc/clnt.c      2006-04-26 19:07:00 +0200
29156 @@ -34,6 +34,7 @@
29157  #include <linux/sunrpc/rpc_pipe_fs.h>
29158  
29159  #include <linux/nfs.h>
29160 +#include <linux/vs_cvirt.h>
29161  
29162  
29163  #define RPC_SLACK_SPACE                (1024)  /* total overkill */
29164 @@ -168,10 +169,10 @@ rpc_new_client(struct rpc_xprt *xprt, ch
29165         }
29166  
29167         /* save the nodename */
29168 -       clnt->cl_nodelen = strlen(system_utsname.nodename);
29169 +       clnt->cl_nodelen = strlen(vx_new_uts(nodename));
29170         if (clnt->cl_nodelen > UNX_MAXNODENAME)
29171                 clnt->cl_nodelen = UNX_MAXNODENAME;
29172 -       memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
29173 +       memcpy(clnt->cl_nodename, vx_new_uts(nodename), clnt->cl_nodelen);
29174         return clnt;
29175  
29176  out_no_auth:
29177 diff -NurpP --minimal linux-2.6.16.20/net/unix/af_unix.c linux-2.6.16.20-vs2.1.1-rc22/net/unix/af_unix.c
29178 --- linux-2.6.16.20/net/unix/af_unix.c  2006-04-09 13:50:00 +0200
29179 +++ linux-2.6.16.20-vs2.1.1-rc22/net/unix/af_unix.c     2006-04-26 19:07:00 +0200
29180 @@ -117,6 +117,9 @@
29181  #include <linux/mount.h>
29182  #include <net/checksum.h>
29183  #include <linux/security.h>
29184 +#include <linux/vs_context.h>
29185 +#include <linux/vs_network.h>
29186 +#include <linux/vs_limit.h>
29187  
29188  int sysctl_unix_max_dgram_qlen = 10;
29189  
29190 @@ -235,6 +238,8 @@ static struct sock *__unix_find_socket_b
29191         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
29192                 struct unix_sock *u = unix_sk(s);
29193  
29194 +               if (!vx_check(s->sk_xid, VX_IDENT|VX_WATCH))
29195 +                       continue;
29196                 if (u->addr->len == len &&
29197                     !memcmp(u->addr->name, sunname, len))
29198                         goto found;
29199 @@ -781,7 +786,7 @@ static int unix_bind(struct socket *sock
29200                  */
29201                 mode = S_IFSOCK |
29202                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
29203 -               err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
29204 +               err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0, NULL);
29205                 if (err)
29206                         goto out_mknod_dput;
29207                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
29208 diff -NurpP --minimal linux-2.6.16.20/net/x25/af_x25.c linux-2.6.16.20-vs2.1.1-rc22/net/x25/af_x25.c
29209 --- linux-2.6.16.20/net/x25/af_x25.c    2006-02-18 14:40:43 +0100
29210 +++ linux-2.6.16.20-vs2.1.1-rc22/net/x25/af_x25.c       2006-04-26 19:07:00 +0200
29211 @@ -491,7 +491,10 @@ static int x25_create(struct socket *soc
29212  
29213         x25 = x25_sk(sk);
29214  
29215 -       sock_init_data(sock, sk);
29216 +       sk->sk_socket = sock;
29217 +       sk->sk_type = sock->type;
29218 +       sk->sk_sleep = &sock->wait;
29219 +       sock->sk = sk;
29220  
29221         x25_init_timers(sk);
29222  
29223 diff -NurpP --minimal linux-2.6.16.20/security/commoncap.c linux-2.6.16.20-vs2.1.1-rc22/security/commoncap.c
29224 --- linux-2.6.16.20/security/commoncap.c        2006-02-18 14:40:44 +0100
29225 +++ linux-2.6.16.20-vs2.1.1-rc22/security/commoncap.c   2006-04-28 04:54:41 +0200
29226 @@ -27,7 +27,7 @@
29227  
29228  int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
29229  {
29230 -       NETLINK_CB(skb).eff_cap = current->cap_effective;
29231 +       cap_t(NETLINK_CB(skb).eff_cap) = vx_mbcap(cap_effective);
29232         return 0;
29233  }
29234  
29235 @@ -45,7 +45,7 @@ EXPORT_SYMBOL(cap_netlink_recv);
29236  int cap_capable (struct task_struct *tsk, int cap)
29237  {
29238         /* Derived from include/linux/sched.h:capable. */
29239 -       if (cap_raised(tsk->cap_effective, cap))
29240 +       if (vx_cap_raised(tsk->vx_info, tsk->cap_effective, cap))
29241                 return 0;
29242         return -EPERM;
29243  }
29244 @@ -143,7 +143,8 @@ void cap_bprm_apply_creds (struct linux_
29245         /* Derived from fs/exec.c:compute_creds. */
29246         kernel_cap_t new_permitted, working;
29247  
29248 -       new_permitted = cap_intersect (bprm->cap_permitted, cap_bset);
29249 +       new_permitted = cap_intersect (bprm->cap_permitted,
29250 +                                       vx_current_cap_bset());
29251         working = cap_intersect (bprm->cap_inheritable,
29252                                  current->cap_inheritable);
29253         new_permitted = cap_combine (new_permitted, working);
29254 @@ -312,7 +313,8 @@ void cap_task_reparent_to_init (struct t
29255  
29256  int cap_syslog (int type)
29257  {
29258 -       if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
29259 +       if ((type != 3 && type != 10) &&
29260 +               !vx_capable(CAP_SYS_ADMIN, VXC_SYSLOG))
29261                 return -EPERM;
29262         return 0;
29263  }
29264 diff -NurpP --minimal linux-2.6.16.20/security/dummy.c linux-2.6.16.20-vs2.1.1-rc22/security/dummy.c
29265 --- linux-2.6.16.20/security/dummy.c    2006-04-09 13:50:00 +0200
29266 +++ linux-2.6.16.20-vs2.1.1-rc22/security/dummy.c       2006-04-27 20:29:01 +0200
29267 @@ -85,7 +85,7 @@ static int dummy_sysctl (ctl_table * tab
29268         return 0;
29269  }
29270  
29271 -static int dummy_quotactl (int cmds, int type, int id, struct super_block *sb)
29272 +static int dummy_quotactl (int cmds, int type, int id, struct dqhash *hash)
29273  {
29274         return 0;
29275  }
29276 @@ -656,7 +656,7 @@ static int dummy_sem_semop (struct sem_a
29277  
29278  static int dummy_netlink_send (struct sock *sk, struct sk_buff *skb)
29279  {
29280 -       NETLINK_CB(skb).eff_cap = current->cap_effective;
29281 +       cap_t(NETLINK_CB(skb).eff_cap) = vx_mbcap(cap_effective);
29282         return 0;
29283  }
29284  
29285 diff -NurpP --minimal linux-2.6.16.20/security/security.c linux-2.6.16.20-vs2.1.1-rc22/security/security.c
29286 --- linux-2.6.16.20/security/security.c 2006-02-18 14:40:44 +0100
29287 +++ linux-2.6.16.20-vs2.1.1-rc22/security/security.c    2006-04-27 21:33:12 +0200
29288 @@ -186,6 +186,8 @@ int mod_unreg_security(const char *name,
29289   */
29290  int capable(int cap)
29291  {
29292 +       if (vx_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap))
29293 +               return 0;
29294         if (security_ops->capable(current, cap)) {
29295                 /* capability denied */
29296                 return 0;
29297 @@ -196,6 +198,7 @@ int capable(int cap)
29298         return 1;
29299  }
29300  
29301 +
29302  EXPORT_SYMBOL_GPL(register_security);
29303  EXPORT_SYMBOL_GPL(unregister_security);
29304  EXPORT_SYMBOL_GPL(mod_reg_security);
29305 diff -NurpP --minimal linux-2.6.16.20/security/selinux/hooks.c linux-2.6.16.20-vs2.1.1-rc22/security/selinux/hooks.c
29306 --- linux-2.6.16.20/security/selinux/hooks.c    2006-04-09 13:50:00 +0200
29307 +++ linux-2.6.16.20-vs2.1.1-rc22/security/selinux/hooks.c       2006-04-26 19:07:00 +0200
29308 @@ -1348,9 +1348,10 @@ static int selinux_sysctl(ctl_table *tab
29309         return error;
29310  }
29311  
29312 -static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb)
29313 +static int selinux_quotactl(int cmds, int type, int id, struct dqhash *hash)
29314  {
29315         int rc = 0;
29316 +       struct super_block *sb = hash->dqh_sb;
29317  
29318         if (!sb)
29319                 return 0;
This page took 2.316766 seconds and 3 git commands to generate.