1 diff -urNp -x '*.orig' linux-4.4/Documentation/vserver/debug.txt linux-4.4/Documentation/vserver/debug.txt
2 --- linux-4.4/Documentation/vserver/debug.txt 1970-01-01 01:00:00.000000000 +0100
3 +++ linux-4.4/Documentation/vserver/debug.txt 2021-02-24 16:56:24.542821396 +0100
8 + 2 4 "vx_map_tgid: %p/%llx: %d -> %d"
9 + "vx_rmap_tgid: %p/%llx: %d -> %d"
13 + 0 1 "ALLOC (%p,#%d)%c inode (%d)"
14 + "FREE (%p,#%d)%c inode"
15 + 1 2 "ALLOC (%p,#%d)%c %lld bytes (%d)"
16 + "FREE (%p,#%d)%c %lld bytes"
17 + 2 4 "ADJUST: %lld,%lld on %ld,%ld [mult=%d]"
18 + 3 8 "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d"
19 + "ext3_has_free_blocks(%p): free=%lu, root=%lu"
20 + "rcu_free_dl_info(%p)"
21 + 4 10 "alloc_dl_info(%p,%d) = %p"
22 + "dealloc_dl_info(%p)"
23 + "get_dl_info(%p[#%d.%d])"
24 + "put_dl_info(%p[#%d.%d])"
25 + 5 20 "alloc_dl_info(%p,%d)*"
26 + 6 40 "__hash_dl_info: %p[#%d]"
27 + "__unhash_dl_info: %p[#%d]"
28 + 7 80 "locate_dl_info(%p,#%d) = %p"
32 + 0 1 "destroy_dqhash: %p [#0x%08x] c=%d"
33 + "new_dqhash: %p [#0x%08x]"
34 + "vroot[%d]_clr_dev: dev=%p[%lu,%d:%d]"
35 + "vroot[%d]_get_real_bdev: dev=%p[%lu,%d:%d]"
36 + "vroot[%d]_set_dev: dev=%p[%lu,%d:%d]"
37 + "vroot_get_real_bdev not set"
38 + 1 2 "cow_break_link(?%s?)"
40 + 2 4 "dentry_open(new): %p"
41 + "dentry_open(old): %p"
42 + "lookup_create(new): %p"
44 + "path_lookup(old): %d"
45 + "vfs_create(new): %d"
48 + 3 8 "fput(new_file=%p[#%d])"
49 + "fput(old_file=%p[#%d])"
50 + 4 10 "vx_info_kill(%p[#%d],%d,%d) = %d"
51 + "vx_info_kill(%p[#%d],%d,%d)*"
52 + 5 20 "vs_reboot(%p[#%d],%d)"
53 + 6 40 "dropping task %p[#%u,%u] for %p[#%u,%u]"
57 + 2 4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d"
58 + 3 8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d"
59 + "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d"
60 + 4 10 "ip_route_connect(%p) %p,%p;%lx"
61 + 5 20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx"
62 + 6 40 "sk,egf: %p [#%d] (from %d)"
63 + "sk,egn: %p [#%d] (from %d)"
64 + "sk,req: %p [#%d] (from %d)"
65 + "sk: %p [#%d] (from %d)"
66 + "tw: %p [#%d] (from %d)"
67 + 7 80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d"
68 + "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d"
72 + 0 1 "__lookup_nx_info(#%u): %p[#%u]"
73 + "alloc_nx_info(%d) = %p"
74 + "create_nx_info(%d) (dynamic rejected)"
75 + "create_nx_info(%d) = %p (already there)"
76 + "create_nx_info(%d) = %p (new)"
77 + "dealloc_nx_info(%p)"
78 + 1 2 "alloc_nx_info(%d)*"
79 + "create_nx_info(%d)*"
80 + 2 4 "get_nx_info(%p[#%d.%d])"
81 + "put_nx_info(%p[#%d.%d])"
82 + 3 8 "claim_nx_info(%p[#%d.%d.%d]) %p"
83 + "clr_nx_info(%p[#%d.%d])"
84 + "init_nx_info(%p[#%d.%d])"
85 + "release_nx_info(%p[#%d.%d.%d]) %p"
86 + "set_nx_info(%p[#%d.%d])"
87 + 4 10 "__hash_nx_info: %p[#%d]"
88 + "__nx_dynamic_id: [#%d]"
89 + "__unhash_nx_info: %p[#%d.%d.%d]"
90 + 5 20 "moved task %p into nxi:%p[#%d]"
91 + "nx_migrate_task(%p,%p[#%d.%d.%d])"
92 + "task_get_nx_info(%p)"
93 + 6 40 "nx_clear_persistent(%p[#%d])"
97 + 0 1 "quota_sync_dqh(%p,%d) discard inode %p"
98 + 1 2 "quota_sync_dqh(%p,%d)"
99 + "sync_dquots(%p,%d)"
100 + "sync_dquots_dqh(%p,%d)"
101 + 3 8 "do_quotactl(%p,%d,cmd=%d,id=%d,%p)"
105 + 0 1 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]"
106 + 1 2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]"
107 + 4 10 "%s: (%s %s) returned %s with %d"
111 + 7 80 "dx_parse_tag(?%s?): %d:#%d"
112 + "dx_propagate_tag(%p[#%lu.%d]): %d,%d"
116 + 0 1 "__lookup_vx_info(#%u): %p[#%u]"
117 + "alloc_vx_info(%d) = %p"
118 + "alloc_vx_info(%d)*"
119 + "create_vx_info(%d) (dynamic rejected)"
120 + "create_vx_info(%d) = %p (already there)"
121 + "create_vx_info(%d) = %p (new)"
122 + "dealloc_vx_info(%p)"
123 + "loc_vx_info(%d) = %p (found)"
124 + "loc_vx_info(%d) = %p (new)"
125 + "loc_vx_info(%d) = %p (not available)"
126 + 1 2 "create_vx_info(%d)*"
128 + 2 4 "get_vx_info(%p[#%d.%d])"
129 + "put_vx_info(%p[#%d.%d])"
130 + 3 8 "claim_vx_info(%p[#%d.%d.%d]) %p"
131 + "clr_vx_info(%p[#%d.%d])"
132 + "init_vx_info(%p[#%d.%d])"
133 + "release_vx_info(%p[#%d.%d.%d]) %p"
134 + "set_vx_info(%p[#%d.%d])"
135 + 4 10 "__hash_vx_info: %p[#%d]"
136 + "__unhash_vx_info: %p[#%d.%d.%d]"
137 + "__vx_dynamic_id: [#%d]"
138 + 5 20 "enter_vx_info(%p[#%d],%p) %p[#%d,%p]"
139 + "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]"
140 + "moved task %p into vxi:%p[#%d]"
141 + "task_get_vx_info(%p)"
142 + "vx_migrate_task(%p,%p[#%d.%d])"
143 + 6 40 "vx_clear_persistent(%p[#%d])"
144 + "vx_exit_init(%p[#%d],%p[#%d,%d,%d])"
145 + "vx_set_init(%p[#%d],%p[#%d,%d,%d])"
146 + "vx_set_persistent(%p[#%d])"
147 + "vx_set_reaper(%p[#%d],%p[#%d,%d])"
148 + 7 80 "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]"
153 + n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s"
154 + "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
156 + m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s"
157 + "vx_acc_pages[%5d,%s,%2d]: %5d += %5d"
158 + "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
159 diff -urNp -x '*.orig' linux-4.4/arch/alpha/Kconfig linux-4.4/arch/alpha/Kconfig
160 --- linux-4.4/arch/alpha/Kconfig 2016-01-11 00:01:32.000000000 +0100
161 +++ linux-4.4/arch/alpha/Kconfig 2021-02-24 16:56:24.532821082 +0100
162 @@ -745,6 +745,8 @@ config DUMMY_CONSOLE
166 +source "kernel/vserver/Kconfig"
168 source "security/Kconfig"
170 source "crypto/Kconfig"
171 diff -urNp -x '*.orig' linux-4.4/arch/alpha/kernel/systbls.S linux-4.4/arch/alpha/kernel/systbls.S
172 --- linux-4.4/arch/alpha/kernel/systbls.S 2016-01-11 00:01:32.000000000 +0100
173 +++ linux-4.4/arch/alpha/kernel/systbls.S 2021-02-24 16:56:24.532821082 +0100
174 @@ -446,7 +446,7 @@ sys_call_table:
175 .quad sys_stat64 /* 425 */
178 - .quad sys_ni_syscall /* sys_vserver */
179 + .quad sys_vserver /* sys_vserver */
180 .quad sys_ni_syscall /* sys_mbind */
181 .quad sys_ni_syscall /* sys_get_mempolicy */
182 .quad sys_ni_syscall /* sys_set_mempolicy */
183 diff -urNp -x '*.orig' linux-4.4/arch/alpha/kernel/traps.c linux-4.4/arch/alpha/kernel/traps.c
184 --- linux-4.4/arch/alpha/kernel/traps.c 2016-01-11 00:01:32.000000000 +0100
185 +++ linux-4.4/arch/alpha/kernel/traps.c 2021-02-24 16:56:24.532821082 +0100
186 @@ -174,7 +174,8 @@ die_if_kernel(char * str, struct pt_regs
188 printk("CPU %d ", hard_smp_processor_id());
190 - printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
191 + printk("%s(%d:#%u): %s %ld\n", current->comm,
192 + task_pid_nr(current), current->xid, str, err);
193 dik_show_regs(regs, r9_15);
194 add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
195 dik_show_trace((unsigned long *)(regs+1));
196 diff -urNp -x '*.orig' linux-4.4/arch/arm/Kconfig linux-4.4/arch/arm/Kconfig
197 --- linux-4.4/arch/arm/Kconfig 2021-02-24 16:56:10.302372928 +0100
198 +++ linux-4.4/arch/arm/Kconfig 2021-02-24 16:56:24.532821082 +0100
199 @@ -2163,6 +2163,8 @@ source "fs/Kconfig"
201 source "arch/arm/Kconfig.debug"
203 +source "kernel/vserver/Kconfig"
205 source "security/Kconfig"
207 source "crypto/Kconfig"
208 diff -urNp -x '*.orig' linux-4.4/arch/arm/kernel/calls.S linux-4.4/arch/arm/kernel/calls.S
209 --- linux-4.4/arch/arm/kernel/calls.S 2016-01-11 00:01:32.000000000 +0100
210 +++ linux-4.4/arch/arm/kernel/calls.S 2021-02-24 16:56:24.532821082 +0100
212 /* 310 */ CALL(sys_request_key)
214 CALL(ABI(sys_semtimedop, sys_oabi_semtimedop))
215 -/* vserver */ CALL(sys_ni_syscall)
218 /* 315 */ CALL(sys_ioprio_get)
219 CALL(sys_inotify_init)
220 diff -urNp -x '*.orig' linux-4.4/arch/arm/kernel/traps.c linux-4.4/arch/arm/kernel/traps.c
221 --- linux-4.4/arch/arm/kernel/traps.c 2021-02-24 16:56:10.335707312 +0100
222 +++ linux-4.4/arch/arm/kernel/traps.c 2021-02-24 16:56:24.536154520 +0100
223 @@ -259,8 +259,8 @@ static int __die(const char *str, int er
227 - pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
228 - TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk));
229 + pr_emerg("Process %.*s (pid: %d:%u, stack limit = 0x%p)\n",
230 + TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), tsk->xid, end_of_stack(tsk));
232 if (!user_mode(regs) || in_interrupt()) {
233 dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
234 diff -urNp -x '*.orig' linux-4.4/arch/cris/Kconfig linux-4.4/arch/cris/Kconfig
235 --- linux-4.4/arch/cris/Kconfig 2016-01-11 00:01:32.000000000 +0100
236 +++ linux-4.4/arch/cris/Kconfig 2021-02-24 16:56:24.536154520 +0100
237 @@ -581,6 +581,8 @@ source "fs/Kconfig"
239 source "arch/cris/Kconfig.debug"
241 +source "kernel/vserver/Kconfig"
243 source "security/Kconfig"
245 source "crypto/Kconfig"
246 diff -urNp -x '*.orig' linux-4.4/arch/ia64/Kconfig linux-4.4/arch/ia64/Kconfig
247 --- linux-4.4/arch/ia64/Kconfig 2016-01-11 00:01:32.000000000 +0100
248 +++ linux-4.4/arch/ia64/Kconfig 2021-02-24 16:56:24.536154520 +0100
249 @@ -606,6 +606,8 @@ source "fs/Kconfig"
251 source "arch/ia64/Kconfig.debug"
253 +source "kernel/vserver/Kconfig"
255 source "security/Kconfig"
257 source "crypto/Kconfig"
258 diff -urNp -x '*.orig' linux-4.4/arch/ia64/kernel/entry.S linux-4.4/arch/ia64/kernel/entry.S
259 --- linux-4.4/arch/ia64/kernel/entry.S 2016-01-11 00:01:32.000000000 +0100
260 +++ linux-4.4/arch/ia64/kernel/entry.S 2021-02-24 16:56:24.536154520 +0100
261 @@ -1694,7 +1694,7 @@ sys_call_table:
263 data8 sys_mq_getsetattr
265 - data8 sys_ni_syscall // reserved for vserver
267 data8 sys_waitid // 1270
269 data8 sys_request_key
270 diff -urNp -x '*.orig' linux-4.4/arch/ia64/kernel/ptrace.c linux-4.4/arch/ia64/kernel/ptrace.c
271 --- linux-4.4/arch/ia64/kernel/ptrace.c 2016-01-11 00:01:32.000000000 +0100
272 +++ linux-4.4/arch/ia64/kernel/ptrace.c 2021-02-24 16:56:24.536154520 +0100
274 #include <linux/regset.h>
275 #include <linux/elf.h>
276 #include <linux/tracehook.h>
277 +#include <linux/vs_base.h>
279 #include <asm/pgtable.h>
280 #include <asm/processor.h>
281 diff -urNp -x '*.orig' linux-4.4/arch/ia64/kernel/traps.c linux-4.4/arch/ia64/kernel/traps.c
282 --- linux-4.4/arch/ia64/kernel/traps.c 2016-01-11 00:01:32.000000000 +0100
283 +++ linux-4.4/arch/ia64/kernel/traps.c 2021-02-24 16:56:24.536154520 +0100
284 @@ -60,8 +60,9 @@ die (const char *str, struct pt_regs *re
287 if (++die.lock_owner_depth < 3) {
288 - printk("%s[%d]: %s %ld [%d]\n",
289 - current->comm, task_pid_nr(current), str, err, ++die_counter);
290 + printk("%s[%d:#%u]: %s %ld [%d]\n",
291 + current->comm, task_pid_nr(current), current->xid,
292 + str, err, ++die_counter);
293 if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV)
296 @@ -324,8 +325,9 @@ handle_fpu_swa (int fp_fault, struct pt_
297 if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) {
298 last.time = current_jiffies + 5 * HZ;
300 - "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
301 - current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
302 + "%s(%d:#%u): floating-point assist fault at ip %016lx, isr %016lx\n",
303 + current->comm, task_pid_nr(current), current->xid,
304 + regs->cr_iip + ia64_psr(regs)->ri, isr);
308 diff -urNp -x '*.orig' linux-4.4/arch/m32r/kernel/traps.c linux-4.4/arch/m32r/kernel/traps.c
309 --- linux-4.4/arch/m32r/kernel/traps.c 2016-01-11 00:01:32.000000000 +0100
310 +++ linux-4.4/arch/m32r/kernel/traps.c 2021-02-24 16:56:24.536154520 +0100
311 @@ -184,8 +184,9 @@ static void show_registers(struct pt_reg
313 printk("SPI: %08lx\n", sp);
315 - printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
316 - current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current);
317 + printk("Process %s (pid: %d:#%u, process nr: %d, stackpage=%08lx)",
318 + current->comm, task_pid_nr(current), current->xid,
319 + 0xffff & i, 4096+(unsigned long)current);
322 * When in-kernel, we also print out the stack and code at the
323 diff -urNp -x '*.orig' linux-4.4/arch/m68k/Kconfig linux-4.4/arch/m68k/Kconfig
324 --- linux-4.4/arch/m68k/Kconfig 2016-01-11 00:01:32.000000000 +0100
325 +++ linux-4.4/arch/m68k/Kconfig 2021-02-24 16:56:24.536154520 +0100
326 @@ -164,6 +164,8 @@ source "fs/Kconfig"
328 source "arch/m68k/Kconfig.debug"
330 +source "kernel/vserver/Kconfig"
332 source "security/Kconfig"
334 source "crypto/Kconfig"
335 diff -urNp -x '*.orig' linux-4.4/arch/mips/Kconfig linux-4.4/arch/mips/Kconfig
336 --- linux-4.4/arch/mips/Kconfig 2021-02-24 16:56:10.382375448 +0100
337 +++ linux-4.4/arch/mips/Kconfig 2021-02-24 16:56:24.536154520 +0100
338 @@ -3031,6 +3031,8 @@ source "fs/Kconfig"
340 source "arch/mips/Kconfig.debug"
342 +source "kernel/vserver/Kconfig"
344 source "security/Kconfig"
346 source "crypto/Kconfig"
347 diff -urNp -x '*.orig' linux-4.4/arch/mips/kernel/ptrace.c linux-4.4/arch/mips/kernel/ptrace.c
348 --- linux-4.4/arch/mips/kernel/ptrace.c 2021-02-24 16:56:10.399042639 +0100
349 +++ linux-4.4/arch/mips/kernel/ptrace.c 2021-02-24 16:56:24.536154520 +0100
351 #include <linux/audit.h>
352 #include <linux/seccomp.h>
353 #include <linux/ftrace.h>
354 +#include <linux/vs_base.h>
356 #include <asm/byteorder.h>
358 @@ -797,6 +798,9 @@ long arch_ptrace(struct task_struct *chi
359 void __user *datavp = (void __user *) data;
360 unsigned long __user *datalp = (void __user *) data;
362 + if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT))
366 /* when I and D space are separate, these will need to be fixed. */
367 case PTRACE_PEEKTEXT: /* read word at location addr. */
368 diff -urNp -x '*.orig' linux-4.4/arch/mips/kernel/scall32-o32.S linux-4.4/arch/mips/kernel/scall32-o32.S
369 --- linux-4.4/arch/mips/kernel/scall32-o32.S 2021-02-24 16:56:10.402376078 +0100
370 +++ linux-4.4/arch/mips/kernel/scall32-o32.S 2021-02-24 16:56:24.536154520 +0100
371 @@ -511,7 +511,7 @@ EXPORT(sys_call_table)
372 PTR sys_mq_timedreceive
373 PTR sys_mq_notify /* 4275 */
374 PTR sys_mq_getsetattr
375 - PTR sys_ni_syscall /* sys_vserver */
378 PTR sys_ni_syscall /* available, was setaltroot */
379 PTR sys_add_key /* 4280 */
380 diff -urNp -x '*.orig' linux-4.4/arch/mips/kernel/scall64-64.S linux-4.4/arch/mips/kernel/scall64-64.S
381 --- linux-4.4/arch/mips/kernel/scall64-64.S 2021-02-24 16:56:10.402376078 +0100
382 +++ linux-4.4/arch/mips/kernel/scall64-64.S 2021-02-24 16:56:24.536154520 +0100
383 @@ -348,7 +348,7 @@ EXPORT(sys_call_table)
384 PTR sys_mq_timedreceive
386 PTR sys_mq_getsetattr /* 5235 */
387 - PTR sys_ni_syscall /* sys_vserver */
390 PTR sys_ni_syscall /* available, was setaltroot */
392 diff -urNp -x '*.orig' linux-4.4/arch/mips/kernel/scall64-n32.S linux-4.4/arch/mips/kernel/scall64-n32.S
393 --- linux-4.4/arch/mips/kernel/scall64-n32.S 2021-02-24 16:56:10.402376078 +0100
394 +++ linux-4.4/arch/mips/kernel/scall64-n32.S 2021-02-24 16:56:24.536154520 +0100
395 @@ -343,7 +343,7 @@ EXPORT(sysn32_call_table)
396 PTR compat_sys_mq_timedreceive
397 PTR compat_sys_mq_notify
398 PTR compat_sys_mq_getsetattr
399 - PTR sys_ni_syscall /* 6240, sys_vserver */
400 + PTR sys32_vserver /* 6240 */
401 PTR compat_sys_waitid
402 PTR sys_ni_syscall /* available, was setaltroot */
404 diff -urNp -x '*.orig' linux-4.4/arch/mips/kernel/scall64-o32.S linux-4.4/arch/mips/kernel/scall64-o32.S
405 --- linux-4.4/arch/mips/kernel/scall64-o32.S 2021-02-24 16:56:10.402376078 +0100
406 +++ linux-4.4/arch/mips/kernel/scall64-o32.S 2021-02-24 16:56:24.539487958 +0100
407 @@ -499,7 +499,7 @@ EXPORT(sys32_call_table)
408 PTR compat_sys_mq_timedreceive
409 PTR compat_sys_mq_notify /* 4275 */
410 PTR compat_sys_mq_getsetattr
411 - PTR sys_ni_syscall /* sys_vserver */
413 PTR compat_sys_waitid
414 PTR sys_ni_syscall /* available, was setaltroot */
415 PTR sys_add_key /* 4280 */
416 diff -urNp -x '*.orig' linux-4.4/arch/mips/kernel/traps.c linux-4.4/arch/mips/kernel/traps.c
417 --- linux-4.4/arch/mips/kernel/traps.c 2021-02-24 16:56:10.402376078 +0100
418 +++ linux-4.4/arch/mips/kernel/traps.c 2021-02-24 16:56:24.539487958 +0100
419 @@ -354,9 +354,10 @@ void show_registers(struct pt_regs *regs
423 - printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n",
424 - current->comm, current->pid, current_thread_info(), current,
425 - field, current_thread_info()->tp_value);
426 + printk("Process %s (pid: %d:#%u, threadinfo=%p, task=%p, tls=%0*lx)\n",
427 + current->comm, task_pid_nr(current), current->xid,
428 + current_thread_info(), current,
429 + field, current_thread_info()->tp_value);
430 if (cpu_has_userlocal) {
433 diff -urNp -x '*.orig' linux-4.4/arch/parisc/Kconfig linux-4.4/arch/parisc/Kconfig
434 --- linux-4.4/arch/parisc/Kconfig 2021-02-24 16:56:10.415709831 +0100
435 +++ linux-4.4/arch/parisc/Kconfig 2021-02-24 16:56:24.539487958 +0100
436 @@ -341,6 +341,8 @@ config SECCOMP
438 If unsure, say Y. Only embedded should say N here.
440 +source "kernel/vserver/Kconfig"
442 source "security/Kconfig"
444 source "crypto/Kconfig"
445 diff -urNp -x '*.orig' linux-4.4/arch/parisc/kernel/syscall_table.S linux-4.4/arch/parisc/kernel/syscall_table.S
446 --- linux-4.4/arch/parisc/kernel/syscall_table.S 2021-02-24 16:56:10.422376707 +0100
447 +++ linux-4.4/arch/parisc/kernel/syscall_table.S 2021-02-24 16:56:24.539487958 +0100
449 ENTRY_COMP(mbind) /* 260 */
450 ENTRY_COMP(get_mempolicy)
451 ENTRY_COMP(set_mempolicy)
452 - ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */
453 + ENTRY_DIFF(vserver)
455 ENTRY_SAME(request_key) /* 265 */
457 diff -urNp -x '*.orig' linux-4.4/arch/parisc/kernel/traps.c linux-4.4/arch/parisc/kernel/traps.c
458 --- linux-4.4/arch/parisc/kernel/traps.c 2021-02-24 16:56:10.422376707 +0100
459 +++ linux-4.4/arch/parisc/kernel/traps.c 2021-02-24 16:56:24.539487958 +0100
460 @@ -235,8 +235,9 @@ void die_if_kernel(char *str, struct pt_
463 parisc_printk_ratelimited(1, regs,
464 - KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
465 - current->comm, task_pid_nr(current), str, err, regs->iaoq[0]);
466 + KERN_CRIT "%s (pid %d:#%u): %s (code %ld) at " RFMT "\n",
467 + current->comm, task_pid_nr(current), current->xid,
468 + str, err, regs->iaoq[0]);
472 @@ -266,8 +267,8 @@ void die_if_kernel(char *str, struct pt_
473 pdc_console_restart();
476 - printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
477 - current->comm, task_pid_nr(current), str, err);
478 + printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld)\n",
479 + current->comm, task_pid_nr(current), current->xid, str, err);
481 /* Wot's wrong wif bein' racy? */
482 if (current->thread.flags & PARISC_KERNEL_DEATH) {
483 diff -urNp -x '*.orig' linux-4.4/arch/powerpc/Kconfig linux-4.4/arch/powerpc/Kconfig
484 --- linux-4.4/arch/powerpc/Kconfig 2021-02-24 16:56:10.422376707 +0100
485 +++ linux-4.4/arch/powerpc/Kconfig 2021-02-24 16:56:24.539487958 +0100
486 @@ -1087,6 +1087,8 @@ source "lib/Kconfig"
488 source "arch/powerpc/Kconfig.debug"
490 +source "kernel/vserver/Kconfig"
492 source "security/Kconfig"
494 source "crypto/Kconfig"
495 diff -urNp -x '*.orig' linux-4.4/arch/powerpc/include/uapi/asm/unistd.h linux-4.4/arch/powerpc/include/uapi/asm/unistd.h
496 --- linux-4.4/arch/powerpc/include/uapi/asm/unistd.h 2016-01-11 00:01:32.000000000 +0100
497 +++ linux-4.4/arch/powerpc/include/uapi/asm/unistd.h 2021-02-24 16:56:24.539487958 +0100
500 #define __NR_rtas 255
501 #define __NR_sys_debug_setcontext 256
502 -/* Number 257 is reserved for vserver */
503 +#define __NR_vserver 257
504 #define __NR_migrate_pages 258
505 #define __NR_mbind 259
506 #define __NR_get_mempolicy 260
507 diff -urNp -x '*.orig' linux-4.4/arch/powerpc/kernel/traps.c linux-4.4/arch/powerpc/kernel/traps.c
508 --- linux-4.4/arch/powerpc/kernel/traps.c 2021-02-24 16:56:10.445710775 +0100
509 +++ linux-4.4/arch/powerpc/kernel/traps.c 2021-02-24 16:56:24.539487958 +0100
510 @@ -1315,8 +1315,9 @@ void nonrecoverable_exception(struct pt_
512 void trace_syscall(struct pt_regs *regs)
514 - printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n",
515 - current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0],
516 + printk("Task: %p(%d:#%u), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n",
517 + current, task_pid_nr(current), current->xid,
518 + regs->nip, regs->link, regs->gpr[0],
519 regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
522 diff -urNp -x '*.orig' linux-4.4/arch/s390/Kconfig linux-4.4/arch/s390/Kconfig
523 --- linux-4.4/arch/s390/Kconfig 2021-02-24 16:56:10.465711405 +0100
524 +++ linux-4.4/arch/s390/Kconfig 2021-02-24 16:56:24.539487958 +0100
525 @@ -776,6 +776,8 @@ source "fs/Kconfig"
527 source "arch/s390/Kconfig.debug"
529 +source "kernel/vserver/Kconfig"
531 source "security/Kconfig"
533 source "crypto/Kconfig"
534 diff -urNp -x '*.orig' linux-4.4/arch/s390/include/asm/tlb.h linux-4.4/arch/s390/include/asm/tlb.h
535 --- linux-4.4/arch/s390/include/asm/tlb.h 2016-01-11 00:01:32.000000000 +0100
536 +++ linux-4.4/arch/s390/include/asm/tlb.h 2021-02-24 16:56:24.539487958 +0100
538 #include <linux/mm.h>
539 #include <linux/pagemap.h>
540 #include <linux/swap.h>
542 #include <asm/processor.h>
543 #include <asm/pgalloc.h>
544 #include <asm/tlbflush.h>
545 diff -urNp -x '*.orig' linux-4.4/arch/s390/include/uapi/asm/unistd.h linux-4.4/arch/s390/include/uapi/asm/unistd.h
546 --- linux-4.4/arch/s390/include/uapi/asm/unistd.h 2016-01-11 00:01:32.000000000 +0100
547 +++ linux-4.4/arch/s390/include/uapi/asm/unistd.h 2021-02-24 16:56:24.539487958 +0100
549 #define __NR_clock_gettime 260
550 #define __NR_clock_getres 261
551 #define __NR_clock_nanosleep 262
552 -/* Number 263 is reserved for vserver */
553 +#define __NR_vserver 263
554 #define __NR_statfs64 265
555 #define __NR_fstatfs64 266
556 #define __NR_remap_file_pages 267
557 diff -urNp -x '*.orig' linux-4.4/arch/s390/kernel/ptrace.c linux-4.4/arch/s390/kernel/ptrace.c
558 --- linux-4.4/arch/s390/kernel/ptrace.c 2021-02-24 16:56:10.475711720 +0100
559 +++ linux-4.4/arch/s390/kernel/ptrace.c 2021-02-24 16:56:24.539487958 +0100
561 #include <linux/tracehook.h>
562 #include <linux/seccomp.h>
563 #include <linux/compat.h>
564 +#include <linux/vs_base.h>
565 #include <trace/syscall.h>
566 #include <asm/segment.h>
567 #include <asm/page.h>
568 diff -urNp -x '*.orig' linux-4.4/arch/s390/kernel/syscalls.S linux-4.4/arch/s390/kernel/syscalls.S
569 --- linux-4.4/arch/s390/kernel/syscalls.S 2021-02-24 16:56:10.475711720 +0100
570 +++ linux-4.4/arch/s390/kernel/syscalls.S 2021-02-24 16:56:24.539487958 +0100
571 @@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,compat_sys_clo
572 SYSCALL(sys_clock_gettime,compat_sys_clock_gettime) /* 260 */
573 SYSCALL(sys_clock_getres,compat_sys_clock_getres)
574 SYSCALL(sys_clock_nanosleep,compat_sys_clock_nanosleep)
575 -NI_SYSCALL /* reserved for vserver */
576 +SYSCALL(sys_vserver,sys32_vserver)	/* 263: vserver (native, compat) */
577 SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
578 SYSCALL(sys_statfs64,compat_sys_statfs64)
579 SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
580 diff -urNp -x '*.orig' linux-4.4/arch/sh/Kconfig linux-4.4/arch/sh/Kconfig
581 --- linux-4.4/arch/sh/Kconfig 2016-01-11 00:01:32.000000000 +0100
582 +++ linux-4.4/arch/sh/Kconfig 2021-02-24 16:56:24.542821396 +0100
583 @@ -883,6 +883,8 @@ source "fs/Kconfig"
585 source "arch/sh/Kconfig.debug"
587 +source "kernel/vserver/Kconfig"
589 source "security/Kconfig"
591 source "crypto/Kconfig"
592 diff -urNp -x '*.orig' linux-4.4/arch/sh/kernel/irq.c linux-4.4/arch/sh/kernel/irq.c
593 --- linux-4.4/arch/sh/kernel/irq.c 2016-01-11 00:01:32.000000000 +0100
594 +++ linux-4.4/arch/sh/kernel/irq.c 2021-02-24 16:56:24.542821396 +0100
596 #include <linux/ftrace.h>
597 #include <linux/delay.h>
598 #include <linux/ratelimit.h>
599 +// #include <linux/vs_context.h>
600 #include <asm/processor.h>
601 #include <asm/machvec.h>
602 #include <asm/uaccess.h>
603 diff -urNp -x '*.orig' linux-4.4/arch/sparc/Kconfig linux-4.4/arch/sparc/Kconfig
604 --- linux-4.4/arch/sparc/Kconfig 2021-02-24 16:56:10.485712035 +0100
605 +++ linux-4.4/arch/sparc/Kconfig 2021-02-24 16:56:24.542821396 +0100
606 @@ -561,6 +561,8 @@ source "fs/Kconfig"
608 source "arch/sparc/Kconfig.debug"
610 +source "kernel/vserver/Kconfig"
612 source "security/Kconfig"
614 source "crypto/Kconfig"
615 diff -urNp -x '*.orig' linux-4.4/arch/sparc/include/uapi/asm/unistd.h linux-4.4/arch/sparc/include/uapi/asm/unistd.h
616 --- linux-4.4/arch/sparc/include/uapi/asm/unistd.h 2016-01-11 00:01:32.000000000 +0100
617 +++ linux-4.4/arch/sparc/include/uapi/asm/unistd.h 2021-02-24 16:56:24.542821396 +0100
619 #define __NR_timer_getoverrun 264
620 #define __NR_timer_delete 265
621 #define __NR_timer_create 266
622 -/* #define __NR_vserver 267 Reserved for VSERVER */
623 +#define __NR_vserver 267
624 #define __NR_io_setup 268
625 #define __NR_io_destroy 269
626 #define __NR_io_submit 270
627 diff -urNp -x '*.orig' linux-4.4/arch/sparc/kernel/systbls_32.S linux-4.4/arch/sparc/kernel/systbls_32.S
628 --- linux-4.4/arch/sparc/kernel/systbls_32.S 2016-01-11 00:01:32.000000000 +0100
629 +++ linux-4.4/arch/sparc/kernel/systbls_32.S 2021-02-24 16:56:24.542821396 +0100
630 @@ -70,7 +70,7 @@ sys_call_table:
631 /*250*/ .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall
632 /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
633 /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
634 -/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
635 +/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
636 /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
637 /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
638 /*280*/ .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
639 diff -urNp -x '*.orig' linux-4.4/arch/sparc/kernel/systbls_64.S linux-4.4/arch/sparc/kernel/systbls_64.S
640 --- linux-4.4/arch/sparc/kernel/systbls_64.S 2016-01-11 00:01:32.000000000 +0100
641 +++ linux-4.4/arch/sparc/kernel/systbls_64.S 2021-02-24 16:56:24.542821396 +0100
642 @@ -71,7 +71,7 @@ sys_call_table32:
643 /*250*/ .word sys_mremap, compat_sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
644 .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
645 /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
646 - .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
647 + .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy
648 /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
649 .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
650 /*280*/ .word sys_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat
651 @@ -152,7 +152,7 @@ sys_call_table:
652 /*250*/ .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
653 .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
654 /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
655 - .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
656 + .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
657 /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
658 .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
659 /*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
660 diff -urNp -x '*.orig' linux-4.4/arch/um/Kconfig.rest linux-4.4/arch/um/Kconfig.rest
661 --- linux-4.4/arch/um/Kconfig.rest 2016-01-11 00:01:32.000000000 +0100
662 +++ linux-4.4/arch/um/Kconfig.rest 2021-02-24 16:56:24.542821396 +0100
663 @@ -12,6 +12,8 @@ source "arch/um/Kconfig.net"
667 +source "kernel/vserver/Kconfig"
669 source "security/Kconfig"
671 source "crypto/Kconfig"
672 diff -urNp -x '*.orig' linux-4.4/arch/x86/Kconfig linux-4.4/arch/x86/Kconfig
673 --- linux-4.4/arch/x86/Kconfig 2021-02-24 16:56:10.502379227 +0100
674 +++ linux-4.4/arch/x86/Kconfig 2021-02-24 16:56:24.542821396 +0100
675 @@ -2724,6 +2724,8 @@ source "fs/Kconfig"
677 source "arch/x86/Kconfig.debug"
679 +source "kernel/vserver/Kconfig"
681 source "security/Kconfig"
683 source "crypto/Kconfig"
684 diff -urNp -x '*.orig' linux-4.4/arch/x86/entry/syscalls/syscall_32.tbl linux-4.4/arch/x86/entry/syscalls/syscall_32.tbl
685 --- linux-4.4/arch/x86/entry/syscalls/syscall_32.tbl 2021-02-24 16:56:10.509046103 +0100
686 +++ linux-4.4/arch/x86/entry/syscalls/syscall_32.tbl 2021-02-24 16:56:24.542821396 +0100
688 270 i386 tgkill sys_tgkill
689 271 i386 utimes sys_utimes compat_sys_utimes
690 272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
692 +273 i386 vserver sys_vserver sys32_vserver
693 274 i386 mbind sys_mbind
694 275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
695 276 i386 set_mempolicy sys_set_mempolicy
696 diff -urNp -x '*.orig' linux-4.4/arch/x86/entry/syscalls/syscall_64.tbl linux-4.4/arch/x86/entry/syscalls/syscall_64.tbl
697 --- linux-4.4/arch/x86/entry/syscalls/syscall_64.tbl 2016-01-11 00:01:32.000000000 +0100
698 +++ linux-4.4/arch/x86/entry/syscalls/syscall_64.tbl 2021-02-24 16:56:24.542821396 +0100
700 233 common epoll_ctl sys_epoll_ctl
701 234 common tgkill sys_tgkill
702 235 common utimes sys_utimes
704 +236 64 vserver sys_vserver
705 237 common mbind sys_mbind
706 238 common set_mempolicy sys_set_mempolicy
707 239 common get_mempolicy sys_get_mempolicy
708 diff -urNp -x '*.orig' linux-4.4/block/ioprio.c linux-4.4/block/ioprio.c
709 --- linux-4.4/block/ioprio.c 2021-02-24 16:56:10.582381746 +0100
710 +++ linux-4.4/block/ioprio.c 2021-02-24 16:56:24.542821396 +0100
712 #include <linux/syscalls.h>
713 #include <linux/security.h>
714 #include <linux/pid_namespace.h>
715 +#include <linux/vs_base.h>
717 int set_task_ioprio(struct task_struct *task, int ioprio)
719 @@ -105,6 +106,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which,
721 pgrp = find_vpid(who);
722 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
723 + if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
725 ret = set_task_ioprio(p, ioprio);
728 @@ -203,6 +206,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which,
730 pgrp = find_vpid(who);
731 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
732 + if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
734 tmpio = get_task_ioprio(p);
737 diff -urNp -x '*.orig' linux-4.4/drivers/block/Kconfig linux-4.4/drivers/block/Kconfig
738 --- linux-4.4/drivers/block/Kconfig 2021-02-24 16:56:10.629049882 +0100
739 +++ linux-4.4/drivers/block/Kconfig 2021-02-24 16:56:24.542821396 +0100
740 @@ -283,6 +283,13 @@ config BLK_DEV_CRYPTOLOOP
742 source "drivers/block/drbd/Kconfig"
744 +config BLK_DEV_VROOT
745 + tristate "Virtual Root device support"
746 + depends on QUOTACTL
748 + Saying Y here will allow you to use quota/fs ioctls on a shared
749 + partition within a virtual server without compromising security.
752 tristate "Network block device support"
754 diff -urNp -x '*.orig' linux-4.4/drivers/block/Makefile linux-4.4/drivers/block/Makefile
755 --- linux-4.4/drivers/block/Makefile 2016-01-11 00:01:32.000000000 +0100
756 +++ linux-4.4/drivers/block/Makefile 2021-02-24 16:56:24.542821396 +0100
757 @@ -32,6 +32,7 @@ obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
759 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
760 obj-$(CONFIG_BLK_DEV_HD) += hd.o
761 +obj-$(CONFIG_BLK_DEV_VROOT) += vroot.o
763 obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
764 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
765 diff -urNp -x '*.orig' linux-4.4/drivers/block/loop.c linux-4.4/drivers/block/loop.c
766 --- linux-4.4/drivers/block/loop.c 2021-02-24 16:56:10.632383321 +0100
767 +++ linux-4.4/drivers/block/loop.c 2021-02-24 16:56:24.542821396 +0100
769 #include <linux/miscdevice.h>
770 #include <linux/falloc.h>
771 #include <linux/uio.h>
772 +#include <linux/vs_context.h>
775 #include <asm/uaccess.h>
776 @@ -947,6 +948,7 @@ static int loop_set_fd(struct loop_devic
777 lo->lo_blocksize = lo_blocksize;
778 lo->lo_device = bdev;
779 lo->lo_flags = lo_flags;
780 + lo->lo_xid = vx_current_xid();
781 lo->lo_backing_file = file;
784 @@ -1067,6 +1069,7 @@ static int loop_clr_fd(struct loop_devic
786 lo->lo_sizelimit = 0;
787 lo->lo_encrypt_key_size = 0;
789 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
790 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
791 memset(lo->lo_file_name, 0, LO_NAME_SIZE);
792 @@ -1113,7 +1116,7 @@ loop_set_status(struct loop_device *lo,
794 if (lo->lo_encrypt_key_size &&
795 !uid_eq(lo->lo_key_owner, uid) &&
796 - !capable(CAP_SYS_ADMIN))
797 + !vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP))
799 if (lo->lo_state != Lo_bound)
801 @@ -1218,7 +1221,8 @@ loop_get_status(struct loop_device *lo,
802 memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
803 info->lo_encrypt_type =
804 lo->lo_encryption ? lo->lo_encryption->number : 0;
805 - if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
806 + if (lo->lo_encrypt_key_size &&
807 + vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) {
808 info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
809 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
810 lo->lo_encrypt_key_size);
811 @@ -1580,6 +1584,11 @@ static int lo_open(struct block_device *
815 + if (!vx_check(lo->lo_xid, VS_IDENT|VS_HOSTID|VS_ADMIN_P)) {
820 atomic_inc(&lo->lo_refcnt);
822 mutex_unlock(&loop_index_mutex);
823 diff -urNp -x '*.orig' linux-4.4/drivers/block/loop.h linux-4.4/drivers/block/loop.h
824 --- linux-4.4/drivers/block/loop.h 2021-02-24 16:56:10.632383321 +0100
825 +++ linux-4.4/drivers/block/loop.h 2021-02-24 16:56:24.542821396 +0100
826 @@ -43,6 +43,7 @@ struct loop_device {
827 struct loop_func_table *lo_encryption;
829 kuid_t lo_key_owner; /* Who set the key */
831 int (*ioctl)(struct loop_device *, int cmd,
834 diff -urNp -x '*.orig' linux-4.4/drivers/block/vroot.c linux-4.4/drivers/block/vroot.c
835 --- linux-4.4/drivers/block/vroot.c 1970-01-01 01:00:00.000000000 +0100
836 +++ linux-4.4/drivers/block/vroot.c 2021-02-24 16:56:24.542821396 +0100
839 + * linux/drivers/block/vroot.c
841 + * written by Herbert Pötzl, 9/11/2002
842 + * ported to 2.6.10 by Herbert Pötzl, 30/12/2004
844 + * based on the loop.c code by Theodore Ts'o.
846 + * Copyright (C) 2002-2007 by Herbert Pötzl.
847 + * Redistribution of this file is permitted under the
848 + * GNU General Public License.
852 +#include <linux/module.h>
853 +#include <linux/moduleparam.h>
854 +#include <linux/file.h>
855 +#include <linux/major.h>
856 +#include <linux/blkdev.h>
857 +#include <linux/slab.h>
859 +#include <linux/vroot.h>
860 +#include <linux/vs_context.h>
863 +static int max_vroot = 8;
865 +static struct vroot_device *vroot_dev;
866 +static struct gendisk **disks;
869 +static int vroot_set_dev(
870 + struct vroot_device *vr,
871 + struct block_device *bdev,
874 + struct block_device *real_bdev;
876 + struct inode *inode;
880 + if (vr->vr_state != Vr_unbound)
889 + inode = file->f_path.dentry->d_inode;
892 + if (S_ISBLK(inode->i_mode)) {
893 + real_bdev = inode->i_bdev;
894 + vr->vr_device = real_bdev;
895 + __iget(real_bdev->bd_inode);
899 + vxdprintk(VXD_CBIT(misc, 0),
900 + "vroot[%d]_set_dev: dev=" VXF_DEV,
901 + vr->vr_number, VXD_DEV(real_bdev));
903 + vr->vr_state = Vr_bound;
912 +static int vroot_clr_dev(
913 + struct vroot_device *vr,
914 + struct block_device *bdev)
916 + struct block_device *real_bdev;
918 + if (vr->vr_state != Vr_bound)
920 + if (vr->vr_refcnt > 1) /* we needed one fd for the ioctl */
923 + real_bdev = vr->vr_device;
925 + vxdprintk(VXD_CBIT(misc, 0),
926 + "vroot[%d]_clr_dev: dev=" VXF_DEV,
927 + vr->vr_number, VXD_DEV(real_bdev));
930 + vr->vr_state = Vr_unbound;
931 + vr->vr_device = NULL;
936 +static int vr_ioctl(struct block_device *bdev, fmode_t mode,
937 + unsigned int cmd, unsigned long arg)
939 + struct vroot_device *vr = bdev->bd_disk->private_data;
942 + down(&vr->vr_ctl_mutex);
944 + case VROOT_SET_DEV:
945 + err = vroot_set_dev(vr, bdev, arg);
947 + case VROOT_CLR_DEV:
948 + err = vroot_clr_dev(vr, bdev);
954 + up(&vr->vr_ctl_mutex);
958 +static int vr_open(struct block_device *bdev, fmode_t mode)
960 + struct vroot_device *vr = bdev->bd_disk->private_data;
962 + down(&vr->vr_ctl_mutex);
964 + up(&vr->vr_ctl_mutex);
968 +static void vr_release(struct gendisk *disk, fmode_t mode)
970 + struct vroot_device *vr = disk->private_data;
972 + down(&vr->vr_ctl_mutex);
974 + up(&vr->vr_ctl_mutex);
977 +static struct block_device_operations vr_fops = {
978 + .owner = THIS_MODULE,
980 + .release = vr_release,
984 +static blk_qc_t vroot_make_request(struct request_queue *q, struct bio *bio)
986 + printk("vroot_make_request %p, %p\n", q, bio);
988 + return BLK_QC_T_NONE;
991 +struct block_device *__vroot_get_real_bdev(struct block_device *bdev)
993 + struct inode *inode = bdev->bd_inode;
994 + struct vroot_device *vr;
995 + struct block_device *real_bdev;
996 + int minor = iminor(inode);
998 + vr = &vroot_dev[minor];
999 + real_bdev = vr->vr_device;
1001 + vxdprintk(VXD_CBIT(misc, 0),
1002 + "vroot[%d]_get_real_bdev: dev=" VXF_DEV,
1003 + vr->vr_number, VXD_DEV(real_bdev));
1005 + if (vr->vr_state != Vr_bound)
1006 + return ERR_PTR(-ENXIO);
1008 + __iget(real_bdev->bd_inode);
1015 + * And now the modules code and kernel interface.
1018 +module_param(max_vroot, int, 0);
1020 +MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)");
1021 +MODULE_LICENSE("GPL");
1022 +MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR);
1024 +MODULE_AUTHOR ("Herbert P?tzl");
1025 +MODULE_DESCRIPTION ("Virtual Root Device Mapper");
1028 +int __init vroot_init(void)
1032 + if (max_vroot < 1 || max_vroot > 256) {
1033 + max_vroot = MAX_VROOT_DEFAULT;
1034 + printk(KERN_WARNING "vroot: invalid max_vroot "
1035 + "(must be between 1 and 256), "
1036 + "using default (%d)\n", max_vroot);
1039 + if (register_blkdev(VROOT_MAJOR, "vroot"))
1043 + vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL);
1046 + memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device));
1048 + disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL);
1052 + for (i = 0; i < max_vroot; i++) {
1053 + disks[i] = alloc_disk(1);
1056 + disks[i]->queue = blk_alloc_queue(GFP_KERNEL);
1057 + if (!disks[i]->queue)
1059 + blk_queue_make_request(disks[i]->queue, vroot_make_request);
1062 + for (i = 0; i < max_vroot; i++) {
1063 + struct vroot_device *vr = &vroot_dev[i];
1064 + struct gendisk *disk = disks[i];
1066 + memset(vr, 0, sizeof(*vr));
1067 + sema_init(&vr->vr_ctl_mutex, 1);
1068 + vr->vr_number = i;
1069 + disk->major = VROOT_MAJOR;
1070 + disk->first_minor = i;
1071 + disk->fops = &vr_fops;
1072 + sprintf(disk->disk_name, "vroot%d", i);
1073 + disk->private_data = vr;
1076 + err = register_vroot_grb(&__vroot_get_real_bdev);
1080 + for (i = 0; i < max_vroot; i++)
1081 + add_disk(disks[i]);
1082 + printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot);
1087 + put_disk(disks[i]);
1092 + unregister_blkdev(VROOT_MAJOR, "vroot");
1093 + printk(KERN_ERR "vroot: ran out of memory\n");
1097 +void vroot_exit(void)
1101 + if (unregister_vroot_grb(&__vroot_get_real_bdev))
1102 + printk(KERN_WARNING "vroot: cannot unregister grb\n");
1104 + for (i = 0; i < max_vroot; i++) {
1105 + del_gendisk(disks[i]);
1106 + put_disk(disks[i]);
1108 + unregister_blkdev(VROOT_MAJOR, "vroot");
1114 +module_init(vroot_init);
1115 +module_exit(vroot_exit);
1119 +static int __init max_vroot_setup(char *str)
1121 + max_vroot = simple_strtol(str, NULL, 0);
1125 +__setup("max_vroot=", max_vroot_setup);
1129 diff -urNp -x '*.orig' linux-4.4/drivers/infiniband/core/addr.c linux-4.4/drivers/infiniband/core/addr.c
1130 --- linux-4.4/drivers/infiniband/core/addr.c 2021-02-24 16:56:10.869057440 +0100
1131 +++ linux-4.4/drivers/infiniband/core/addr.c 2021-02-24 16:56:24.546154835 +0100
1132 @@ -299,7 +299,7 @@ static int addr6_resolve(struct sockaddr
1134 if (ipv6_addr_any(&fl6.saddr)) {
1135 ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
1136 - &fl6.daddr, 0, &fl6.saddr);
1137 + &fl6.daddr, 0, &fl6.saddr, NULL);
1141 diff -urNp -x '*.orig' linux-4.4/drivers/md/dm-ioctl.c linux-4.4/drivers/md/dm-ioctl.c
1142 --- linux-4.4/drivers/md/dm-ioctl.c 2021-02-24 16:56:10.939059645 +0100
1143 +++ linux-4.4/drivers/md/dm-ioctl.c 2021-02-24 16:56:24.546154835 +0100
1145 #include <linux/dm-ioctl.h>
1146 #include <linux/hdreg.h>
1147 #include <linux/compat.h>
1148 +#include <linux/vs_context.h>
1150 #include <asm/uaccess.h>
1152 @@ -114,7 +115,8 @@ static struct hash_cell *__get_name_cell
1153 unsigned int h = hash_str(str);
1155 list_for_each_entry (hc, _name_buckets + h, name_list)
1156 - if (!strcmp(hc->name, str)) {
1157 + if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1158 + !strcmp(hc->name, str)) {
1162 @@ -128,7 +130,8 @@ static struct hash_cell *__get_uuid_cell
1163 unsigned int h = hash_str(str);
1165 list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
1166 - if (!strcmp(hc->uuid, str)) {
1167 + if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1168 + !strcmp(hc->uuid, str)) {
1172 @@ -139,13 +142,15 @@ static struct hash_cell *__get_uuid_cell
1173 static struct hash_cell *__get_dev_cell(uint64_t dev)
1175 struct mapped_device *md;
1176 - struct hash_cell *hc;
1177 + struct hash_cell *hc = NULL;
1179 md = dm_get_md(huge_decode_dev(dev));
1183 - hc = dm_get_mdptr(md);
1184 + if (vx_check(dm_get_xid(md), VS_WATCH_P | VS_IDENT))
1185 + hc = dm_get_mdptr(md);
1190 @@ -467,6 +472,9 @@ typedef int (*ioctl_fn)(struct dm_ioctl
1192 static int remove_all(struct dm_ioctl *param, size_t param_size)
1194 + if (!vx_check(0, VS_ADMIN))
1197 dm_hash_remove_all(true, !!(param->flags & DM_DEFERRED_REMOVE), false);
1198 param->data_size = 0;
1200 @@ -514,6 +522,8 @@ static int list_devices(struct dm_ioctl
1202 for (i = 0; i < NUM_BUCKETS; i++) {
1203 list_for_each_entry (hc, _name_buckets + i, name_list) {
1204 + if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1206 needed += sizeof(struct dm_name_list);
1207 needed += strlen(hc->name) + 1;
1208 needed += ALIGN_MASK;
1209 @@ -537,6 +547,8 @@ static int list_devices(struct dm_ioctl
1211 for (i = 0; i < NUM_BUCKETS; i++) {
1212 list_for_each_entry (hc, _name_buckets + i, name_list) {
1213 + if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1216 old_nl->next = (uint32_t) ((void *) nl -
1218 @@ -1796,8 +1808,8 @@ static int ctl_ioctl(uint command, struc
1219 size_t input_param_size;
1220 struct dm_ioctl param_kernel;
1222 - /* only root can play with this */
1223 - if (!capable(CAP_SYS_ADMIN))
1224 + /* only root and certain contexts can play with this */
1225 + if (!vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_MAPPER))
1228 if (_IOC_TYPE(command) != DM_IOCTL)
1229 diff -urNp -x '*.orig' linux-4.4/drivers/md/dm.c linux-4.4/drivers/md/dm.c
1230 --- linux-4.4/drivers/md/dm.c 2021-02-24 16:56:10.945726521 +0100
1231 +++ linux-4.4/drivers/md/dm.c 2021-02-24 16:56:24.546154835 +0100
1233 #include <linux/elevator.h> /* for rq_end_sector() */
1234 #include <linux/blk-mq.h>
1235 #include <linux/pr.h>
1236 +#include <linux/vs_base.h>
1238 #include <trace/events/block.h>
1240 @@ -144,6 +145,7 @@ struct mapped_device {
1241 struct mutex suspend_lock;
1243 atomic_t open_count;
1247 * The current mapping.
1248 @@ -445,6 +447,7 @@ int dm_deleting_md(struct mapped_device
1249 static int dm_blk_open(struct block_device *bdev, fmode_t mode)
1251 struct mapped_device *md;
1254 spin_lock(&_minor_lock);
1256 @@ -453,17 +456,19 @@ static int dm_blk_open(struct block_devi
1259 if (test_bit(DMF_FREEING, &md->flags) ||
1260 - dm_deleting_md(md)) {
1262 + dm_deleting_md(md))
1266 + if (!vx_check(md->xid, VS_IDENT|VS_HOSTID))
1271 atomic_inc(&md->open_count);
1274 spin_unlock(&_minor_lock);
1276 - return md ? 0 : -ENXIO;
1280 static void dm_blk_close(struct gendisk *disk, fmode_t mode)
1281 @@ -909,6 +914,14 @@ int dm_set_geometry(struct mapped_device
1286 + * Get the xid associated with a dm device
1288 +vxid_t dm_get_xid(struct mapped_device *md)
1293 /*-----------------------------------------------------------------
1295 * A more elegant soln is in the works that uses the queue
1296 @@ -2381,6 +2394,7 @@ static struct mapped_device *alloc_dev(i
1297 INIT_LIST_HEAD(&md->table_devices);
1298 spin_lock_init(&md->uevent_lock);
1300 + md->xid = vx_current_xid();
1301 md->queue = blk_alloc_queue(GFP_KERNEL);
1304 diff -urNp -x '*.orig' linux-4.4/drivers/md/dm.h linux-4.4/drivers/md/dm.h
1305 --- linux-4.4/drivers/md/dm.h 2016-01-11 00:01:32.000000000 +0100
1306 +++ linux-4.4/drivers/md/dm.h 2021-02-24 16:56:24.546154835 +0100
1307 @@ -52,6 +52,8 @@ struct dm_dev_internal {
1309 struct dm_md_mempools;
1311 +vxid_t dm_get_xid(struct mapped_device *md);
1313 /*-----------------------------------------------------------------
1314 * Internal table functions.
1315 *---------------------------------------------------------------*/
1316 diff -urNp -x '*.orig' linux-4.4/drivers/net/tun.c linux-4.4/drivers/net/tun.c
1317 --- linux-4.4/drivers/net/tun.c 2021-02-24 16:56:11.222401901 +0100
1318 +++ linux-4.4/drivers/net/tun.c 2021-02-24 16:56:24.546154835 +0100
1320 #include <linux/nsproxy.h>
1321 #include <linux/virtio_net.h>
1322 #include <linux/rcupdate.h>
1323 +#include <linux/vs_network.h>
1324 #include <net/net_namespace.h>
1325 #include <net/netns/generic.h>
1326 #include <net/rtnetlink.h>
1327 @@ -181,6 +182,7 @@ struct tun_struct {
1333 struct net_device *dev;
1334 netdev_features_t set_features;
1335 @@ -475,6 +477,7 @@ static inline bool tun_not_capable(struc
1336 return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
1337 (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
1338 !ns_capable(net->user_ns, CAP_NET_ADMIN);
1339 + /* !cap_raised(current_cap(), CAP_NET_ADMIN) */
1342 static void tun_set_real_num_queues(struct tun_struct *tun)
1343 @@ -1465,6 +1468,7 @@ static void tun_setup(struct net_device
1345 tun->owner = INVALID_UID;
1346 tun->group = INVALID_GID;
1347 + tun->nid = nx_current_nid();
1349 dev->ethtool_ops = &tun_ethtool_ops;
1350 dev->destructor = tun_free_netdev;
1351 @@ -1661,7 +1665,7 @@ static int tun_set_iff(struct net *net,
1352 int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ?
1355 - if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1356 + if (!nx_ns_capable(net->user_ns, CAP_NET_ADMIN, NXC_TUN_CREATE))
1358 err = security_tun_dev_create();
1360 @@ -2018,6 +2022,16 @@ static long __tun_chr_ioctl(struct file
1361 from_kgid(&init_user_ns, tun->group));
1365 + if (!capable(CAP_CONTEXT))
1368 + /* Set nid owner of the device */
1369 + tun->nid = (vnid_t) arg;
1371 + tun_debug(KERN_INFO, tun, "nid owner set to %u\n", tun->nid);
1375 /* Only allow setting the type when the interface is down */
1376 if (tun->dev->flags & IFF_UP) {
1377 diff -urNp -x '*.orig' linux-4.4/drivers/scsi/cxgbi/libcxgbi.c linux-4.4/drivers/scsi/cxgbi/libcxgbi.c
1378 --- linux-4.4/drivers/scsi/cxgbi/libcxgbi.c 2021-02-24 16:56:11.419074761 +0100
1379 +++ linux-4.4/drivers/scsi/cxgbi/libcxgbi.c 2021-02-24 16:56:24.546154835 +0100
1380 @@ -773,7 +773,8 @@ static struct cxgbi_sock *cxgbi_check_ro
1381 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
1383 err = ipv6_dev_get_saddr(&init_net, idev ? idev->dev : NULL,
1384 - &daddr6->sin6_addr, 0, &pref_saddr);
1385 + &daddr6->sin6_addr, 0, &pref_saddr,
1388 pr_info("failed to get source address to reach %pI6\n",
1389 &daddr6->sin6_addr);
1390 diff -urNp -x '*.orig' linux-4.4/drivers/tty/sysrq.c linux-4.4/drivers/tty/sysrq.c
1391 --- linux-4.4/drivers/tty/sysrq.c 2021-02-24 16:56:11.559079170 +0100
1392 +++ linux-4.4/drivers/tty/sysrq.c 2021-02-24 16:56:24.546154835 +0100
1394 #include <linux/syscalls.h>
1395 #include <linux/of.h>
1396 #include <linux/rcupdate.h>
1397 +#include <linux/vserver/debug.h>
1399 #include <asm/ptrace.h>
1400 #include <asm/irq_regs.h>
1401 @@ -427,6 +428,21 @@ static struct sysrq_key_op sysrq_unrt_op
1402 .enable_mask = SYSRQ_ENABLE_RTNICE,
1406 +#ifdef CONFIG_VSERVER_DEBUG
1407 +static void sysrq_handle_vxinfo(int key)
1409 + dump_vx_info_inactive((key == 'x') ? 0 : 1);
1412 +static struct sysrq_key_op sysrq_showvxinfo_op = {
1413 + .handler = sysrq_handle_vxinfo,
1414 + .help_msg = "conteXt",
1415 + .action_msg = "Show Context Info",
1416 + .enable_mask = SYSRQ_ENABLE_DUMP,
1420 /* Key Operations table and lock */
1421 static DEFINE_SPINLOCK(sysrq_key_table_lock);
1423 @@ -483,7 +499,11 @@ static struct sysrq_key_op *sysrq_key_ta
1424 /* x: May be registered on mips for TLB dump */
1425 /* x: May be registered on ppc/powerpc for xmon */
1426 /* x: May be registered on sparc64 for global PMU dump */
1427 +#ifdef CONFIG_VSERVER_DEBUG
1428 + &sysrq_showvxinfo_op, /* x */
1432 /* y: May be registered on sparc64 for global register dump */
1434 &sysrq_ftrace_dump_op, /* z */
1435 @@ -498,6 +518,8 @@ static int sysrq_key_table_key2index(int
1437 else if ((key >= 'a') && (key <= 'z'))
1438 retval = key + 10 - 'a';
1439 + else if ((key >= 'A') && (key <= 'Z'))
1440 + retval = key + 10 - 'A';
1444 diff -urNp -x '*.orig' linux-4.4/drivers/tty/tty_io.c linux-4.4/drivers/tty/tty_io.c
1445 --- linux-4.4/drivers/tty/tty_io.c 2021-02-24 16:56:11.559079170 +0100
1446 +++ linux-4.4/drivers/tty/tty_io.c 2021-02-24 16:56:24.546154835 +0100
1449 #include <linux/kmod.h>
1450 #include <linux/nsproxy.h>
1451 +#include <linux/vs_pid.h>
1453 #undef TTY_DEBUG_HANGUP
1454 #ifdef TTY_DEBUG_HANGUP
1455 @@ -2296,7 +2297,8 @@ static int tiocsti(struct tty_struct *tt
1457 struct tty_ldisc *ld;
1459 - if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN))
1460 + if (((current->signal->tty != tty) &&
1461 + !vx_capable(CAP_SYS_ADMIN, VXC_TIOCSTI)))
1463 if (get_user(ch, p))
1465 @@ -2607,6 +2609,7 @@ static int tiocspgrp(struct tty_struct *
1467 if (get_user(pgrp_nr, p))
1469 + pgrp_nr = vx_rmap_pid(pgrp_nr);
1473 diff -urNp -x '*.orig' linux-4.4/fs/attr.c linux-4.4/fs/attr.c
1474 --- linux-4.4/fs/attr.c 2021-02-24 16:56:11.675749511 +0100
1475 +++ linux-4.4/fs/attr.c 2021-02-24 16:56:24.549488273 +0100
1477 #include <linux/security.h>
1478 #include <linux/evm.h>
1479 #include <linux/ima.h>
1480 +#include <linux/proc_fs.h>
1481 +#include <linux/devpts_fs.h>
1482 +#include <linux/vs_tag.h>
1485 * inode_change_ok - check if attribute changes to an inode are allowed
1486 @@ -77,6 +80,10 @@ int inode_change_ok(const struct inode *
1490 + /* check for inode tag permission */
1491 + if (dx_permission(inode, MAY_WRITE))
1496 EXPORT_SYMBOL(inode_change_ok);
1497 @@ -147,6 +154,8 @@ void setattr_copy(struct inode *inode, c
1498 inode->i_uid = attr->ia_uid;
1499 if (ia_valid & ATTR_GID)
1500 inode->i_gid = attr->ia_gid;
1501 + if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode))
1502 + inode->i_tag = attr->ia_tag;
1503 if (ia_valid & ATTR_ATIME)
1504 inode->i_atime = timespec_trunc(attr->ia_atime,
1505 inode->i_sb->s_time_gran);
1506 @@ -197,7 +206,8 @@ int notify_change(struct dentry * dentry
1508 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
1510 - if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
1511 + if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
1512 + ATTR_TAG | ATTR_TIMES_SET)) {
1513 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1516 diff -urNp -x '*.orig' linux-4.4/fs/block_dev.c linux-4.4/fs/block_dev.c
1517 --- linux-4.4/fs/block_dev.c 2021-02-24 16:56:11.679082949 +0100
1518 +++ linux-4.4/fs/block_dev.c 2021-02-24 16:56:24.549488273 +0100
1520 #include <linux/log2.h>
1521 #include <linux/cleancache.h>
1522 #include <linux/dax.h>
1523 +#include <linux/vs_device.h>
1524 #include <asm/uaccess.h>
1525 #include "internal.h"
1527 @@ -645,6 +646,7 @@ struct block_device *bdget(dev_t dev)
1528 bdev->bd_invalidated = 0;
1529 inode->i_mode = S_IFBLK;
1530 inode->i_rdev = dev;
1531 + inode->i_mdev = dev;
1532 inode->i_bdev = bdev;
1533 inode->i_data.a_ops = &def_blk_aops;
1534 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
1535 @@ -691,6 +693,11 @@ EXPORT_SYMBOL(bdput);
1536 static struct block_device *bd_acquire(struct inode *inode)
1538 struct block_device *bdev;
1541 + if (!vs_map_blkdev(inode->i_rdev, &mdev, DATTR_OPEN))
1543 + inode->i_mdev = mdev;
1545 spin_lock(&bdev_lock);
1546 bdev = inode->i_bdev;
1547 @@ -701,7 +708,7 @@ static struct block_device *bd_acquire(s
1549 spin_unlock(&bdev_lock);
1551 - bdev = bdget(inode->i_rdev);
1552 + bdev = bdget(mdev);
1554 spin_lock(&bdev_lock);
1555 if (!inode->i_bdev) {
1556 diff -urNp -x '*.orig' linux-4.4/fs/btrfs/ctree.h linux-4.4/fs/btrfs/ctree.h
1557 --- linux-4.4/fs/btrfs/ctree.h 2021-02-24 16:56:11.682416387 +0100
1558 +++ linux-4.4/fs/btrfs/ctree.h 2021-02-24 16:56:24.549488273 +0100
1559 @@ -732,11 +732,14 @@ struct btrfs_inode_item {
1560 /* modification sequence number for NFS */
1565 * a little future expansion, for more than this we can
1566 * just grow the inode item and version it
1568 - __le64 reserved[4];
1569 + __le16 reserved16;
1570 + __le32 reserved32;
1571 + __le64 reserved[3];
1572 struct btrfs_timespec atime;
1573 struct btrfs_timespec ctime;
1574 struct btrfs_timespec mtime;
1575 @@ -2189,6 +2192,8 @@ struct btrfs_ioctl_defrag_range_args {
1576 #define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
1577 #define BTRFS_DEFAULT_MAX_INLINE (8192)
1579 +#define BTRFS_MOUNT_TAGGED (1 << 24)
1581 #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1582 #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
1583 #define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt)
1584 @@ -2531,6 +2536,7 @@ BTRFS_SETGET_FUNCS(inode_block_group, st
1585 BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
1586 BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
1587 BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
1588 +BTRFS_SETGET_FUNCS(inode_tag, struct btrfs_inode_item, tag, 16);
1589 BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
1590 BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
1591 BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
1592 @@ -2578,6 +2584,10 @@ BTRFS_SETGET_FUNCS(extent_flags, struct
1594 BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
1596 +#define BTRFS_INODE_IXUNLINK (1 << 24)
1597 +#define BTRFS_INODE_BARRIER (1 << 25)
1598 +#define BTRFS_INODE_COW (1 << 26)
1601 BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
1603 @@ -4041,6 +4051,7 @@ long btrfs_ioctl(struct file *file, unsi
1604 void btrfs_update_iflags(struct inode *inode);
1605 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
1606 int btrfs_is_empty_uuid(u8 *uuid);
1607 +int btrfs_sync_flags(struct inode *inode, int, int);
1608 int btrfs_defrag_file(struct inode *inode, struct file *file,
1609 struct btrfs_ioctl_defrag_range_args *range,
1610 u64 newer_than, unsigned long max_pages);
1611 diff -urNp -x '*.orig' linux-4.4/fs/btrfs/disk-io.c linux-4.4/fs/btrfs/disk-io.c
1612 --- linux-4.4/fs/btrfs/disk-io.c 2021-02-24 16:56:11.685749826 +0100
1613 +++ linux-4.4/fs/btrfs/disk-io.c 2021-02-24 16:56:24.549488273 +0100
1614 @@ -2666,6 +2666,9 @@ int open_ctree(struct super_block *sb,
1618 + if (btrfs_test_opt(tree_root, TAGGED))
1619 + sb->s_flags |= MS_TAGGED;
1621 features = btrfs_super_incompat_flags(disk_super) &
1622 ~BTRFS_FEATURE_INCOMPAT_SUPP;
1624 diff -urNp -x '*.orig' linux-4.4/fs/btrfs/inode.c linux-4.4/fs/btrfs/inode.c
1625 --- linux-4.4/fs/btrfs/inode.c 2021-02-24 16:56:11.692416702 +0100
1626 +++ linux-4.4/fs/btrfs/inode.c 2021-02-24 16:56:24.552821711 +0100
1628 #include <linux/blkdev.h>
1629 #include <linux/posix_acl_xattr.h>
1630 #include <linux/uio.h>
1631 +#include <linux/vs_tag.h>
1633 #include "disk-io.h"
1634 #include "transaction.h"
1635 @@ -3666,6 +3667,9 @@ static void btrfs_read_locked_inode(stru
1643 bool filled = false;
1644 int first_xattr_slot;
1645 @@ -3693,8 +3697,14 @@ static void btrfs_read_locked_inode(stru
1646 struct btrfs_inode_item);
1647 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
1648 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
1649 - i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
1650 - i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
1652 + kuid = make_kuid(&init_user_ns, btrfs_inode_uid(leaf, inode_item));
1653 + kgid = make_kgid(&init_user_ns, btrfs_inode_gid(leaf, inode_item));
1654 + ktag = make_ktag(&init_user_ns, btrfs_inode_tag(leaf, inode_item));
1656 + inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
1657 + inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
1658 + inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, ktag);
1659 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
1661 inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
1662 @@ -3850,11 +3860,18 @@ static void fill_inode_item(struct btrfs
1663 struct inode *inode)
1665 struct btrfs_map_token token;
1666 + uid_t uid = from_kuid(&init_user_ns,
1667 + TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
1668 + gid_t gid = from_kgid(&init_user_ns,
1669 + TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
1671 btrfs_init_map_token(&token);
1673 - btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
1674 - btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
1675 + btrfs_set_token_inode_uid(leaf, item, uid, &token);
1676 + btrfs_set_token_inode_gid(leaf, item, gid, &token);
1677 +#ifdef CONFIG_TAGGING_INTERN
1678 + btrfs_set_token_inode_tag(leaf, item, i_tag_read(inode), &token);
1680 btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
1682 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
1683 @@ -10133,6 +10150,7 @@ static const struct inode_operations btr
1684 .listxattr = btrfs_listxattr,
1685 .removexattr = btrfs_removexattr,
1686 .permission = btrfs_permission,
1687 + .sync_flags = btrfs_sync_flags,
1688 .get_acl = btrfs_get_acl,
1689 .set_acl = btrfs_set_acl,
1690 .update_time = btrfs_update_time,
1691 @@ -10141,6 +10159,7 @@ static const struct inode_operations btr
1692 static const struct inode_operations btrfs_dir_ro_inode_operations = {
1693 .lookup = btrfs_lookup,
1694 .permission = btrfs_permission,
1695 + .sync_flags = btrfs_sync_flags,
1696 .get_acl = btrfs_get_acl,
1697 .set_acl = btrfs_set_acl,
1698 .update_time = btrfs_update_time,
1699 @@ -10211,6 +10230,7 @@ static const struct inode_operations btr
1700 .removexattr = btrfs_removexattr,
1701 .permission = btrfs_permission,
1702 .fiemap = btrfs_fiemap,
1703 + .sync_flags = btrfs_sync_flags,
1704 .get_acl = btrfs_get_acl,
1705 .set_acl = btrfs_set_acl,
1706 .update_time = btrfs_update_time,
1707 diff -urNp -x '*.orig' linux-4.4/fs/btrfs/ioctl.c linux-4.4/fs/btrfs/ioctl.c
1708 --- linux-4.4/fs/btrfs/ioctl.c 2021-02-24 16:56:11.692416702 +0100
1709 +++ linux-4.4/fs/btrfs/ioctl.c 2021-02-24 16:56:24.552821711 +0100
1710 @@ -109,10 +109,13 @@ static unsigned int btrfs_flags_to_ioctl
1712 unsigned int iflags = 0;
1714 - if (flags & BTRFS_INODE_SYNC)
1715 - iflags |= FS_SYNC_FL;
1716 if (flags & BTRFS_INODE_IMMUTABLE)
1717 iflags |= FS_IMMUTABLE_FL;
1718 + if (flags & BTRFS_INODE_IXUNLINK)
1719 + iflags |= FS_IXUNLINK_FL;
1721 + if (flags & BTRFS_INODE_SYNC)
1722 + iflags |= FS_SYNC_FL;
1723 if (flags & BTRFS_INODE_APPEND)
1724 iflags |= FS_APPEND_FL;
1725 if (flags & BTRFS_INODE_NODUMP)
1726 @@ -129,34 +132,84 @@ static unsigned int btrfs_flags_to_ioctl
1727 else if (flags & BTRFS_INODE_NOCOMPRESS)
1728 iflags |= FS_NOCOMP_FL;
1730 + if (flags & BTRFS_INODE_BARRIER)
1731 + iflags |= FS_BARRIER_FL;
1732 + if (flags & BTRFS_INODE_COW)
1733 + iflags |= FS_COW_FL;
1738 - * Update inode->i_flags based on the btrfs internal flags.
1739 + * Update inode->i_(v)flags based on the btrfs internal flags.
1741 void btrfs_update_iflags(struct inode *inode)
1743 struct btrfs_inode *ip = BTRFS_I(inode);
1744 unsigned int new_fl = 0;
1746 - if (ip->flags & BTRFS_INODE_SYNC)
1748 if (ip->flags & BTRFS_INODE_IMMUTABLE)
1749 new_fl |= S_IMMUTABLE;
1750 + if (ip->flags & BTRFS_INODE_IXUNLINK)
1751 + new_fl |= S_IXUNLINK;
1753 + if (ip->flags & BTRFS_INODE_SYNC)
1755 if (ip->flags & BTRFS_INODE_APPEND)
1757 if (ip->flags & BTRFS_INODE_NOATIME)
1758 new_fl |= S_NOATIME;
1759 if (ip->flags & BTRFS_INODE_DIRSYNC)
1760 new_fl |= S_DIRSYNC;
1762 set_mask_bits(&inode->i_flags,
1763 - S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
1764 + S_SYNC | S_APPEND | S_IMMUTABLE | S_IXUNLINK | S_NOATIME | S_DIRSYNC,
1768 + if (ip->flags & BTRFS_INODE_BARRIER)
1769 + new_fl |= V_BARRIER;
1770 + if (ip->flags & BTRFS_INODE_COW)
1773 + set_mask_bits(&inode->i_vflags,
1774 + V_BARRIER | V_COW, new_fl);
1778 + * Update btrfs internal flags from inode->i_(v)flags.
1780 +void btrfs_update_flags(struct inode *inode)
1782 + struct btrfs_inode *ip = BTRFS_I(inode);
1784 + unsigned int flags = inode->i_flags;
1785 + unsigned int vflags = inode->i_vflags;
1787 + ip->flags &= ~(BTRFS_INODE_SYNC | BTRFS_INODE_APPEND |
1788 + BTRFS_INODE_IMMUTABLE | BTRFS_INODE_IXUNLINK |
1789 + BTRFS_INODE_NOATIME | BTRFS_INODE_DIRSYNC |
1790 + BTRFS_INODE_BARRIER | BTRFS_INODE_COW);
1792 + if (flags & S_IMMUTABLE)
1793 + ip->flags |= BTRFS_INODE_IMMUTABLE;
1794 + if (flags & S_IXUNLINK)
1795 + ip->flags |= BTRFS_INODE_IXUNLINK;
1797 + if (flags & S_SYNC)
1798 + ip->flags |= BTRFS_INODE_SYNC;
1799 + if (flags & S_APPEND)
1800 + ip->flags |= BTRFS_INODE_APPEND;
1801 + if (flags & S_NOATIME)
1802 + ip->flags |= BTRFS_INODE_NOATIME;
1803 + if (flags & S_DIRSYNC)
1804 + ip->flags |= BTRFS_INODE_DIRSYNC;
1806 + if (vflags & V_BARRIER)
1807 + ip->flags |= BTRFS_INODE_BARRIER;
1808 + if (vflags & V_COW)
1809 + ip->flags |= BTRFS_INODE_COW;
1813 * Inherit flags from the parent inode.
1815 * Currently only the compression flags and the cow flags are inherited.
1816 @@ -169,6 +222,7 @@ void btrfs_inherit_iflags(struct inode *
1819 flags = BTRFS_I(dir)->flags;
1820 + flags &= ~BTRFS_INODE_BARRIER;
1822 if (flags & BTRFS_INODE_NOCOMPRESS) {
1823 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
1824 @@ -187,6 +241,30 @@ void btrfs_inherit_iflags(struct inode *
1825 btrfs_update_iflags(inode);
1828 +int btrfs_sync_flags(struct inode *inode, int flags, int vflags)
1830 + struct btrfs_inode *ip = BTRFS_I(inode);
1831 + struct btrfs_root *root = ip->root;
1832 + struct btrfs_trans_handle *trans;
1835 + trans = btrfs_join_transaction(root);
1838 + inode->i_flags = flags;
1839 + inode->i_vflags = vflags;
1840 + btrfs_update_flags(inode);
1842 + ret = btrfs_update_inode(trans, root, inode);
1845 + btrfs_update_iflags(inode);
1846 + inode->i_ctime = CURRENT_TIME;
1847 + btrfs_end_transaction(trans, root);
1852 static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
1854 struct btrfs_inode *ip = BTRFS_I(file_inode(file));
1855 @@ -249,21 +327,27 @@ static int btrfs_ioctl_setflags(struct f
1857 flags = btrfs_mask_flags(inode->i_mode, flags);
1858 oldflags = btrfs_flags_to_ioctl(ip->flags);
1859 - if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
1860 + if ((flags ^ oldflags) & (FS_APPEND_FL |
1861 + FS_IMMUTABLE_FL | FS_IXUNLINK_FL)) {
1862 if (!capable(CAP_LINUX_IMMUTABLE)) {
1868 - if (flags & FS_SYNC_FL)
1869 - ip->flags |= BTRFS_INODE_SYNC;
1871 - ip->flags &= ~BTRFS_INODE_SYNC;
1872 if (flags & FS_IMMUTABLE_FL)
1873 ip->flags |= BTRFS_INODE_IMMUTABLE;
1875 ip->flags &= ~BTRFS_INODE_IMMUTABLE;
1876 + if (flags & FS_IXUNLINK_FL)
1877 + ip->flags |= BTRFS_INODE_IXUNLINK;
1879 + ip->flags &= ~BTRFS_INODE_IXUNLINK;
1881 + if (flags & FS_SYNC_FL)
1882 + ip->flags |= BTRFS_INODE_SYNC;
1884 + ip->flags &= ~BTRFS_INODE_SYNC;
1885 if (flags & FS_APPEND_FL)
1886 ip->flags |= BTRFS_INODE_APPEND;
1888 diff -urNp -x '*.orig' linux-4.4/fs/btrfs/super.c linux-4.4/fs/btrfs/super.c
1889 --- linux-4.4/fs/btrfs/super.c 2021-02-24 16:56:11.699083579 +0100
1890 +++ linux-4.4/fs/btrfs/super.c 2021-02-24 16:56:24.552821711 +0100
1891 @@ -306,7 +306,7 @@ enum {
1892 #ifdef CONFIG_BTRFS_DEBUG
1893 Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
1896 + Opt_tag, Opt_notag, Opt_tagid, Opt_err,
1899 static match_table_t tokens = {
1900 @@ -363,6 +363,9 @@ static match_table_t tokens = {
1901 {Opt_fragment_metadata, "fragment=metadata"},
1902 {Opt_fragment_all, "fragment=all"},
1905 + {Opt_notag, "notag"},
1906 + {Opt_tagid, "tagid=%u"},
1910 @@ -745,6 +748,22 @@ int btrfs_parse_options(struct btrfs_roo
1911 btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
1914 +#ifndef CONFIG_TAGGING_NONE
1916 + printk(KERN_INFO "btrfs: use tagging\n");
1917 + btrfs_set_opt(info->mount_opt, TAGGED);
1920 + printk(KERN_INFO "btrfs: disabled tagging\n");
1921 + btrfs_clear_opt(info->mount_opt, TAGGED);
1924 +#ifdef CONFIG_PROPAGATE
1927 + btrfs_set_opt(info->mount_opt, TAGGED);
1931 btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
1933 @@ -1653,6 +1672,12 @@ static int btrfs_remount(struct super_bl
1934 btrfs_resize_thread_pool(fs_info,
1935 fs_info->thread_pool_size, old_thread_pool_size);
1937 + if (btrfs_test_opt(root, TAGGED) && !(sb->s_flags & MS_TAGGED)) {
1938 + printk("btrfs: %s: tagging not permitted on remount.\n",
1943 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
1946 diff -urNp -x '*.orig' linux-4.4/fs/char_dev.c linux-4.4/fs/char_dev.c
1947 --- linux-4.4/fs/char_dev.c 2021-02-24 16:56:11.709083894 +0100
1948 +++ linux-4.4/fs/char_dev.c 2021-02-24 16:56:24.552821711 +0100
1950 #include <linux/mutex.h>
1951 #include <linux/backing-dev.h>
1952 #include <linux/tty.h>
1953 +#include <linux/vs_context.h>
1954 +#include <linux/vs_device.h>
1956 #include "internal.h"
1958 @@ -356,14 +358,21 @@ static int chrdev_open(struct inode *ino
1960 struct cdev *new = NULL;
1964 + if (!vs_map_chrdev(inode->i_rdev, &mdev, DATTR_OPEN))
1966 + inode->i_mdev = mdev;
1968 spin_lock(&cdev_lock);
1971 struct kobject *kobj;
1974 spin_unlock(&cdev_lock);
1975 - kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
1977 + kobj = kobj_lookup(cdev_map, mdev, &idx);
1980 new = container_of(kobj, struct cdev, kobj);
1981 diff -urNp -x '*.orig' linux-4.4/fs/dcache.c linux-4.4/fs/dcache.c
1982 --- linux-4.4/fs/dcache.c 2021-02-24 16:56:11.722417647 +0100
1983 +++ linux-4.4/fs/dcache.c 2021-02-24 16:56:24.552821711 +0100
1985 #include <linux/ratelimit.h>
1986 #include <linux/list_lru.h>
1987 #include <linux/kasan.h>
1988 +#include <linux/vs_limit.h>
1990 #include "internal.h"
1992 @@ -683,6 +684,7 @@ static inline bool fast_dput(struct dent
1993 spin_lock(&dentry->d_lock);
1994 if (dentry->d_lockref.count > 1) {
1995 dentry->d_lockref.count--;
1996 + vx_dentry_dec(dentry);
1997 spin_unlock(&dentry->d_lock);
2000 @@ -812,6 +814,7 @@ repeat:
2001 dentry_lru_add(dentry);
2003 dentry->d_lockref.count--;
2004 + vx_dentry_dec(dentry);
2005 spin_unlock(&dentry->d_lock);
2008 @@ -829,6 +832,7 @@ EXPORT_SYMBOL(dput);
2009 static inline void __dget_dlock(struct dentry *dentry)
2011 dentry->d_lockref.count++;
2012 + vx_dentry_inc(dentry);
2015 static inline void __dget(struct dentry *dentry)
2016 @@ -841,6 +845,8 @@ struct dentry *dget_parent(struct dentry
2020 + vx_dentry_dec(dentry);
2023 * Do optimistic parent lookup without any
2025 @@ -871,6 +877,7 @@ repeat:
2027 BUG_ON(!ret->d_lockref.count);
2028 ret->d_lockref.count++;
2029 + vx_dentry_inc(ret);
2030 spin_unlock(&ret->d_lock);
2033 @@ -1025,6 +1032,7 @@ static void shrink_dentry_list(struct li
2034 parent = lock_parent(dentry);
2035 if (dentry->d_lockref.count != 1) {
2036 dentry->d_lockref.count--;
2037 + vx_dentry_dec(dentry);
2038 spin_unlock(&dentry->d_lock);
2040 spin_unlock(&parent->d_lock);
2041 @@ -1581,6 +1589,9 @@ struct dentry *__d_alloc(struct super_bl
2042 struct dentry *dentry;
2045 + if (!vx_dentry_avail(1))
2048 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
2051 @@ -1619,6 +1630,7 @@ struct dentry *__d_alloc(struct super_bl
2053 dentry->d_lockref.count = 1;
2054 dentry->d_flags = 0;
2055 + vx_dentry_inc(dentry);
2056 spin_lock_init(&dentry->d_lock);
2057 seqcount_init(&dentry->d_seq);
2058 dentry->d_inode = NULL;
2059 @@ -2379,6 +2391,7 @@ struct dentry *__d_lookup(const struct d
2062 dentry->d_lockref.count++;
2063 + vx_dentry_inc(dentry);
2065 spin_unlock(&dentry->d_lock);
2067 @@ -3395,6 +3408,7 @@ static enum d_walk_ret d_genocide_kill(v
2068 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2069 dentry->d_flags |= DCACHE_GENOCIDE;
2070 dentry->d_lockref.count--;
2071 + vx_dentry_dec(dentry);
2074 return D_WALK_CONTINUE;
2075 diff -urNp -x '*.orig' linux-4.4/fs/devpts/inode.c linux-4.4/fs/devpts/inode.c
2076 --- linux-4.4/fs/devpts/inode.c 2021-02-24 16:56:11.722417647 +0100
2077 +++ linux-4.4/fs/devpts/inode.c 2021-02-24 16:56:24.552821711 +0100
2079 #include <linux/parser.h>
2080 #include <linux/fsnotify.h>
2081 #include <linux/seq_file.h>
2082 +#include <linux/vs_base.h>
2084 #define DEVPTS_DEFAULT_MODE 0600
2087 #define DEVPTS_DEFAULT_PTMX_MODE 0000
2088 #define PTMX_MINOR 2
2090 +static int devpts_permission(struct inode *inode, int mask)
2092 + int ret = -EACCES;
2094 + /* devpts is xid tagged */
2095 + if (vx_check((vxid_t)i_tag_read(inode), VS_WATCH_P | VS_IDENT))
2096 + ret = generic_permission(inode, mask);
2100 +static struct inode_operations devpts_file_inode_operations = {
2101 + .permission = devpts_permission,
2106 * sysctl support for setting limits on the number of Unix98 ptys allocated.
2107 * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly.
2108 @@ -353,6 +369,34 @@ static int devpts_show_options(struct se
2112 +static int devpts_filter(struct dentry *de)
2116 + /* devpts is xid tagged */
2117 + if (de && de->d_inode)
2118 + xid = (vxid_t)i_tag_read(de->d_inode);
2119 +#ifdef CONFIG_VSERVER_WARN_DEVPTS
2121 + vxwprintk_task(1, "devpts " VS_Q("%.*s") " without inode.",
2122 + de->d_name.len, de->d_name.name);
2124 + return vx_check(xid, VS_WATCH_P | VS_IDENT);
2127 +static int devpts_readdir(struct file * filp, struct dir_context *ctx)
2129 + return dcache_readdir_filter(filp, ctx, devpts_filter);
2132 +static struct file_operations devpts_dir_operations = {
2133 + .open = dcache_dir_open,
2134 + .release = dcache_dir_close,
2135 + .llseek = dcache_dir_lseek,
2136 + .read = generic_read_dir,
2137 + .iterate = devpts_readdir,
2140 static const struct super_operations devpts_sops = {
2141 .statfs = simple_statfs,
2142 .remount_fs = devpts_remount,
2143 @@ -397,8 +441,10 @@ devpts_fill_super(struct super_block *s,
2144 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2145 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
2146 inode->i_op = &simple_dir_inode_operations;
2147 - inode->i_fop = &simple_dir_operations;
2148 + inode->i_fop = &devpts_dir_operations;
2149 set_nlink(inode, 2);
2150 + /* devpts is xid tagged */
2151 + i_tag_write(inode, (vtag_t)vx_current_xid());
2153 s->s_root = d_make_root(inode);
2155 @@ -630,6 +676,9 @@ struct inode *devpts_pty_new(struct pts_
2156 inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
2157 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2158 init_special_inode(inode, S_IFCHR|opts->mode, device);
2159 + /* devpts is xid tagged */
2160 + i_tag_write(inode, (vtag_t)vx_current_xid());
2161 + inode->i_op = &devpts_file_inode_operations;
2162 inode->i_private = priv;
2164 sprintf(s, "%d", index);
2165 diff -urNp -x '*.orig' linux-4.4/fs/ext2/balloc.c linux-4.4/fs/ext2/balloc.c
2166 --- linux-4.4/fs/ext2/balloc.c 2016-01-11 00:01:32.000000000 +0100
2167 +++ linux-4.4/fs/ext2/balloc.c 2021-02-24 16:56:24.552821711 +0100
2168 @@ -693,7 +693,6 @@ ext2_try_to_allocate(struct super_block
2170 end = EXT2_BLOCKS_PER_GROUP(sb);
2173 BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb));
2176 diff -urNp -x '*.orig' linux-4.4/fs/ext2/ext2.h linux-4.4/fs/ext2/ext2.h
2177 --- linux-4.4/fs/ext2/ext2.h 2016-01-11 00:01:32.000000000 +0100
2178 +++ linux-4.4/fs/ext2/ext2.h 2021-02-24 16:56:24.552821711 +0100
2179 @@ -244,8 +244,12 @@ struct ext2_group_desc
2180 #define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */
2181 #define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */
2182 #define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
2183 +#define EXT2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */
2184 #define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
2186 +#define EXT2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */
2187 +#define EXT2_COW_FL FS_COW_FL /* Copy on Write marker */
2189 #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
2190 #define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
2192 @@ -329,7 +333,8 @@ struct ext2_inode {
2194 __le16 l_i_uid_high; /* these 2 fields */
2195 __le16 l_i_gid_high; /* were reserved2[0] */
2196 - __u32 l_i_reserved2;
2197 + __le16 l_i_tag; /* Context Tag */
2198 + __u16 l_i_reserved2;
2201 __u8 h_i_frag; /* Fragment number */
2202 @@ -357,6 +362,7 @@ struct ext2_inode {
2203 #define i_gid_low i_gid
2204 #define i_uid_high osd2.linux2.l_i_uid_high
2205 #define i_gid_high osd2.linux2.l_i_gid_high
2206 +#define i_raw_tag osd2.linux2.l_i_tag
2207 #define i_reserved2 osd2.linux2.l_i_reserved2
2210 @@ -389,6 +395,7 @@ struct ext2_inode {
2212 #define EXT2_MOUNT_DAX 0
2214 +#define EXT2_MOUNT_TAGGED 0x200000 /* Enable Context Tags */
2217 #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
2218 @@ -776,6 +783,7 @@ extern void ext2_set_inode_flags(struct
2219 extern void ext2_get_inode_flags(struct ext2_inode_info *);
2220 extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2221 u64 start, u64 len);
2222 +extern int ext2_sync_flags(struct inode *, int, int);
2225 extern long ext2_ioctl(struct file *, unsigned int, unsigned long);
2226 diff -urNp -x '*.orig' linux-4.4/fs/ext2/file.c linux-4.4/fs/ext2/file.c
2227 --- linux-4.4/fs/ext2/file.c 2016-01-11 00:01:32.000000000 +0100
2228 +++ linux-4.4/fs/ext2/file.c 2021-02-24 16:56:24.552821711 +0100
2229 @@ -202,4 +202,5 @@ const struct inode_operations ext2_file_
2230 .get_acl = ext2_get_acl,
2231 .set_acl = ext2_set_acl,
2232 .fiemap = ext2_fiemap,
2233 + .sync_flags = ext2_sync_flags,
2235 diff -urNp -x '*.orig' linux-4.4/fs/ext2/ialloc.c linux-4.4/fs/ext2/ialloc.c
2236 --- linux-4.4/fs/ext2/ialloc.c 2021-02-24 16:56:11.729084524 +0100
2237 +++ linux-4.4/fs/ext2/ialloc.c 2021-02-24 16:56:24.556155150 +0100
2239 #include <linux/backing-dev.h>
2240 #include <linux/buffer_head.h>
2241 #include <linux/random.h>
2242 +#include <linux/vs_tag.h>
2246 @@ -547,6 +548,7 @@ got:
2247 inode->i_mode = mode;
2248 inode->i_uid = current_fsuid();
2249 inode->i_gid = dir->i_gid;
2250 + i_tag_write(inode, dx_current_fstag(sb));
2252 inode_init_owner(inode, dir, mode);
2254 diff -urNp -x '*.orig' linux-4.4/fs/ext2/inode.c linux-4.4/fs/ext2/inode.c
2255 --- linux-4.4/fs/ext2/inode.c 2021-02-24 16:56:11.729084524 +0100
2256 +++ linux-4.4/fs/ext2/inode.c 2021-02-24 16:56:24.556155150 +0100
2258 #include <linux/fiemap.h>
2259 #include <linux/namei.h>
2260 #include <linux/uio.h>
2261 +#include <linux/vs_tag.h>
2265 @@ -1274,39 +1275,62 @@ void ext2_set_inode_flags(struct inode *
2267 unsigned int flags = EXT2_I(inode)->i_flags;
2269 - inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
2270 - S_DIRSYNC | S_DAX);
2271 + inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | S_DAX |
2272 + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2274 + if (flags & EXT2_IMMUTABLE_FL)
2275 + inode->i_flags |= S_IMMUTABLE;
2276 + if (flags & EXT2_IXUNLINK_FL)
2277 + inode->i_flags |= S_IXUNLINK;
2279 if (flags & EXT2_SYNC_FL)
2280 inode->i_flags |= S_SYNC;
2281 if (flags & EXT2_APPEND_FL)
2282 inode->i_flags |= S_APPEND;
2283 - if (flags & EXT2_IMMUTABLE_FL)
2284 - inode->i_flags |= S_IMMUTABLE;
2285 if (flags & EXT2_NOATIME_FL)
2286 inode->i_flags |= S_NOATIME;
2287 if (flags & EXT2_DIRSYNC_FL)
2288 inode->i_flags |= S_DIRSYNC;
2289 if (test_opt(inode->i_sb, DAX))
2290 inode->i_flags |= S_DAX;
2292 + inode->i_vflags &= ~(V_BARRIER | V_COW);
2294 + if (flags & EXT2_BARRIER_FL)
2295 + inode->i_vflags |= V_BARRIER;
2296 + if (flags & EXT2_COW_FL)
2297 + inode->i_vflags |= V_COW;
2300 /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
2301 void ext2_get_inode_flags(struct ext2_inode_info *ei)
2303 unsigned int flags = ei->vfs_inode.i_flags;
2304 + unsigned int vflags = ei->vfs_inode.i_vflags;
2306 + ei->i_flags &= ~(EXT2_SYNC_FL | EXT2_APPEND_FL |
2307 + EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL |
2308 + EXT2_NOATIME_FL | EXT2_DIRSYNC_FL |
2309 + EXT2_BARRIER_FL | EXT2_COW_FL);
2311 + if (flags & S_IMMUTABLE)
2312 + ei->i_flags |= EXT2_IMMUTABLE_FL;
2313 + if (flags & S_IXUNLINK)
2314 + ei->i_flags |= EXT2_IXUNLINK_FL;
2316 - ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|
2317 - EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL);
2319 ei->i_flags |= EXT2_SYNC_FL;
2320 if (flags & S_APPEND)
2321 ei->i_flags |= EXT2_APPEND_FL;
2322 - if (flags & S_IMMUTABLE)
2323 - ei->i_flags |= EXT2_IMMUTABLE_FL;
2324 if (flags & S_NOATIME)
2325 ei->i_flags |= EXT2_NOATIME_FL;
2326 if (flags & S_DIRSYNC)
2327 ei->i_flags |= EXT2_DIRSYNC_FL;
2329 + if (vflags & V_BARRIER)
2330 + ei->i_flags |= EXT2_BARRIER_FL;
2331 + if (vflags & V_COW)
2332 + ei->i_flags |= EXT2_COW_FL;
2335 struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
2336 @@ -1342,8 +1366,10 @@ struct inode *ext2_iget (struct super_bl
2337 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2338 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2340 - i_uid_write(inode, i_uid);
2341 - i_gid_write(inode, i_gid);
2342 + i_uid_write(inode, INOTAG_UID(DX_TAG(inode), i_uid, i_gid));
2343 + i_gid_write(inode, INOTAG_GID(DX_TAG(inode), i_uid, i_gid));
2344 + i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), i_uid, i_gid,
2345 + le16_to_cpu(raw_inode->i_raw_tag)));
2346 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
2347 inode->i_size = le32_to_cpu(raw_inode->i_size);
2348 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2349 @@ -1439,8 +1465,10 @@ static int __ext2_write_inode(struct ino
2350 struct ext2_inode_info *ei = EXT2_I(inode);
2351 struct super_block *sb = inode->i_sb;
2352 ino_t ino = inode->i_ino;
2353 - uid_t uid = i_uid_read(inode);
2354 - gid_t gid = i_gid_read(inode);
2355 + uid_t uid = from_kuid(&init_user_ns,
2356 + TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
2357 + gid_t gid = from_kgid(&init_user_ns,
2358 + TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
2359 struct buffer_head * bh;
2360 struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
2362 @@ -1476,6 +1504,9 @@ static int __ext2_write_inode(struct ino
2363 raw_inode->i_uid_high = 0;
2364 raw_inode->i_gid_high = 0;
2366 +#ifdef CONFIG_TAGGING_INTERN
2367 + raw_inode->i_raw_tag = cpu_to_le16(i_tag_read(inode));
2369 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2370 raw_inode->i_size = cpu_to_le32(inode->i_size);
2371 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
2372 @@ -1559,7 +1590,8 @@ int ext2_setattr(struct dentry *dentry,
2375 if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
2376 - (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
2377 + (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)) ||
2378 + (iattr->ia_valid & ATTR_TAG && !tag_eq(iattr->ia_tag, inode->i_tag))) {
2379 error = dquot_transfer(inode, iattr);
2382 diff -urNp -x '*.orig' linux-4.4/fs/ext2/ioctl.c linux-4.4/fs/ext2/ioctl.c
2383 --- linux-4.4/fs/ext2/ioctl.c 2016-01-11 00:01:32.000000000 +0100
2384 +++ linux-4.4/fs/ext2/ioctl.c 2021-02-24 16:56:24.556155150 +0100
2386 #include <asm/uaccess.h>
2389 +int ext2_sync_flags(struct inode *inode, int flags, int vflags)
2391 + inode->i_flags = flags;
2392 + inode->i_vflags = vflags;
2393 + ext2_get_inode_flags(EXT2_I(inode));
2394 + inode->i_ctime = CURRENT_TIME_SEC;
2395 + mark_inode_dirty(inode);
2399 long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2401 struct inode *inode = file_inode(filp);
2402 @@ -51,6 +61,11 @@ long ext2_ioctl(struct file *filp, unsig
2404 flags = ext2_mask_flags(inode->i_mode, flags);
2406 + if (IS_BARRIER(inode)) {
2407 + vxwprintk_task(1, "messing with the barrier.");
2411 mutex_lock(&inode->i_mutex);
2412 /* Is it quota file? Do not allow user to mess with it */
2413 if (IS_NOQUOTA(inode)) {
2414 @@ -66,7 +81,9 @@ long ext2_ioctl(struct file *filp, unsig
2416 * This test looks nicer. Thanks to Pauline Middelink
2418 - if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
2419 + if ((oldflags & EXT2_IMMUTABLE_FL) ||
2420 + ((flags ^ oldflags) & (EXT2_APPEND_FL |
2421 + EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL))) {
2422 if (!capable(CAP_LINUX_IMMUTABLE)) {
2423 mutex_unlock(&inode->i_mutex);
2425 @@ -74,7 +91,7 @@ long ext2_ioctl(struct file *filp, unsig
2429 - flags = flags & EXT2_FL_USER_MODIFIABLE;
2430 + flags &= EXT2_FL_USER_MODIFIABLE;
2431 flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE;
2432 ei->i_flags = flags;
2434 diff -urNp -x '*.orig' linux-4.4/fs/ext2/namei.c linux-4.4/fs/ext2/namei.c
2435 --- linux-4.4/fs/ext2/namei.c 2021-02-24 16:56:11.729084524 +0100
2436 +++ linux-4.4/fs/ext2/namei.c 2021-02-24 16:56:24.556155150 +0100
2439 #include <linux/pagemap.h>
2440 #include <linux/quotaops.h>
2441 +#include <linux/vs_tag.h>
2445 @@ -71,6 +72,7 @@ static struct dentry *ext2_lookup(struct
2446 (unsigned long) ino);
2447 return ERR_PTR(-EIO);
2449 + dx_propagate_tag(nd, inode);
2451 return d_splice_alias(inode, dentry);
2453 @@ -444,6 +446,7 @@ const struct inode_operations ext2_speci
2454 .removexattr = generic_removexattr,
2456 .setattr = ext2_setattr,
2457 + .sync_flags = ext2_sync_flags,
2458 .get_acl = ext2_get_acl,
2459 .set_acl = ext2_set_acl,
2461 diff -urNp -x '*.orig' linux-4.4/fs/ext2/super.c linux-4.4/fs/ext2/super.c
2462 --- linux-4.4/fs/ext2/super.c 2021-02-24 16:56:11.729084524 +0100
2463 +++ linux-4.4/fs/ext2/super.c 2021-02-24 16:56:24.556155150 +0100
2464 @@ -408,7 +408,8 @@ enum {
2465 Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
2466 Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
2467 Opt_acl, Opt_noacl, Opt_xip, Opt_dax, Opt_ignore, Opt_err, Opt_quota,
2468 - Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation
2469 + Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation,
2470 + Opt_tag, Opt_notag, Opt_tagid
2473 static const match_table_t tokens = {
2474 @@ -436,6 +437,9 @@ static const match_table_t tokens = {
2476 {Opt_noacl, "noacl"},
2479 + {Opt_notag, "notag"},
2480 + {Opt_tagid, "tagid=%u"},
2482 {Opt_grpquota, "grpquota"},
2483 {Opt_ignore, "noquota"},
2484 @@ -520,6 +524,20 @@ static int parse_options(char *options,
2486 set_opt (sbi->s_mount_opt, NO_UID32);
2488 +#ifndef CONFIG_TAGGING_NONE
2490 + set_opt (sbi->s_mount_opt, TAGGED);
2493 + clear_opt (sbi->s_mount_opt, TAGGED);
2496 +#ifdef CONFIG_PROPAGATE
2499 + set_opt (sbi->s_mount_opt, TAGGED);
2503 clear_opt (sbi->s_mount_opt, CHECK);
2505 @@ -895,6 +913,8 @@ static int ext2_fill_super(struct super_
2506 if (!parse_options((char *) data, sb))
2509 + if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED)
2510 + sb->s_flags |= MS_TAGGED;
2511 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2512 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
2514 @@ -1305,6 +1325,14 @@ static int ext2_remount (struct super_bl
2519 + if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) &&
2520 + !(sb->s_flags & MS_TAGGED)) {
2521 + printk("EXT2-fs: %s: tagging not permitted on remount.\n",
2524 + goto restore_opts;
2527 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2528 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2529 diff -urNp -x '*.orig' linux-4.4/fs/ext4/ext4.h linux-4.4/fs/ext4/ext4.h
2530 --- linux-4.4/fs/ext4/ext4.h 2021-02-24 16:56:11.732417962 +0100
2531 +++ linux-4.4/fs/ext4/ext4.h 2021-02-24 16:56:24.556155150 +0100
2532 @@ -375,8 +375,11 @@ struct flex_groups {
2533 #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
2534 #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
2535 #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
2536 +#define EXT4_BARRIER_FL 0x04000000 /* Barrier for chroot() */
2537 +#define EXT4_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
2538 #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
2539 #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
2540 +#define EXT4_COW_FL 0x40000000 /* Copy on Write marker */
2541 #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
2543 #define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
2544 @@ -674,7 +677,7 @@ struct ext4_inode {
2545 __le16 l_i_uid_high; /* these 2 fields */
2546 __le16 l_i_gid_high; /* were reserved2[0] */
2547 __le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
2548 - __le16 l_i_reserved;
2549 + __le16 l_i_tag; /* Context Tag */
2552 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
2553 @@ -831,6 +834,7 @@ do { \
2554 #define i_gid_low i_gid
2555 #define i_uid_high osd2.linux2.l_i_uid_high
2556 #define i_gid_high osd2.linux2.l_i_gid_high
2557 +#define i_raw_tag osd2.linux2.l_i_tag
2558 #define i_checksum_lo osd2.linux2.l_i_checksum_lo
2560 #elif defined(__GNU__)
2561 @@ -1068,6 +1072,7 @@ struct ext4_inode_info {
2562 #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
2563 #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
2564 #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
2565 +#define EXT4_MOUNT_TAGGED 0x40000 /* Enable Context Tags */
2566 #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
2567 #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
2568 #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
2569 @@ -2528,6 +2533,7 @@ extern int ext4_punch_hole(struct inode
2570 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
2571 extern void ext4_set_inode_flags(struct inode *);
2572 extern void ext4_get_inode_flags(struct ext4_inode_info *);
2573 +extern int ext4_sync_flags(struct inode *, int, int);
2574 extern int ext4_alloc_da_blocks(struct inode *inode);
2575 extern void ext4_set_aops(struct inode *inode);
2576 extern int ext4_writepage_trans_blocks(struct inode *);
2577 diff -urNp -x '*.orig' linux-4.4/fs/ext4/file.c linux-4.4/fs/ext4/file.c
2578 --- linux-4.4/fs/ext4/file.c 2021-02-24 16:56:11.735751400 +0100
2579 +++ linux-4.4/fs/ext4/file.c 2021-02-24 16:56:24.556155150 +0100
2580 @@ -749,5 +749,6 @@ const struct inode_operations ext4_file_
2581 .get_acl = ext4_get_acl,
2582 .set_acl = ext4_set_acl,
2583 .fiemap = ext4_fiemap,
2584 + .sync_flags = ext4_sync_flags,
2587 diff -urNp -x '*.orig' linux-4.4/fs/ext4/ialloc.c linux-4.4/fs/ext4/ialloc.c
2588 --- linux-4.4/fs/ext4/ialloc.c 2021-02-24 16:56:11.735751400 +0100
2589 +++ linux-4.4/fs/ext4/ialloc.c 2021-02-24 16:56:24.556155150 +0100
2591 #include <linux/random.h>
2592 #include <linux/bitops.h>
2593 #include <linux/blkdev.h>
2594 +#include <linux/vs_tag.h>
2595 #include <asm/byteorder.h>
2598 @@ -780,6 +781,7 @@ struct inode *__ext4_new_inode(handle_t
2599 inode->i_mode = mode;
2600 inode->i_uid = current_fsuid();
2601 inode->i_gid = dir->i_gid;
2602 + i_tag_write(inode, dx_current_fstag(sb));
2604 inode_init_owner(inode, dir, mode);
2605 err = dquot_initialize(inode);
2606 diff -urNp -x '*.orig' linux-4.4/fs/ext4/inode.c linux-4.4/fs/ext4/inode.c
2607 --- linux-4.4/fs/ext4/inode.c 2021-02-24 16:56:11.739084838 +0100
2608 +++ linux-4.4/fs/ext4/inode.c 2021-02-24 16:56:24.556155150 +0100
2610 #include <linux/printk.h>
2611 #include <linux/slab.h>
2612 #include <linux/bitops.h>
2613 +#include <linux/vs_tag.h>
2615 #include "ext4_jbd2.h"
2617 @@ -4149,12 +4150,15 @@ void ext4_set_inode_flags(struct inode *
2618 unsigned int flags = EXT4_I(inode)->i_flags;
2619 unsigned int new_fl = 0;
2621 + if (flags & EXT4_IMMUTABLE_FL)
2622 + new_fl |= S_IMMUTABLE;
2623 + if (flags & EXT4_IXUNLINK_FL)
2624 + new_fl |= S_IXUNLINK;
2626 if (flags & EXT4_SYNC_FL)
2628 if (flags & EXT4_APPEND_FL)
2630 - if (flags & EXT4_IMMUTABLE_FL)
2631 - new_fl |= S_IMMUTABLE;
2632 if (flags & EXT4_NOATIME_FL)
2633 new_fl |= S_NOATIME;
2634 if (flags & EXT4_DIRSYNC_FL)
2635 @@ -4162,31 +4166,52 @@ void ext4_set_inode_flags(struct inode *
2636 if (test_opt(inode->i_sb, DAX))
2638 inode_set_flags(inode, new_fl,
2639 - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
2640 + S_IXUNLINK | S_IMMUTABLE | S_DAX |
2641 + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2644 + if (flags & EXT4_BARRIER_FL)
2645 + new_fl |= V_BARRIER;
2646 + if (flags & EXT4_COW_FL)
2649 + set_mask_bits(&inode->i_vflags,
2650 + V_BARRIER | V_COW, new_fl);
2653 /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
2654 void ext4_get_inode_flags(struct ext4_inode_info *ei)
2656 - unsigned int vfs_fl;
2657 + unsigned int vfs_fl, vfs_vf;
2658 unsigned long old_fl, new_fl;
2661 vfs_fl = ei->vfs_inode.i_flags;
2662 + vfs_vf = ei->vfs_inode.i_vflags;
2663 old_fl = ei->i_flags;
2664 new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
2665 EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
2667 + EXT4_DIRSYNC_FL|EXT4_BARRIER_FL|
2670 + if (vfs_fl & S_IMMUTABLE)
2671 + new_fl |= EXT4_IMMUTABLE_FL;
2672 + if (vfs_fl & S_IXUNLINK)
2673 + new_fl |= EXT4_IXUNLINK_FL;
2675 if (vfs_fl & S_SYNC)
2676 new_fl |= EXT4_SYNC_FL;
2677 if (vfs_fl & S_APPEND)
2678 new_fl |= EXT4_APPEND_FL;
2679 - if (vfs_fl & S_IMMUTABLE)
2680 - new_fl |= EXT4_IMMUTABLE_FL;
2681 if (vfs_fl & S_NOATIME)
2682 new_fl |= EXT4_NOATIME_FL;
2683 if (vfs_fl & S_DIRSYNC)
2684 new_fl |= EXT4_DIRSYNC_FL;
2686 + if (vfs_vf & V_BARRIER)
2687 + new_fl |= EXT4_BARRIER_FL;
2688 + if (vfs_vf & V_COW)
2689 + new_fl |= EXT4_COW_FL;
2690 } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
2693 @@ -4318,8 +4343,10 @@ struct inode *__ext4_iget(struct super_b
2694 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2695 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2697 - i_uid_write(inode, i_uid);
2698 - i_gid_write(inode, i_gid);
2699 + i_uid_write(inode, INOTAG_UID(DX_TAG(inode), i_uid, i_gid));
2700 + i_gid_write(inode, INOTAG_GID(DX_TAG(inode), i_uid, i_gid));
2701 + i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), i_uid, i_gid,
2702 + le16_to_cpu(raw_inode->i_raw_tag)));
2703 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
2705 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
2706 @@ -4641,8 +4668,10 @@ static int ext4_do_update_inode(handle_t
2708 ext4_get_inode_flags(ei);
2709 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
2710 - i_uid = i_uid_read(inode);
2711 - i_gid = i_gid_read(inode);
2712 + i_uid = from_kuid(&init_user_ns,
2713 + TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
2714 + i_gid = from_kgid(&init_user_ns,
2715 + TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
2716 if (!(test_opt(inode->i_sb, NO_UID32))) {
2717 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
2718 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
2719 @@ -4665,6 +4694,9 @@ static int ext4_do_update_inode(handle_t
2720 raw_inode->i_uid_high = 0;
2721 raw_inode->i_gid_high = 0;
2723 +#ifdef CONFIG_TAGGING_INTERN
2724 + raw_inode->i_raw_tag = cpu_to_le16(i_tag_read(inode));
2726 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2728 EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
2729 @@ -4910,7 +4942,8 @@ int ext4_setattr(struct dentry *dentry,
2732 if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
2733 - (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
2734 + (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid)) ||
2735 + (ia_valid & ATTR_TAG && !tag_eq(attr->ia_tag, inode->i_tag))) {
2738 /* (user+group)*(old+new) structure, inode write (sb,
2739 @@ -4933,6 +4966,8 @@ int ext4_setattr(struct dentry *dentry,
2740 inode->i_uid = attr->ia_uid;
2741 if (attr->ia_valid & ATTR_GID)
2742 inode->i_gid = attr->ia_gid;
2743 + if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
2744 + inode->i_tag = attr->ia_tag;
2745 error = ext4_mark_inode_dirty(handle, inode);
2746 ext4_journal_stop(handle);
2748 diff -urNp -x '*.orig' linux-4.4/fs/ext4/ioctl.c linux-4.4/fs/ext4/ioctl.c
2749 --- linux-4.4/fs/ext4/ioctl.c 2021-02-24 16:56:11.739084838 +0100
2750 +++ linux-4.4/fs/ext4/ioctl.c 2021-02-24 16:56:24.559488588 +0100
2752 #include <linux/mount.h>
2753 #include <linux/file.h>
2754 #include <linux/random.h>
2755 +#include <linux/vs_tag.h>
2756 #include <asm/uaccess.h>
2757 #include "ext4_jbd2.h"
2759 @@ -202,6 +203,33 @@ static int uuid_is_zero(__u8 u[16])
2763 +int ext4_sync_flags(struct inode *inode, int flags, int vflags)
2765 + handle_t *handle = NULL;
2766 + struct ext4_iloc iloc;
2769 + handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
2770 + if (IS_ERR(handle))
2771 + return PTR_ERR(handle);
2773 + if (IS_SYNC(inode))
2774 + ext4_handle_sync(handle);
2775 + err = ext4_reserve_inode_write(handle, inode, &iloc);
2779 + inode->i_flags = flags;
2780 + inode->i_vflags = vflags;
2781 + ext4_get_inode_flags(EXT4_I(inode));
2782 + inode->i_ctime = ext4_current_time(inode);
2784 + err = ext4_mark_iloc_dirty(handle, inode, &iloc);
2786 + ext4_journal_stop(handle);
2790 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2792 struct inode *inode = file_inode(filp);
2793 @@ -235,6 +263,11 @@ long ext4_ioctl(struct file *filp, unsig
2795 flags = ext4_mask_flags(inode->i_mode, flags);
2797 + if (IS_BARRIER(inode)) {
2798 + vxwprintk_task(1, "messing with the barrier.");
2803 mutex_lock(&inode->i_mutex);
2804 /* Is it quota file? Do not allow user to mess with it */
2805 @@ -252,7 +285,9 @@ long ext4_ioctl(struct file *filp, unsig
2807 * This test looks nicer. Thanks to Pauline Middelink
2809 - if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
2810 + if ((oldflags & EXT4_IMMUTABLE_FL) ||
2811 + ((flags ^ oldflags) & (EXT4_APPEND_FL |
2812 + EXT4_IMMUTABLE_FL | EXT4_IXUNLINK_FL))) {
2813 if (!capable(CAP_LINUX_IMMUTABLE))
2816 diff -urNp -x '*.orig' linux-4.4/fs/ext4/namei.c linux-4.4/fs/ext4/namei.c
2817 --- linux-4.4/fs/ext4/namei.c 2021-02-24 16:56:11.739084838 +0100
2818 +++ linux-4.4/fs/ext4/namei.c 2021-02-24 16:56:24.559488588 +0100
2820 #include <linux/quotaops.h>
2821 #include <linux/buffer_head.h>
2822 #include <linux/bio.h>
2823 +#include <linux/vs_tag.h>
2825 #include "ext4_jbd2.h"
2827 @@ -1429,6 +1430,7 @@ restart:
2828 ll_rw_block(READ | REQ_META | REQ_PRIO,
2831 + dx_propagate_tag(nd, inode);
2833 if ((bh = bh_use[ra_ptr++]) == NULL)
2835 @@ -3872,6 +3874,7 @@ const struct inode_operations ext4_dir_i
2836 .get_acl = ext4_get_acl,
2837 .set_acl = ext4_set_acl,
2838 .fiemap = ext4_fiemap,
2839 + .sync_flags = ext4_sync_flags,
2842 const struct inode_operations ext4_special_inode_operations = {
2843 diff -urNp -x '*.orig' linux-4.4/fs/ext4/super.c linux-4.4/fs/ext4/super.c
2844 --- linux-4.4/fs/ext4/super.c 2021-02-24 16:56:11.742418277 +0100
2845 +++ linux-4.4/fs/ext4/super.c 2021-02-24 16:56:24.559488588 +0100
2846 @@ -1179,6 +1179,7 @@ enum {
2847 Opt_dioread_nolock, Opt_dioread_lock,
2848 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
2849 Opt_max_dir_size_kb, Opt_nojournal_checksum,
2850 + Opt_tag, Opt_notag, Opt_tagid
2853 static const match_table_t tokens = {
2854 @@ -1264,6 +1265,9 @@ static const match_table_t tokens = {
2855 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
2856 {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
2857 {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
2859 + {Opt_notag, "notag"},
2860 + {Opt_tagid, "tagid=%u"},
2864 @@ -1506,6 +1510,20 @@ static int handle_mount_opt(struct super
2865 case Opt_nolazytime:
2866 sb->s_flags &= ~MS_LAZYTIME;
2868 +#ifndef CONFIG_TAGGING_NONE
2870 + set_opt(sb, TAGGED);
2873 + clear_opt(sb, TAGGED);
2876 +#ifdef CONFIG_PROPAGATE
2879 + set_opt(sb, TAGGED);
2884 for (m = ext4_mount_opts; m->token != Opt_err; m++)
2885 @@ -3477,6 +3495,9 @@ static int ext4_fill_super(struct super_
2886 sb->s_iflags |= SB_I_CGROUPWB;
2889 + if (EXT4_SB(sb)->s_mount_opt & EXT4_MOUNT_TAGGED)
2890 + sb->s_flags |= MS_TAGGED;
2892 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2893 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
2895 @@ -4852,6 +4873,14 @@ static int ext4_remount(struct super_blo
2896 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
2897 ext4_abort(sb, "Abort forced by user");
2899 + if ((sbi->s_mount_opt & EXT4_MOUNT_TAGGED) &&
2900 + !(sb->s_flags & MS_TAGGED)) {
2901 + printk("EXT4-fs: %s: tagging not permitted on remount.\n",
2904 + goto restore_opts;
2907 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2908 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
2910 diff -urNp -x '*.orig' linux-4.4/fs/fcntl.c linux-4.4/fs/fcntl.c
2911 --- linux-4.4/fs/fcntl.c 2021-02-24 16:56:11.752418592 +0100
2912 +++ linux-4.4/fs/fcntl.c 2021-02-24 16:56:24.559488588 +0100
2914 #include <linux/pid_namespace.h>
2915 #include <linux/user_namespace.h>
2916 #include <linux/shmem_fs.h>
2917 +#include <linux/vs_limit.h>
2919 #include <asm/poll.h>
2920 #include <asm/siginfo.h>
2921 @@ -389,6 +390,8 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, f
2925 + if (!vx_files_avail(1))
2928 if (unlikely(f.file->f_mode & FMODE_PATH)) {
2929 if (!check_fcntl_cmd(cmd))
2930 diff -urNp -x '*.orig' linux-4.4/fs/file.c linux-4.4/fs/file.c
2931 --- linux-4.4/fs/file.c 2021-02-24 16:56:11.752418592 +0100
2932 +++ linux-4.4/fs/file.c 2021-02-24 16:56:24.559488588 +0100
2934 #include <linux/spinlock.h>
2935 #include <linux/rcupdate.h>
2936 #include <linux/workqueue.h>
2937 +#include <linux/vs_limit.h>
2939 int sysctl_nr_open __read_mostly = 1024*1024;
2940 int sysctl_nr_open_min = BITS_PER_LONG;
2941 @@ -356,6 +357,8 @@ struct files_struct *dup_fd(struct files
2942 struct file *f = *old_fds++;
2945 + /* TODO: sum it first for check and performance */
2946 + vx_openfd_inc(open_files - i);
2949 * The fd may be claimed in the fd bitmap but not yet
2950 @@ -405,9 +408,11 @@ static struct fdtable *close_files(struc
2951 filp_close(file, files);
2952 cond_resched_rcu_qs();
2962 @@ -539,6 +544,7 @@ repeat:
2964 __clear_close_on_exec(fd, fdt);
2966 + vx_openfd_inc(fd);
2969 if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
2970 @@ -569,6 +575,7 @@ static void __put_unused_fd(struct files
2971 __clear_open_fd(fd, fdt);
2972 if (fd < files->next_fd)
2973 files->next_fd = fd;
2974 + vx_openfd_dec(fd);
2977 void put_unused_fd(unsigned int fd)
2978 @@ -851,6 +858,8 @@ __releases(&files->file_lock)
2981 filp_close(tofree, files);
2983 + vx_openfd_inc(fd); /* fd was unused */
2987 diff -urNp -x '*.orig' linux-4.4/fs/file_table.c linux-4.4/fs/file_table.c
2988 --- linux-4.4/fs/file_table.c 2016-01-11 00:01:32.000000000 +0100
2989 +++ linux-4.4/fs/file_table.c 2021-02-24 16:56:24.559488588 +0100
2991 #include <linux/task_work.h>
2992 #include <linux/ima.h>
2993 #include <linux/swap.h>
2994 +#include <linux/vs_limit.h>
2995 +#include <linux/vs_context.h>
2997 #include <linux/atomic.h>
2999 @@ -137,6 +139,8 @@ struct file *get_empty_filp(void)
3000 mutex_init(&f->f_pos_lock);
3001 eventpoll_init_file(f);
3002 /* f->f_version: 0 */
3003 + f->f_xid = vx_current_xid();
3008 @@ -219,6 +223,8 @@ static void __fput(struct file *file)
3009 put_write_access(inode);
3010 __mnt_drop_write(mnt);
3012 + vx_files_dec(file);
3014 file->f_path.dentry = NULL;
3015 file->f_path.mnt = NULL;
3016 file->f_inode = NULL;
3017 @@ -305,6 +311,8 @@ void put_filp(struct file *file)
3019 if (atomic_long_dec_and_test(&file->f_count)) {
3020 security_file_free(file);
3021 + vx_files_dec(file);
3026 diff -urNp -x '*.orig' linux-4.4/fs/fs_struct.c linux-4.4/fs/fs_struct.c
3027 --- linux-4.4/fs/fs_struct.c 2016-01-11 00:01:32.000000000 +0100
3028 +++ linux-4.4/fs/fs_struct.c 2021-02-24 16:56:24.559488588 +0100
3030 #include <linux/path.h>
3031 #include <linux/slab.h>
3032 #include <linux/fs_struct.h>
3033 +#include <linux/vserver/global.h>
3034 #include "internal.h"
3037 @@ -87,6 +88,7 @@ void free_fs_struct(struct fs_struct *fs
3039 path_put(&fs->root);
3041 + atomic_dec(&vs_global_fs);
3042 kmem_cache_free(fs_cachep, fs);
3045 @@ -124,6 +126,7 @@ struct fs_struct *copy_fs_struct(struct
3048 spin_unlock(&old->lock);
3049 + atomic_inc(&vs_global_fs);
3053 diff -urNp -x '*.orig' linux-4.4/fs/gfs2/file.c linux-4.4/fs/gfs2/file.c
3054 --- linux-4.4/fs/gfs2/file.c 2021-02-24 16:56:11.755752030 +0100
3055 +++ linux-4.4/fs/gfs2/file.c 2021-02-24 16:56:24.559488588 +0100
3056 @@ -137,6 +137,9 @@ static const u32 fsflags_to_gfs2[32] = {
3057 [12] = GFS2_DIF_EXHASH,
3058 [14] = GFS2_DIF_INHERIT_JDATA,
3059 [17] = GFS2_DIF_TOPDIR,
3060 + [27] = GFS2_DIF_IXUNLINK,
3061 + [26] = GFS2_DIF_BARRIER,
3062 + [29] = GFS2_DIF_COW,
3065 static const u32 gfs2_to_fsflags[32] = {
3066 @@ -147,6 +150,9 @@ static const u32 gfs2_to_fsflags[32] = {
3067 [gfs2fl_ExHash] = FS_INDEX_FL,
3068 [gfs2fl_TopLevel] = FS_TOPDIR_FL,
3069 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
3070 + [gfs2fl_IXUnlink] = FS_IXUNLINK_FL,
3071 + [gfs2fl_Barrier] = FS_BARRIER_FL,
3072 + [gfs2fl_Cow] = FS_COW_FL,
3075 static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
3076 @@ -177,12 +183,17 @@ void gfs2_set_inode_flags(struct inode *
3078 struct gfs2_inode *ip = GFS2_I(inode);
3079 unsigned int flags = inode->i_flags;
3080 + unsigned int vflags = inode->i_vflags;
3082 + flags &= ~(S_IMMUTABLE | S_IXUNLINK |
3083 + S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC | S_NOSEC);
3085 - flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
3086 if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
3088 if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
3089 flags |= S_IMMUTABLE;
3090 + if (ip->i_diskflags & GFS2_DIF_IXUNLINK)
3091 + flags |= S_IXUNLINK;
3092 if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
3094 if (ip->i_diskflags & GFS2_DIF_NOATIME)
3095 @@ -190,6 +201,43 @@ void gfs2_set_inode_flags(struct inode *
3096 if (ip->i_diskflags & GFS2_DIF_SYNC)
3098 inode->i_flags = flags;
3100 + vflags &= ~(V_BARRIER | V_COW);
3102 + if (ip->i_diskflags & GFS2_DIF_BARRIER)
3103 + vflags |= V_BARRIER;
3104 + if (ip->i_diskflags & GFS2_DIF_COW)
3106 + inode->i_vflags = vflags;
3109 +void gfs2_get_inode_flags(struct inode *inode)
3111 + struct gfs2_inode *ip = GFS2_I(inode);
3112 + unsigned int flags = inode->i_flags;
3113 + unsigned int vflags = inode->i_vflags;
3115 + ip->i_diskflags &= ~(GFS2_DIF_APPENDONLY |
3116 + GFS2_DIF_NOATIME | GFS2_DIF_SYNC |
3117 + GFS2_DIF_IMMUTABLE | GFS2_DIF_IXUNLINK |
3118 + GFS2_DIF_BARRIER | GFS2_DIF_COW);
3120 + if (flags & S_IMMUTABLE)
3121 + ip->i_diskflags |= GFS2_DIF_IMMUTABLE;
3122 + if (flags & S_IXUNLINK)
3123 + ip->i_diskflags |= GFS2_DIF_IXUNLINK;
3125 + if (flags & S_APPEND)
3126 + ip->i_diskflags |= GFS2_DIF_APPENDONLY;
3127 + if (flags & S_NOATIME)
3128 + ip->i_diskflags |= GFS2_DIF_NOATIME;
3129 + if (flags & S_SYNC)
3130 + ip->i_diskflags |= GFS2_DIF_SYNC;
3132 + if (vflags & V_BARRIER)
3133 + ip->i_diskflags |= GFS2_DIF_BARRIER;
3134 + if (vflags & V_COW)
3135 + ip->i_diskflags |= GFS2_DIF_COW;
3138 /* Flags that can be set by user space */
3139 @@ -305,6 +353,37 @@ static int gfs2_set_flags(struct file *f
3140 return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA);
3143 +int gfs2_sync_flags(struct inode *inode, int flags, int vflags)
3145 + struct gfs2_inode *ip = GFS2_I(inode);
3146 + struct gfs2_sbd *sdp = GFS2_SB(inode);
3147 + struct buffer_head *bh;
3148 + struct gfs2_holder gh;
3151 + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
3154 + error = gfs2_trans_begin(sdp, RES_DINODE, 0);
3157 + error = gfs2_meta_inode_buffer(ip, &bh);
3159 + goto out_trans_end;
3160 + gfs2_trans_add_meta(ip->i_gl, bh);
3161 + inode->i_flags = flags;
3162 + inode->i_vflags = vflags;
3163 + gfs2_get_inode_flags(inode);
3164 + gfs2_dinode_out(ip, bh->b_data);
3166 + gfs2_set_aops(inode);
3168 + gfs2_trans_end(sdp);
3170 + gfs2_glock_dq_uninit(&gh);
3174 static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3177 diff -urNp -x '*.orig' linux-4.4/fs/gfs2/inode.h linux-4.4/fs/gfs2/inode.h
3178 --- linux-4.4/fs/gfs2/inode.h 2016-01-11 00:01:32.000000000 +0100
3179 +++ linux-4.4/fs/gfs2/inode.h 2021-02-24 16:56:24.559488588 +0100
3180 @@ -118,6 +118,7 @@ extern const struct file_operations gfs2
3181 extern const struct file_operations gfs2_dir_fops_nolock;
3183 extern void gfs2_set_inode_flags(struct inode *inode);
3184 +extern int gfs2_sync_flags(struct inode *inode, int flags, int vflags);
3186 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
3187 extern const struct file_operations gfs2_file_fops;
3188 diff -urNp -x '*.orig' linux-4.4/fs/hostfs/hostfs.h linux-4.4/fs/hostfs/hostfs.h
3189 --- linux-4.4/fs/hostfs/hostfs.h 2016-01-11 00:01:32.000000000 +0100
3190 +++ linux-4.4/fs/hostfs/hostfs.h 2021-02-24 16:56:24.559488588 +0100
3191 @@ -42,6 +42,7 @@ struct hostfs_iattr {
3192 unsigned short ia_mode;
3197 struct timespec ia_atime;
3198 struct timespec ia_mtime;
3199 diff -urNp -x '*.orig' linux-4.4/fs/inode.c linux-4.4/fs/inode.c
3200 --- linux-4.4/fs/inode.c 2021-02-24 16:56:11.762418907 +0100
3201 +++ linux-4.4/fs/inode.c 2021-02-24 16:56:24.559488588 +0100
3203 #include <linux/buffer_head.h> /* for inode_has_buffers */
3204 #include <linux/ratelimit.h>
3205 #include <linux/list_lru.h>
3206 +#include <linux/vs_tag.h>
3207 #include <trace/events/writeback.h>
3208 #include "internal.h"
3210 @@ -133,6 +134,8 @@ int inode_init_always(struct super_block
3211 struct address_space *const mapping = &inode->i_data;
3215 + /* essential because of inode slab reuse */
3216 inode->i_blkbits = sb->s_blocksize_bits;
3218 atomic64_set(&inode->i_sequence, 0);
3219 @@ -143,6 +146,7 @@ int inode_init_always(struct super_block
3220 inode->i_opflags = 0;
3221 i_uid_write(inode, 0);
3222 i_gid_write(inode, 0);
3223 + i_tag_write(inode, 0);
3224 atomic_set(&inode->i_writecount, 0);
3226 inode->i_blocks = 0;
3227 @@ -153,6 +157,7 @@ int inode_init_always(struct super_block
3228 inode->i_cdev = NULL;
3229 inode->i_link = NULL;
3231 + inode->i_mdev = 0;
3232 inode->dirtied_when = 0;
3234 #ifdef CONFIG_CGROUP_WRITEBACK
3235 @@ -476,6 +481,8 @@ void __insert_inode_hash(struct inode *i
3237 EXPORT_SYMBOL(__insert_inode_hash);
3239 +EXPORT_SYMBOL_GPL(__iget);
3242 * __remove_inode_hash - remove an inode from the hash
3243 * @inode: inode to unhash
3244 @@ -1923,9 +1930,11 @@ void init_special_inode(struct inode *in
3245 if (S_ISCHR(mode)) {
3246 inode->i_fop = &def_chr_fops;
3247 inode->i_rdev = rdev;
3248 + inode->i_mdev = rdev;
3249 } else if (S_ISBLK(mode)) {
3250 inode->i_fop = &def_blk_fops;
3251 inode->i_rdev = rdev;
3252 + inode->i_mdev = rdev;
3253 } else if (S_ISFIFO(mode))
3254 inode->i_fop = &pipefifo_fops;
3255 else if (S_ISSOCK(mode))
3256 @@ -1960,6 +1969,7 @@ void inode_init_owner(struct inode *inod
3258 inode->i_gid = current_fsgid();
3259 inode->i_mode = mode;
3260 + i_tag_write(inode, dx_current_fstag(inode->i_sb));
3262 EXPORT_SYMBOL(inode_init_owner);
3264 diff -urNp -x '*.orig' linux-4.4/fs/ioctl.c linux-4.4/fs/ioctl.c
3265 --- linux-4.4/fs/ioctl.c 2016-01-11 00:01:32.000000000 +0100
3266 +++ linux-4.4/fs/ioctl.c 2021-02-24 16:56:24.562822026 +0100
3268 #include <linux/writeback.h>
3269 #include <linux/buffer_head.h>
3270 #include <linux/falloc.h>
3271 +#include <linux/proc_fs.h>
3272 +#include <linux/vserver/inode.h>
3273 +#include <linux/vs_tag.h>
3275 #include <asm/ioctls.h>
3277 diff -urNp -x '*.orig' linux-4.4/fs/jfs/file.c linux-4.4/fs/jfs/file.c
3278 --- linux-4.4/fs/jfs/file.c 2016-01-11 00:01:32.000000000 +0100
3279 +++ linux-4.4/fs/jfs/file.c 2021-02-24 16:56:24.562822026 +0100
3280 @@ -113,7 +113,8 @@ int jfs_setattr(struct dentry *dentry, s
3283 if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
3284 - (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
3285 + (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)) ||
3286 + (iattr->ia_valid & ATTR_TAG && !tag_eq(iattr->ia_tag, inode->i_tag))) {
3287 rc = dquot_transfer(inode, iattr);
3290 @@ -149,6 +150,7 @@ const struct inode_operations jfs_file_i
3291 .get_acl = jfs_get_acl,
3292 .set_acl = jfs_set_acl,
3294 + .sync_flags = jfs_sync_flags,
3297 const struct file_operations jfs_file_operations = {
3298 diff -urNp -x '*.orig' linux-4.4/fs/jfs/ioctl.c linux-4.4/fs/jfs/ioctl.c
3299 --- linux-4.4/fs/jfs/ioctl.c 2016-01-11 00:01:32.000000000 +0100
3300 +++ linux-4.4/fs/jfs/ioctl.c 2021-02-24 16:56:24.562822026 +0100
3302 #include <linux/time.h>
3303 #include <linux/sched.h>
3304 #include <linux/blkdev.h>
3305 +#include <linux/mount.h>
3306 #include <asm/current.h>
3307 #include <asm/uaccess.h>
3309 @@ -56,6 +57,16 @@ static long jfs_map_ext2(unsigned long f
3313 +int jfs_sync_flags(struct inode *inode, int flags, int vflags)
3315 + inode->i_flags = flags;
3316 + inode->i_vflags = vflags;
3317 + jfs_get_inode_flags(JFS_IP(inode));
3318 + inode->i_ctime = CURRENT_TIME_SEC;
3319 + mark_inode_dirty(inode);
3323 long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3325 struct inode *inode = file_inode(filp);
3326 @@ -89,6 +100,11 @@ long jfs_ioctl(struct file *filp, unsign
3327 if (!S_ISDIR(inode->i_mode))
3328 flags &= ~JFS_DIRSYNC_FL;
3330 + if (IS_BARRIER(inode)) {
3331 + vxwprintk_task(1, "messing with the barrier.");
3335 /* Is it quota file? Do not allow user to mess with it */
3336 if (IS_NOQUOTA(inode)) {
3338 @@ -106,8 +122,8 @@ long jfs_ioctl(struct file *filp, unsign
3339 * the relevant capability.
3341 if ((oldflags & JFS_IMMUTABLE_FL) ||
3342 - ((flags ^ oldflags) &
3343 - (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
3344 + ((flags ^ oldflags) & (JFS_APPEND_FL |
3345 + JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL))) {
3346 if (!capable(CAP_LINUX_IMMUTABLE)) {
3347 mutex_unlock(&inode->i_mutex);
3349 @@ -115,7 +131,7 @@ long jfs_ioctl(struct file *filp, unsign
3353 - flags = flags & JFS_FL_USER_MODIFIABLE;
3354 + flags &= JFS_FL_USER_MODIFIABLE;
3355 flags |= oldflags & ~JFS_FL_USER_MODIFIABLE;
3356 jfs_inode->mode2 = flags;
3358 diff -urNp -x '*.orig' linux-4.4/fs/jfs/jfs_dinode.h linux-4.4/fs/jfs/jfs_dinode.h
3359 --- linux-4.4/fs/jfs/jfs_dinode.h 2016-01-11 00:01:32.000000000 +0100
3360 +++ linux-4.4/fs/jfs/jfs_dinode.h 2021-02-24 16:56:24.562822026 +0100
3361 @@ -161,9 +161,13 @@ struct dinode {
3363 #define JFS_APPEND_FL 0x01000000 /* writes to file may only append */
3364 #define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */
3365 +#define JFS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
3367 -#define JFS_FL_USER_VISIBLE 0x03F80000
3368 -#define JFS_FL_USER_MODIFIABLE 0x03F80000
3369 +#define JFS_BARRIER_FL 0x04000000 /* Barrier for chroot() */
3370 +#define JFS_COW_FL 0x20000000 /* Copy on Write marker */
3372 +#define JFS_FL_USER_VISIBLE 0x07F80000
3373 +#define JFS_FL_USER_MODIFIABLE 0x07F80000
3374 #define JFS_FL_INHERIT 0x03C80000
3376 /* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */
3377 diff -urNp -x '*.orig' linux-4.4/fs/jfs/jfs_filsys.h linux-4.4/fs/jfs/jfs_filsys.h
3378 --- linux-4.4/fs/jfs/jfs_filsys.h 2016-01-11 00:01:32.000000000 +0100
3379 +++ linux-4.4/fs/jfs/jfs_filsys.h 2021-02-24 16:56:24.562822026 +0100
3381 #define JFS_NAME_MAX 255
3382 #define JFS_PATH_MAX BPSIZE
3384 +#define JFS_TAGGED 0x00800000 /* Context Tagging */
3387 * file system state (superblock state)
3388 diff -urNp -x '*.orig' linux-4.4/fs/jfs/jfs_imap.c linux-4.4/fs/jfs/jfs_imap.c
3389 --- linux-4.4/fs/jfs/jfs_imap.c 2016-01-11 00:01:32.000000000 +0100
3390 +++ linux-4.4/fs/jfs/jfs_imap.c 2021-02-24 16:56:24.562822026 +0100
3392 #include <linux/pagemap.h>
3393 #include <linux/quotaops.h>
3394 #include <linux/slab.h>
3395 +#include <linux/vs_tag.h>
3397 #include "jfs_incore.h"
3398 #include "jfs_inode.h"
3399 @@ -3047,6 +3048,8 @@ static int copy_from_dinode(struct dinod
3401 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3402 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3406 jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
3407 jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
3408 @@ -3067,14 +3070,18 @@ static int copy_from_dinode(struct dinod
3410 set_nlink(ip, le32_to_cpu(dip->di_nlink));
3412 - jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
3413 + kuid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
3414 + kgid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
3415 + ip->i_tag = INOTAG_KTAG(DX_TAG(ip), kuid, kgid, GLOBAL_ROOT_TAG);
3417 + jfs_ip->saved_uid = INOTAG_KUID(DX_TAG(ip), kuid, kgid);
3418 if (!uid_valid(sbi->uid))
3419 ip->i_uid = jfs_ip->saved_uid;
3421 ip->i_uid = sbi->uid;
3424 - jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
3425 + jfs_ip->saved_gid = INOTAG_KGID(DX_TAG(ip), kuid, kgid);
3426 if (!gid_valid(sbi->gid))
3427 ip->i_gid = jfs_ip->saved_gid;
3429 @@ -3139,16 +3146,14 @@ static void copy_to_dinode(struct dinode
3430 dip->di_size = cpu_to_le64(ip->i_size);
3431 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
3432 dip->di_nlink = cpu_to_le32(ip->i_nlink);
3433 - if (!uid_valid(sbi->uid))
3434 - dip->di_uid = cpu_to_le32(i_uid_read(ip));
3436 - dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns,
3437 - jfs_ip->saved_uid));
3438 - if (!gid_valid(sbi->gid))
3439 - dip->di_gid = cpu_to_le32(i_gid_read(ip));
3441 - dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3442 - jfs_ip->saved_gid));
3443 + dip->di_uid = cpu_to_le32(from_kuid(&init_user_ns,
3444 + TAGINO_KUID(DX_TAG(ip),
3445 + !uid_valid(sbi->uid) ? ip->i_uid : jfs_ip->saved_uid,
3447 + dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3448 + TAGINO_KGID(DX_TAG(ip),
3449 + !gid_valid(sbi->gid) ? ip->i_gid : jfs_ip->saved_gid,
3451 jfs_get_inode_flags(jfs_ip);
3453 * mode2 is only needed for storing the higher order bits.
3454 diff -urNp -x '*.orig' linux-4.4/fs/jfs/jfs_inode.c linux-4.4/fs/jfs/jfs_inode.c
3455 --- linux-4.4/fs/jfs/jfs_inode.c 2016-01-11 00:01:32.000000000 +0100
3456 +++ linux-4.4/fs/jfs/jfs_inode.c 2021-02-24 16:56:24.562822026 +0100
3459 #include <linux/fs.h>
3460 #include <linux/quotaops.h>
3461 +#include <linux/vs_tag.h>
3462 #include "jfs_incore.h"
3463 #include "jfs_inode.h"
3464 #include "jfs_filsys.h"
3465 @@ -33,6 +34,9 @@ void jfs_set_inode_flags(struct inode *i
3467 if (flags & JFS_IMMUTABLE_FL)
3468 new_fl |= S_IMMUTABLE;
3469 + if (flags & JFS_IXUNLINK_FL)
3470 + new_fl |= S_IXUNLINK;
3472 if (flags & JFS_APPEND_FL)
3474 if (flags & JFS_NOATIME_FL)
3475 @@ -41,18 +45,35 @@ void jfs_set_inode_flags(struct inode *i
3476 new_fl |= S_DIRSYNC;
3477 if (flags & JFS_SYNC_FL)
3479 - inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND | S_NOATIME |
3480 - S_DIRSYNC | S_SYNC);
3482 + inode_set_flags(inode, new_fl, S_IMMUTABLE | S_IXUNLINK |
3483 + S_APPEND | S_NOATIME | S_DIRSYNC | S_SYNC);
3486 + if (flags & JFS_BARRIER_FL)
3487 + new_fl |= V_BARRIER;
3488 + if (flags & JFS_COW_FL)
3491 + set_mask_bits(&inode->i_vflags,
3492 + V_BARRIER | V_COW, new_fl);
3495 void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip)
3497 unsigned int flags = jfs_ip->vfs_inode.i_flags;
3498 + unsigned int vflags = jfs_ip->vfs_inode.i_vflags;
3500 + jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL |
3501 + JFS_APPEND_FL | JFS_NOATIME_FL |
3502 + JFS_DIRSYNC_FL | JFS_SYNC_FL |
3503 + JFS_BARRIER_FL | JFS_COW_FL);
3505 - jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL |
3506 - JFS_DIRSYNC_FL | JFS_SYNC_FL);
3507 if (flags & S_IMMUTABLE)
3508 jfs_ip->mode2 |= JFS_IMMUTABLE_FL;
3509 + if (flags & S_IXUNLINK)
3510 + jfs_ip->mode2 |= JFS_IXUNLINK_FL;
3512 if (flags & S_APPEND)
3513 jfs_ip->mode2 |= JFS_APPEND_FL;
3514 if (flags & S_NOATIME)
3515 @@ -61,6 +82,11 @@ void jfs_get_inode_flags(struct jfs_inod
3516 jfs_ip->mode2 |= JFS_DIRSYNC_FL;
3518 jfs_ip->mode2 |= JFS_SYNC_FL;
3520 + if (vflags & V_BARRIER)
3521 + jfs_ip->mode2 |= JFS_BARRIER_FL;
3522 + if (vflags & V_COW)
3523 + jfs_ip->mode2 |= JFS_COW_FL;
3527 diff -urNp -x '*.orig' linux-4.4/fs/jfs/jfs_inode.h linux-4.4/fs/jfs/jfs_inode.h
3528 --- linux-4.4/fs/jfs/jfs_inode.h 2016-01-11 00:01:32.000000000 +0100
3529 +++ linux-4.4/fs/jfs/jfs_inode.h 2021-02-24 16:56:24.562822026 +0100
3530 @@ -39,6 +39,7 @@ extern struct dentry *jfs_fh_to_dentry(s
3531 extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
3532 int fh_len, int fh_type);
3533 extern void jfs_set_inode_flags(struct inode *);
3534 +extern int jfs_sync_flags(struct inode *, int, int);
3535 extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
3536 extern int jfs_setattr(struct dentry *, struct iattr *);
3538 diff -urNp -x '*.orig' linux-4.4/fs/jfs/namei.c linux-4.4/fs/jfs/namei.c
3539 --- linux-4.4/fs/jfs/namei.c 2021-02-24 16:56:11.769085783 +0100
3540 +++ linux-4.4/fs/jfs/namei.c 2021-02-24 16:56:24.562822026 +0100
3542 #include <linux/ctype.h>
3543 #include <linux/quotaops.h>
3544 #include <linux/exportfs.h>
3545 +#include <linux/vs_tag.h>
3546 #include "jfs_incore.h"
3547 #include "jfs_superblock.h"
3548 #include "jfs_inode.h"
3549 @@ -1476,6 +1477,7 @@ static struct dentry *jfs_lookup(struct
3550 jfs_err("jfs_lookup: iget failed on inum %d", (uint)inum);
3553 + dx_propagate_tag(nd, ip);
3554 return d_splice_alias(ip, dentry);
3557 @@ -1541,6 +1543,7 @@ const struct inode_operations jfs_dir_in
3558 .get_acl = jfs_get_acl,
3559 .set_acl = jfs_set_acl,
3561 + .sync_flags = jfs_sync_flags,
3564 const struct file_operations jfs_dir_operations = {
3565 diff -urNp -x '*.orig' linux-4.4/fs/jfs/super.c linux-4.4/fs/jfs/super.c
3566 --- linux-4.4/fs/jfs/super.c 2021-02-24 16:56:11.769085783 +0100
3567 +++ linux-4.4/fs/jfs/super.c 2021-02-24 16:56:24.562822026 +0100
3568 @@ -206,7 +206,8 @@ enum {
3569 Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
3570 Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
3571 Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask,
3572 - Opt_discard, Opt_nodiscard, Opt_discard_minblk
3573 + Opt_discard, Opt_nodiscard, Opt_discard_minblk,
3574 + Opt_tag, Opt_notag, Opt_tagid
3577 static const match_table_t tokens = {
3578 @@ -216,6 +217,10 @@ static const match_table_t tokens = {
3579 {Opt_resize, "resize=%u"},
3580 {Opt_resize_nosize, "resize"},
3581 {Opt_errors, "errors=%s"},
3583 + {Opt_notag, "notag"},
3584 + {Opt_tagid, "tagid=%u"},
3585 + {Opt_tag, "tagxid"},
3586 {Opt_ignore, "noquota"},
3587 {Opt_ignore, "quota"},
3588 {Opt_usrquota, "usrquota"},
3589 @@ -405,7 +410,20 @@ static int parse_options(char *options,
3590 pr_err("JFS: discard option not supported on device\n");
3594 +#ifndef CONFIG_TAGGING_NONE
3596 + *flag |= JFS_TAGGED;
3599 + *flag &= ~JFS_TAGGED; /* notag: clear only the tagging bit, keep all other mount flags */
3602 +#ifdef CONFIG_PROPAGATE
3605 + *flag |= JFS_TAGGED;
3609 printk("jfs: Unrecognized mount option \"%s\" or missing value\n",
3611 @@ -437,6 +455,12 @@ static int jfs_remount(struct super_bloc
3612 if (!parse_options(data, sb, &newLVSize, &flag))
3615 + if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) {
3616 + printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n",
3622 if (sb->s_flags & MS_RDONLY) {
3623 pr_err("JFS: resize requires volume to be mounted read-write\n");
3624 @@ -517,6 +541,9 @@ static int jfs_fill_super(struct super_b
3625 #ifdef CONFIG_JFS_POSIX_ACL
3626 sb->s_flags |= MS_POSIXACL;
3628 + /* map mount option tagxid */
3629 + if (sbi->flag & JFS_TAGGED)
3630 + sb->s_flags |= MS_TAGGED;
3633 pr_err("resize option for remount only\n");
3634 diff -urNp -x '*.orig' linux-4.4/fs/libfs.c linux-4.4/fs/libfs.c
3635 --- linux-4.4/fs/libfs.c 2021-02-24 16:56:11.769085783 +0100
3636 +++ linux-4.4/fs/libfs.c 2021-02-24 16:56:24.562822026 +0100
3637 @@ -141,13 +141,14 @@ static inline unsigned char dt_type(stru
3638 * both impossible due to the lock on directory.
3641 -int dcache_readdir(struct file *file, struct dir_context *ctx)
3642 +static inline int do_dcache_readdir_filter(struct file *filp,
3643 + struct dir_context *ctx, int (*filter)(struct dentry *dentry))
3645 - struct dentry *dentry = file->f_path.dentry;
3646 - struct dentry *cursor = file->private_data;
3647 + struct dentry *dentry = filp->f_path.dentry;
3648 + struct dentry *cursor = filp->private_data;
3649 struct list_head *p, *q = &cursor->d_child;
3651 - if (!dir_emit_dots(file, ctx))
3652 + if (!dir_emit_dots(filp, ctx))
3654 spin_lock(&dentry->d_lock);
3656 @@ -155,6 +156,8 @@ int dcache_readdir(struct file *file, st
3658 for (p = q->next; p != &dentry->d_subdirs; p = p->next) {
3659 struct dentry *next = list_entry(p, struct dentry, d_child);
3660 + if (filter && !filter(next))
3662 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
3663 if (!simple_positive(next)) {
3664 spin_unlock(&next->d_lock);
3665 @@ -177,8 +180,22 @@ int dcache_readdir(struct file *file, st
3666 spin_unlock(&dentry->d_lock);
3670 EXPORT_SYMBOL(dcache_readdir);
3672 +int dcache_readdir(struct file *filp, struct dir_context *ctx)
3674 + return do_dcache_readdir_filter(filp, ctx, NULL);
3677 +EXPORT_SYMBOL(dcache_readdir_filter);
3679 +int dcache_readdir_filter(struct file *filp, struct dir_context *ctx,
3680 + int (*filter)(struct dentry *))
3682 + return do_dcache_readdir_filter(filp, ctx, filter);
3685 ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
3688 diff -urNp -x '*.orig' linux-4.4/fs/locks.c linux-4.4/fs/locks.c
3689 --- linux-4.4/fs/locks.c 2021-02-24 16:56:11.769085783 +0100
3690 +++ linux-4.4/fs/locks.c 2021-02-24 16:56:24.562822026 +0100
3692 #include <linux/hashtable.h>
3693 #include <linux/percpu.h>
3694 #include <linux/lglock.h>
3695 +#include <linux/vs_base.h>
3696 +#include <linux/vs_limit.h>
3698 #define CREATE_TRACE_POINTS
3699 #include <trace/events/filelock.h>
3700 @@ -255,11 +257,15 @@ static void locks_init_lock_heads(struct
3701 /* Allocate an empty lock structure. */
3702 struct file_lock *locks_alloc_lock(void)
3704 - struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
3705 + struct file_lock *fl;
3708 - locks_init_lock_heads(fl);
3709 + fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
3712 + locks_init_lock_heads(fl);
3718 EXPORT_SYMBOL_GPL(locks_alloc_lock);
3719 @@ -311,6 +317,7 @@ void locks_init_lock(struct file_lock *f
3721 memset(fl, 0, sizeof(struct file_lock));
3722 locks_init_lock_heads(fl);
3726 EXPORT_SYMBOL(locks_init_lock);
3727 @@ -328,6 +335,7 @@ void locks_copy_conflock(struct file_loc
3728 new->fl_start = fl->fl_start;
3729 new->fl_end = fl->fl_end;
3730 new->fl_lmops = fl->fl_lmops;
3731 + new->fl_xid = fl->fl_xid;
3735 @@ -389,7 +397,10 @@ flock_make_lock(struct file *filp, unsig
3736 fl->fl_flags = FL_FLOCK;
3738 fl->fl_end = OFFSET_MAX;
3741 + vxd_assert(filp->f_xid == vx_current_xid(),
3742 + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3743 + fl->fl_xid = filp->f_xid;
3747 @@ -511,6 +522,7 @@ static int lease_init(struct file *filp,
3749 fl->fl_owner = filp;
3750 fl->fl_pid = current->tgid;
3751 + fl->fl_xid = vx_current_xid();
3754 fl->fl_flags = FL_LEASE;
3755 @@ -530,6 +542,10 @@ static struct file_lock *lease_alloc(str
3757 return ERR_PTR(error);
3759 + fl->fl_xid = vx_current_xid();
3761 + vxd_assert(filp->f_xid == fl->fl_xid,
3762 + "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid);
3763 error = lease_init(filp, type, fl);
3765 locks_free_lock(fl);
3766 @@ -908,6 +924,7 @@ static int flock_lock_inode(struct inode
3770 + new_fl->fl_xid = -1;
3772 list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
3773 if (!flock_locks_conflict(request, fl))
3774 @@ -934,7 +951,8 @@ out:
3778 -static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
3779 +static int __posix_lock_file(struct inode *inode, struct file_lock *request,
3780 + struct file_lock *conflock, vxid_t xid)
3782 struct file_lock *fl, *tmp;
3783 struct file_lock *new_fl = NULL;
3784 @@ -950,6 +968,9 @@ static int __posix_lock_file(struct inod
3786 return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
3789 + vxd_assert(xid == vx_current_xid(),
3790 + "xid(%d) == current(%d)", xid, vx_current_xid());
3792 * We may need two file_lock structures for this operation,
3793 * so we get them in advance to avoid races.
3794 @@ -960,7 +981,11 @@ static int __posix_lock_file(struct inod
3795 (request->fl_type != F_UNLCK ||
3796 request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
3797 new_fl = locks_alloc_lock();
3798 + if (new_fl) new_fl->fl_xid = xid; /* locks_alloc_lock() may return NULL */
3799 + // vx_locks_inc(new_fl);
3800 new_fl2 = locks_alloc_lock();
3801 + if (new_fl2) new_fl2->fl_xid = xid; /* locks_alloc_lock() may return NULL */
3802 + // vx_locks_inc(new_fl2);
3805 spin_lock(&ctx->flc_lock);
3806 @@ -1162,7 +1187,8 @@ static int __posix_lock_file(struct inod
3807 int posix_lock_file(struct file *filp, struct file_lock *fl,
3808 struct file_lock *conflock)
3810 - return __posix_lock_file(file_inode(filp), fl, conflock);
3811 + return __posix_lock_file(file_inode(filp),
3812 + fl, conflock, filp->f_xid);
3814 EXPORT_SYMBOL(posix_lock_file);
3816 @@ -1178,7 +1204,7 @@ static int posix_lock_inode_wait(struct
3820 - error = __posix_lock_file(inode, fl, NULL);
3821 + error = __posix_lock_file(inode, fl, NULL, 0);
3822 if (error != FILE_LOCK_DEFERRED)
3824 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
3825 @@ -1257,10 +1283,13 @@ int locks_mandatory_area(int read_write,
3826 fl.fl_end = offset + count - 1;
3833 fl.fl_flags &= ~FL_SLEEP;
3834 - error = __posix_lock_file(inode, &fl, NULL);
3835 + f_xid = filp->f_xid;
3836 + error = __posix_lock_file(inode, &fl, NULL, f_xid);
3840 @@ -1268,7 +1297,7 @@ int locks_mandatory_area(int read_write,
3842 fl.fl_flags |= FL_SLEEP;
3843 fl.fl_owner = current->files;
3844 - error = __posix_lock_file(inode, &fl, NULL);
3845 + error = __posix_lock_file(inode, &fl, NULL, f_xid);
3846 if (error != FILE_LOCK_DEFERRED)
3848 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
3849 @@ -2165,6 +2194,11 @@ int fcntl_setlk(unsigned int fd, struct
3850 if (file_lock == NULL)
3853 + vxd_assert(filp->f_xid == vx_current_xid(),
3854 + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3855 + file_lock->fl_xid = filp->f_xid;
3856 + // vx_locks_inc(file_lock);
3859 * This might block, so we do it before checking the inode.
3861 @@ -2309,6 +2343,11 @@ int fcntl_setlk64(unsigned int fd, struc
3862 if (file_lock == NULL)
3865 + vxd_assert(filp->f_xid == vx_current_xid(),
3866 + "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3867 + file_lock->fl_xid = filp->f_xid;
3868 + // vx_locks_inc(file_lock);
3871 * This might block, so we do it before checking the inode.
3873 @@ -2624,8 +2663,11 @@ static int locks_show(struct seq_file *f
3875 lock_get_status(f, fl, iter->li_pos, "");
3877 - list_for_each_entry(bfl, &fl->fl_block, fl_block)
3878 + list_for_each_entry(bfl, &fl->fl_block, fl_block) {
3879 + if (!vx_check(bfl->fl_xid, VS_WATCH_P | VS_IDENT)) /* filter each blocked waiter by its own context id */
3881 lock_get_status(f, bfl, iter->li_pos, " ->");
3886 diff -urNp -x '*.orig' linux-4.4/fs/mount.h linux-4.4/fs/mount.h
3887 --- linux-4.4/fs/mount.h 2021-02-24 16:56:11.772419221 +0100
3888 +++ linux-4.4/fs/mount.h 2021-02-24 16:56:24.562822026 +0100
3889 @@ -68,6 +68,7 @@ struct mount {
3890 struct hlist_head mnt_pins;
3891 struct fs_pin mnt_umount;
3892 struct dentry *mnt_ex_mountpoint;
3893 + vtag_t mnt_tag; /* tagging used for vfsmount */
3896 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
3897 diff -urNp -x '*.orig' linux-4.4/fs/namei.c linux-4.4/fs/namei.c
3898 --- linux-4.4/fs/namei.c 2021-02-24 16:56:11.772419221 +0100
3899 +++ linux-4.4/fs/namei.c 2021-02-24 16:56:24.566155465 +0100
3901 #include <linux/device_cgroup.h>
3902 #include <linux/fs_struct.h>
3903 #include <linux/posix_acl.h>
3904 +#include <linux/proc_fs.h>
3905 +#include <linux/magic.h>
3906 +#include <linux/vserver/inode.h>
3907 +#include <linux/vs_base.h>
3908 +#include <linux/vs_tag.h>
3909 +#include <linux/vs_cowbl.h>
3910 +#include <linux/vs_device.h>
3911 +#include <linux/vs_context.h>
3912 +#include <linux/pid_namespace.h>
3913 #include <linux/hash.h>
3914 #include <asm/uaccess.h>
3916 #include "internal.h"
3917 +#include "proc/internal.h"
3920 /* [Feb-1997 T. Schoebel-Theuer]
3921 @@ -284,6 +294,93 @@ static int check_acl(struct inode *inode
3925 +static inline int dx_barrier(const struct inode *inode)
3927 + if (IS_BARRIER(inode) && !vx_check(0, VS_ADMIN | VS_WATCH)) {
3928 + vxwprintk_task(1, "did hit the barrier.");
3934 +static int __dx_permission(const struct inode *inode, int mask)
3936 + if (dx_barrier(inode))
3939 + if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) {
3940 + /* devpts is xid tagged */
3941 + if (S_ISDIR(inode->i_mode) ||
3942 + vx_check((vxid_t)i_tag_read(inode), VS_IDENT | VS_WATCH_P))
3945 + /* just pretend we didn't find anything */
3948 + else if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
3949 + struct proc_dir_entry *de = PDE(inode);
3951 + if (de && !vx_hide_check(0, de->vx_flags)) {
3952 + vxdprintk(VXD_CBIT(misc, 9),
3953 + VS_Q("%*s") " hidden by _dx_permission",
3954 + de->namelen, de->name);
3958 + if ((mask & (MAY_WRITE | MAY_APPEND))) {
3960 + struct task_struct *tsk;
3962 + if (vx_check(0, VS_ADMIN | VS_WATCH_P) ||
3963 + vx_flags(VXF_STATE_SETUP, 0))
3966 + pid = PROC_I(inode)->pid;
3971 + tsk = pid_task(pid, PIDTYPE_PID);
3972 + vxdprintk(VXD_CBIT(tag, 0), "accessing %p[#%u]",
3973 + tsk, (tsk ? vx_task_xid(tsk) : 0));
3975 + vx_check(vx_task_xid(tsk), VS_IDENT | VS_WATCH_P)) {
3976 + rcu_read_unlock();
3979 + rcu_read_unlock();
3982 + /* FIXME: Should we block some entries here? */
3987 + if (dx_notagcheck(inode->i_sb) ||
3988 + dx_check((vxid_t)i_tag_read(inode),
3989 + DX_HOSTID | DX_ADMIN | DX_WATCH | DX_IDENT))
3997 +int dx_permission(const struct inode *inode, int mask)
3999 + int ret = __dx_permission(inode, mask);
4000 + if (unlikely(ret)) {
4001 +#ifndef CONFIG_VSERVER_WARN_DEVPTS
4002 + if (inode->i_sb->s_magic != DEVPTS_SUPER_MAGIC)
4005 + "denied [0x%x] access to inode %s:%p[#%d,%lu]",
4006 + mask, inode->i_sb->s_id, inode,
4007 + i_tag_read(inode), inode->i_ino);
4013 * This does the basic permission checking
4015 @@ -408,10 +505,14 @@ int __inode_permission(struct inode *ino
4017 * Nobody gets write access to an immutable file.
4019 - if (IS_IMMUTABLE(inode))
4020 + if (IS_IMMUTABLE(inode) && !IS_COW(inode))
4024 + retval = dx_permission(inode, mask);
4028 retval = do_inode_permission(inode, mask);
4031 @@ -1627,6 +1728,9 @@ static int lookup_fast(struct nameidata
4036 + /* FIXME: check dx permission */
4039 path->dentry = dentry;
4040 if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
4041 @@ -1657,6 +1761,8 @@ unlazy:
4046 + /* FIXME: check dx permission */
4048 path->dentry = dentry;
4049 err = follow_managed(path, nd);
4050 @@ -2618,7 +2724,7 @@ static int may_delete(struct inode *dir,
4053 if (check_sticky(dir, inode) || IS_APPEND(inode) ||
4054 - IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
4055 + IS_IXORUNLINK(inode) || IS_SWAPFILE(inode))
4058 if (!d_is_dir(victim))
4059 @@ -2700,19 +2806,25 @@ int vfs_create(struct inode *dir, struct
4062 int error = may_create(dir, dentry);
4065 + vxdprintk(VXD_CBIT(misc, 3), "may_create failed with %d", error);
4069 if (!dir->i_op->create)
4070 return -EACCES; /* shouldn't it be ENOSYS? */
4073 error = security_inode_create(dir, dentry, mode);
4076 + vxdprintk(VXD_CBIT(misc, 3), "security_inode_create failed with %d", error);
4079 error = dir->i_op->create(dir, dentry, mode, want_excl);
4081 fsnotify_create(dir, dentry);
4083 + vxdprintk(VXD_CBIT(misc, 3), "i_op->create failed with %d", error);
4086 EXPORT_SYMBOL(vfs_create);
4087 @@ -2748,6 +2860,15 @@ static int may_open(struct path *path, i
4091 +#ifdef CONFIG_VSERVER_COWBL
4092 + if (IS_COW(inode) &&
4093 + ((flag & O_ACCMODE) != O_RDONLY)) {
4094 + if (IS_COW_LINK(inode))
4096 + inode->i_flags &= ~(S_IXUNLINK|S_IMMUTABLE);
4097 + mark_inode_dirty(inode);
4100 error = inode_permission(inode, acc_mode);
4103 @@ -3233,6 +3354,16 @@ finish_open:
4105 finish_open_created:
4106 error = may_open(&nd->path, acc_mode, open_flag);
4107 +#ifdef CONFIG_VSERVER_COWBL
4108 + if (error == -EMLINK) {
4109 + struct dentry *dentry;
4110 + dentry = cow_break_link(nd->name->name);
4111 + if (IS_ERR(dentry))
4112 + error = PTR_ERR(dentry);
4120 @@ -3357,6 +3488,9 @@ static struct file *path_openat(struct n
4124 +#ifdef CONFIG_VSERVER_COWBL
4127 file = get_empty_filp();
4130 @@ -3383,6 +3517,12 @@ static struct file *path_openat(struct n
4134 +#ifdef CONFIG_VSERVER_COWBL
4135 + if (error == -EMLINK) {
4136 + // path_cleanup(nd);
4141 if (!(opened & FILE_OPENED)) {
4143 @@ -3503,6 +3643,11 @@ static struct dentry *filename_create(in
4147 + vxdprintk(VXD_CBIT(misc, 3), "filename_create path.dentry = %p (%.*s), dentry = %p (%.*s), d_inode = %p",
4148 + path->dentry, path->dentry->d_name.len,
4149 + path->dentry->d_name.name, dentry,
4150 + dentry->d_name.len, dentry->d_name.name,
4151 + path->dentry->d_inode);
4155 @@ -3619,6 +3764,7 @@ retry:
4156 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
4161 done_path_create(&path, dentry);
4162 if (retry_estale(error, lookup_flags)) {
4163 @@ -4065,7 +4211,7 @@ int vfs_link(struct dentry *old_dentry,
4165 * A link to an append-only or immutable file cannot be created.
4167 - if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
4168 + if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
4170 if (!dir->i_op->link)
4172 @@ -4574,6 +4720,330 @@ int generic_readlink(struct dentry *dent
4174 EXPORT_SYMBOL(generic_readlink);
4177 +#ifdef CONFIG_VSERVER_COWBL
4180 +void dump_path(const char *name, struct path *path)
4182 + vxdprintk(VXD_CBIT(misc, 3),
4183 + "%s: path=%p mnt=%p dentry=%p", name, path,
4184 + path ? path->mnt : NULL,
4185 + path ? path->dentry : NULL);
4187 + if (path && path->mnt)
4188 + vxdprintk(VXD_CBIT(misc, 3),
4189 + "%s: path mnt_sb=%p[#%d,#%d] mnt_root=%p[#%d]", name,
4190 + path->mnt->mnt_sb,
4191 + path->mnt->mnt_sb ? path->mnt->mnt_sb->s_count : -1,
4192 + path->mnt->mnt_sb ? atomic_read(&path->mnt->mnt_sb->s_active) : -1,
4193 + path->mnt->mnt_root,
4194 + path->mnt->mnt_root ? path->mnt->mnt_root->d_lockref.count : -1);
4196 + if (path && path->dentry)
4197 + vxdprintk(VXD_CBIT(misc, 3),
4198 + "%s: path dentry=%p[#%d]", name,
4200 + path->dentry ? path->dentry->d_lockref.count : -1);
4204 +long do_cow_splice(struct file *in, struct file *out, size_t len)
4209 + return do_splice_direct(in, &ppos, out, &opos, len, 0);
4212 +struct dentry *cow_break_link(const char *pathname)
4214 + int ret, mode, pathlen, redo = 0, drop = 1;
4215 + struct path old_path = {}, par_path = {}, dir_path = {}, *new_path = NULL;
4216 + struct dentry *dir, *old_dentry, *new_dentry = NULL;
4217 + struct file *old_file;
4218 + struct file *new_file;
4219 + struct qstr new_qstr;
4221 + char *to, *path, pad='\251';
4223 + struct filename *filename = getname_kernel(pathname);
4224 + struct filename *to_filename;
4226 + vxdprintk(VXD_CBIT(misc, 1),
4227 + "cow_break_link(" VS_Q("%s") ")", pathname);
4229 + path = kmalloc(PATH_MAX, GFP_KERNEL);
4231 + if (!path || IS_ERR(filename))
4234 + /* old_path will have refs to dentry and mnt */
4235 + ret = filename_lookup(AT_FDCWD, filename, LOOKUP_FOLLOW, &old_path, NULL);
4236 + vxdprintk(VXD_CBIT(misc, 2),
4237 + "do_path_lookup(old): %d", ret);
4239 + goto out_free_path;
4241 + dump_path("cow (old)", &old_path);
4243 + /* no explicit reference for old_dentry here */
4244 + old_dentry = old_path.dentry;
4246 + /* speculative put */
4247 + // dput(old_dentry);
4249 + mode = old_dentry->d_inode->i_mode;
4250 + to = d_path(&old_path, path, PATH_MAX-2);
4251 + pathlen = strlen(to);
4252 + vxdprintk(VXD_CBIT(misc, 2),
4253 + "old path " VS_Q("%s") " [%p:" VS_Q("%.*s") ":%d]", to,
4255 + old_dentry->d_name.len, old_dentry->d_name.name,
4256 + old_dentry->d_name.len);
4258 + to[pathlen + 1] = 0;
4260 + new_dentry = NULL;
4261 + to[pathlen] = pad--;
4263 + if (pad <= '\240')
4266 + vxdprintk(VXD_CBIT(misc, 1), "temp copy " VS_Q("%s"), to);
4268 + /* dir_path will have refs to dentry and mnt */
4269 + to_filename = getname_kernel(to);
4270 + to_filename = filename_parentat(AT_FDCWD, to_filename,
4271 + LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE, &par_path, &new_qstr, &new_type);
4272 + vxdprintk(VXD_CBIT(misc, 2), "filename_parentat(new): %p", to_filename);
4273 + dump_path("cow (par)", &par_path);
4274 + if (IS_ERR(to_filename))
4277 + vxdprintk(VXD_CBIT(misc, 2), "to_filename refcnt=%d", to_filename->refcnt);
4278 + // putname(to_filename);
4280 + /* this puppy downs the dir inode mutex if successful.
4281 + dir_path will hold refs to dentry and mnt and
4282 + we'll have write access to the mnt */
4283 + new_dentry = filename_create(AT_FDCWD, to_filename, &dir_path, 0);
4284 + if (!new_dentry || IS_ERR(new_dentry)) {
4285 + path_put(&par_path);
4286 + vxdprintk(VXD_CBIT(misc, 2),
4287 + "filename_create(new) failed with %ld",
4288 + PTR_ERR(new_dentry));
4291 + vxdprintk(VXD_CBIT(misc, 2),
4292 + "filename_create(new): %p [" VS_Q("%.*s") ":%d]",
4294 + new_dentry->d_name.len, new_dentry->d_name.name,
4295 + new_dentry->d_name.len);
4297 + dump_path("cow (dir)", &dir_path);
4299 + /* take a reference on new_dentry */
4302 + /* dentry/mnt refs handed over to new_path */
4303 + new_path = &dir_path;
4305 + /* dentry for old/new dir */
4306 + dir = par_path.dentry;
4308 + /* give up reference on dir */
4309 + dput(new_path->dentry);
4311 + /* new_dentry already has a reference */
4312 + new_path->dentry = new_dentry;
4314 + ret = vfs_create(dir->d_inode, new_dentry, mode, 1);
4315 + vxdprintk(VXD_CBIT(misc, 2),
4316 + "vfs_create(new): %d", ret);
4317 + if (ret == -EEXIST) {
4318 + path_put(&par_path);
4319 + mutex_unlock(&dir->d_inode->i_mutex);
4320 + mnt_drop_write(new_path->mnt);
4321 + path_put(new_path);
4322 + new_dentry = NULL;
4326 + goto out_unlock_new;
4328 + /* the old file went away */
4330 + if ((redo = d_unhashed(old_dentry)))
4331 + goto out_unlock_new;
4333 + /* doesn't change refs for old_path */
4334 + old_file = dentry_open(&old_path, O_RDONLY, current_cred());
4335 + vxdprintk(VXD_CBIT(misc, 2),
4336 + "dentry_open(old): %p", old_file);
4337 + if (IS_ERR(old_file)) {
4338 + ret = PTR_ERR(old_file);
4339 + goto out_unlock_new;
4342 + /* doesn't change refs for new_path */
4343 + new_file = dentry_open(new_path, O_WRONLY, current_cred());
4344 + vxdprintk(VXD_CBIT(misc, 2),
4345 + "dentry_open(new): %p", new_file);
4346 + if (IS_ERR(new_file)) {
4347 + ret = PTR_ERR(new_file);
4348 + goto out_fput_old;
4351 + /* unlock the inode mutex from filename_create() */
4352 + mutex_unlock(&dir->d_inode->i_mutex);
4354 + /* drop write access to mnt */
4355 + mnt_drop_write(new_path->mnt);
4359 + size = i_size_read(old_file->f_path.dentry->d_inode);
4360 + ret = do_cow_splice(old_file, new_file, size);
4361 + vxdprintk(VXD_CBIT(misc, 2), "do_splice_direct: %d", ret);
4363 + goto out_fput_both;
4364 + } else if (ret < size) {
4366 + goto out_fput_both;
4368 + struct inode *old_inode = old_dentry->d_inode;
4369 + struct inode *new_inode = new_dentry->d_inode;
4370 + struct iattr attr = {
4371 + .ia_uid = old_inode->i_uid,
4372 + .ia_gid = old_inode->i_gid,
4373 + .ia_valid = ATTR_UID | ATTR_GID
4376 + setattr_copy(new_inode, &attr);
4377 + mark_inode_dirty(new_inode);
4380 + /* lock rename mutex */
4381 + mutex_lock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
4383 + /* drop out late */
4385 + if ((redo = d_unhashed(old_dentry)))
4388 + vxdprintk(VXD_CBIT(misc, 2),
4389 + "vfs_rename: [" VS_Q("%*s") ":%d] -> [" VS_Q("%*s") ":%d]",
4390 + new_dentry->d_name.len, new_dentry->d_name.name,
4391 + new_dentry->d_name.len,
4392 + old_dentry->d_name.len, old_dentry->d_name.name,
4393 + old_dentry->d_name.len);
4394 + ret = vfs_rename(par_path.dentry->d_inode, new_dentry,
4395 + old_dentry->d_parent->d_inode, old_dentry, NULL, 0);
4396 + vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
4399 + mutex_unlock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
4402 + vxdprintk(VXD_CBIT(misc, 3),
4403 + "fput(new_file=%p[#%ld])", new_file,
4404 + atomic_long_read(&new_file->f_count));
4408 + vxdprintk(VXD_CBIT(misc, 3),
4409 + "fput(old_file=%p[#%ld])", old_file,
4410 + atomic_long_read(&old_file->f_count));
4414 + /* drop references from par_path */
4415 + path_put(&par_path);
4418 + /* unlock the inode mutex from filename_create() */
4419 + mutex_unlock(&dir->d_inode->i_mutex);
4421 + /* drop write access to mnt */
4422 + mnt_drop_write(new_path->mnt);
4428 + /* error path cleanup */
4429 + vfs_unlink(dir->d_inode, new_dentry, NULL);
4433 + goto out_rel_both;
4435 + /* lookup dentry once again
4436 + old_path will be freed as old_path in out_rel_old */
4437 + ret = filename_lookup(AT_FDCWD, filename, LOOKUP_FOLLOW, &old_path, NULL);
4439 + goto out_rel_both;
4441 + /* drop reference on new_dentry */
4443 + new_dentry = old_path.dentry;
4445 + vxdprintk(VXD_CBIT(misc, 2),
4446 + "do_path_lookup(redo): %p [" VS_Q("%.*s") ":%d]",
4448 + new_dentry->d_name.len, new_dentry->d_name.name,
4449 + new_dentry->d_name.len);
4452 + dump_path("put (new)", new_path);
4454 + path_put(new_path);
4456 + dump_path("put (old)", &old_path);
4457 + path_put(&old_path);
4463 + new_dentry = ERR_PTR(ret);
4465 + // if (!IS_ERR(filename))
4466 + // putname(filename);
4467 + vxdprintk(VXD_CBIT(misc, 3),
4468 + "cow_break_link returning with %p", new_dentry);
4469 + return new_dentry;
4474 +int vx_info_mnt_namespace(struct mnt_namespace *ns, char *buffer)
4477 + struct vfsmount *vmnt;
4478 + char *pstr, *root;
4481 + pstr = kmalloc(PATH_MAX, GFP_KERNEL);
4485 + vmnt = &ns->root->mnt;
4487 + path.dentry = vmnt->mnt_root;
4488 + root = d_path(&path, pstr, PATH_MAX - 2);
4489 + length = sprintf(buffer + length,
4490 + "Namespace:\t%p [#%u]\n"
4491 + "RootPath:\t%s\n",
4492 + ns, atomic_read(&ns->count),
4498 +EXPORT_SYMBOL(vx_info_mnt_namespace);
4500 /* get the link contents into pagecache */
4501 static char *page_getlink(struct dentry * dentry, struct page **ppage)
4503 diff -urNp -x '*.orig' linux-4.4/fs/namespace.c linux-4.4/fs/namespace.c
4504 --- linux-4.4/fs/namespace.c 2021-02-24 16:56:11.772419221 +0100
4505 +++ linux-4.4/fs/namespace.c 2021-02-24 16:56:24.566155465 +0100
4507 #include <linux/magic.h>
4508 #include <linux/bootmem.h>
4509 #include <linux/task_work.h>
4510 +#include <linux/vs_base.h>
4511 +#include <linux/vs_context.h>
4512 +#include <linux/vs_tag.h>
4513 +#include <linux/vserver/space.h>
4514 +#include <linux/vserver/global.h>
4516 #include "internal.h"
4518 @@ -980,6 +985,10 @@ vfs_kern_mount(struct file_system_type *
4520 return ERR_PTR(-ENODEV);
4522 + if ((type->fs_flags & FS_BINARY_MOUNTDATA) &&
4523 + !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT))
4524 + return ERR_PTR(-EPERM);
4526 mnt = alloc_vfsmnt(name);
4528 return ERR_PTR(-ENOMEM);
4529 @@ -1056,6 +1065,7 @@ static struct mount *clone_mnt(struct mo
4530 mnt->mnt.mnt_root = dget(root);
4531 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
4532 mnt->mnt_parent = mnt;
4533 + mnt->mnt_tag = old->mnt_tag;
4535 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
4536 unlock_mount_hash();
4537 @@ -1651,7 +1661,8 @@ out_unlock:
4539 static inline bool may_mount(void)
4541 - return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
4542 + return vx_ns_capable(current->nsproxy->mnt_ns->user_ns,
4543 + CAP_SYS_ADMIN, VXC_SECURE_MOUNT);
4547 @@ -2158,6 +2169,7 @@ static int do_change_type(struct path *p
4551 + // mnt->mnt_flags = mnt_flags;
4554 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
4555 @@ -2186,12 +2198,14 @@ static bool has_locked_children(struct m
4556 * do loopback mount.
4558 static int do_loopback(struct path *path, const char *old_name,
4560 + vtag_t tag, unsigned long flags, int mnt_flags)
4562 struct path old_path;
4563 struct mount *mnt = NULL, *old, *parent;
4564 struct mountpoint *mp;
4565 + int recurse = flags & MS_REC;
4568 if (!old_name || !*old_name)
4570 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
4571 @@ -2271,7 +2285,7 @@ static int change_mount_flags(struct vfs
4572 * on it - tough luck.
4574 static int do_remount(struct path *path, int flags, int mnt_flags,
4576 + void *data, vxid_t xid)
4579 struct super_block *sb = path->mnt->mnt_sb;
4580 @@ -2779,6 +2793,7 @@ long do_mount(const char *dev_name, cons
4587 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
4588 @@ -2804,6 +2819,12 @@ long do_mount(const char *dev_name, cons
4589 if (!(flags & MS_NOATIME))
4590 mnt_flags |= MNT_RELATIME;
4592 + if (dx_parse_tag(data_page, &tag, 1, &mnt_flags, &flags)) {
4593 + /* FIXME: bind and re-mounts get the tag flag? */
4594 + if (flags & (MS_BIND|MS_REMOUNT))
4595 + flags |= MS_TAGID;
4598 /* Separate the per-mountpoint flags */
4599 if (flags & MS_NOSUID)
4600 mnt_flags |= MNT_NOSUID;
4601 @@ -2828,15 +2849,17 @@ long do_mount(const char *dev_name, cons
4602 mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
4605 + if (!vx_capable(CAP_SYS_ADMIN, VXC_DEV_MOUNT))
4606 + mnt_flags |= MNT_NODEV;
4607 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
4608 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
4611 if (flags & MS_REMOUNT)
4612 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
4615 else if (flags & MS_BIND)
4616 - retval = do_loopback(&path, dev_name, flags & MS_REC);
4617 + retval = do_loopback(&path, dev_name, tag, flags, mnt_flags);
4618 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
4619 retval = do_change_type(&path, flags);
4620 else if (flags & MS_MOVE)
4621 @@ -2956,6 +2979,7 @@ struct mnt_namespace *copy_mnt_ns(unsign
4622 p = next_mnt(p, old);
4625 + atomic_inc(&vs_global_mnt_ns);
4629 @@ -3131,9 +3155,10 @@ SYSCALL_DEFINE2(pivot_root, const char _
4630 new_mnt = real_mount(new.mnt);
4631 root_mnt = real_mount(root.mnt);
4632 old_mnt = real_mount(old.mnt);
4633 - if (IS_MNT_SHARED(old_mnt) ||
4634 + if ((IS_MNT_SHARED(old_mnt) ||
4635 IS_MNT_SHARED(new_mnt->mnt_parent) ||
4636 - IS_MNT_SHARED(root_mnt->mnt_parent))
4637 + IS_MNT_SHARED(root_mnt->mnt_parent)) &&
4638 + !vx_flags(VXF_STATE_SETUP, 0))
4640 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
4642 @@ -3271,6 +3296,7 @@ void put_mnt_ns(struct mnt_namespace *ns
4643 if (!atomic_dec_and_test(&ns->count))
4645 drop_collected_mounts(&ns->root->mnt);
4646 + atomic_dec(&vs_global_mnt_ns);
4650 diff -urNp -x '*.orig' linux-4.4/fs/nfs/client.c linux-4.4/fs/nfs/client.c
4651 --- linux-4.4/fs/nfs/client.c 2016-01-11 00:01:32.000000000 +0100
4652 +++ linux-4.4/fs/nfs/client.c 2021-02-24 16:56:24.566155465 +0100
4653 @@ -583,6 +583,9 @@ int nfs_init_server_rpcclient(struct nfs
4654 if (server->flags & NFS_MOUNT_SOFT)
4655 server->client->cl_softrtry = 1;
4657 + server->client->cl_tag = 0;
4658 + if (server->flags & NFS_MOUNT_TAGGED)
4659 + server->client->cl_tag = 1;
4662 EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient);
4663 @@ -760,6 +763,10 @@ static void nfs_server_set_fsinfo(struct
4664 server->acdirmin = server->acdirmax = 0;
4667 + /* FIXME: needs fsinfo
4668 + if (server->flags & NFS_MOUNT_TAGGED)
4669 + sb->s_flags |= MS_TAGGED; */
4671 server->maxfilesize = fsinfo->maxfilesize;
4673 server->time_delta = fsinfo->time_delta;
4674 diff -urNp -x '*.orig' linux-4.4/fs/nfs/dir.c linux-4.4/fs/nfs/dir.c
4675 --- linux-4.4/fs/nfs/dir.c 2021-02-24 16:56:11.775752660 +0100
4676 +++ linux-4.4/fs/nfs/dir.c 2021-02-24 16:56:24.566155465 +0100
4678 #include <linux/sched.h>
4679 #include <linux/kmemleak.h>
4680 #include <linux/xattr.h>
4681 +#include <linux/vs_tag.h>
4683 #include "delegation.h"
4685 @@ -1420,6 +1421,7 @@ struct dentry *nfs_lookup(struct inode *
4686 /* Success: notify readdir to use READDIRPLUS */
4687 nfs_advise_use_readdirplus(dir);
4689 + dx_propagate_tag(nd, inode);
4691 res = d_splice_alias(inode, dentry);
4693 diff -urNp -x '*.orig' linux-4.4/fs/nfs/inode.c linux-4.4/fs/nfs/inode.c
4694 --- linux-4.4/fs/nfs/inode.c 2021-02-24 16:56:11.779086098 +0100
4695 +++ linux-4.4/fs/nfs/inode.c 2021-02-24 16:56:24.566155465 +0100
4697 #include <linux/slab.h>
4698 #include <linux/compat.h>
4699 #include <linux/freezer.h>
4700 +#include <linux/vs_tag.h>
4702 #include <asm/uaccess.h>
4704 @@ -376,6 +377,8 @@ nfs_fhget(struct super_block *sb, struct
4705 if (inode->i_state & I_NEW) {
4706 struct nfs_inode *nfsi = NFS_I(inode);
4707 unsigned long now = jiffies;
4711 /* We set i_ino for the few things that still rely on it,
4712 * such as stat(2) */
4713 @@ -419,8 +422,8 @@ nfs_fhget(struct super_block *sb, struct
4714 inode->i_version = 0;
4717 - inode->i_uid = make_kuid(&init_user_ns, -2);
4718 - inode->i_gid = make_kgid(&init_user_ns, -2);
4719 + kuid = make_kuid(&init_user_ns, -2);
4720 + kgid = make_kgid(&init_user_ns, -2);
4721 inode->i_blocks = 0;
4722 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
4724 @@ -455,11 +458,11 @@ nfs_fhget(struct super_block *sb, struct
4725 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
4726 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
4727 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
4728 - inode->i_uid = fattr->uid;
4729 + kuid = fattr->uid;
4730 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
4731 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
4732 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
4733 - inode->i_gid = fattr->gid;
4734 + kgid = fattr->gid;
4735 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
4736 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
4737 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
4738 @@ -470,6 +473,10 @@ nfs_fhget(struct super_block *sb, struct
4740 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
4742 + inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
4743 + inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
4744 + inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, GLOBAL_ROOT_TAG);
4745 + /* maybe fattr->xid someday */
4747 nfs_setsecurity(inode, fattr, label);
4749 @@ -611,6 +618,8 @@ void nfs_setattr_update_inode(struct ino
4750 inode->i_uid = attr->ia_uid;
4751 if ((attr->ia_valid & ATTR_GID) != 0)
4752 inode->i_gid = attr->ia_gid;
4753 + if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
4754 + inode->i_tag = attr->ia_tag;
4755 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
4756 | NFS_INO_INVALID_ACL);
4758 @@ -1236,7 +1245,9 @@ static int nfs_check_inode_attributes(st
4759 struct nfs_inode *nfsi = NFS_I(inode);
4760 loff_t cur_size, new_isize;
4761 unsigned long invalid = 0;
4767 if (nfs_have_delegated_attributes(inode))
4769 @@ -1263,13 +1274,18 @@ static int nfs_check_inode_attributes(st
4770 if (nfsi->nrequests != 0)
4771 invalid &= ~NFS_INO_REVAL_PAGECACHE;
4773 + kuid = INOTAG_KUID(DX_TAG(inode), fattr->uid, fattr->gid);
4774 + kgid = INOTAG_KGID(DX_TAG(inode), fattr->uid, fattr->gid);
4775 + ktag = INOTAG_KTAG(DX_TAG(inode), fattr->uid, fattr->gid, GLOBAL_ROOT_TAG);
4777 /* Have any file permissions changed? */
4778 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
4779 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
4780 - if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
4781 + if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, kuid))
4782 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
4783 - if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
4784 + if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, kgid))
4785 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
4786 + /* maybe check for tag too? */
4788 /* Has the link count changed? */
4789 if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
4790 @@ -1643,6 +1659,9 @@ static int nfs_update_inode(struct inode
4791 unsigned long now = jiffies;
4792 unsigned long save_cache_validity;
4793 bool cache_revalidated = true;
4798 dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
4799 __func__, inode->i_sb->s_id, inode->i_ino,
4800 @@ -1753,6 +1772,9 @@ static int nfs_update_inode(struct inode
4801 cache_revalidated = false;
4804 + kuid = TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag);
4805 + kgid = TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag);
4806 + ktag = TAGINO_KTAG(DX_TAG(inode), inode->i_tag);
4808 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
4809 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
4810 @@ -1807,6 +1829,10 @@ static int nfs_update_inode(struct inode
4811 cache_revalidated = false;
4814 + inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
4815 + inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
4816 + inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, ktag);
4818 if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
4819 if (inode->i_nlink != fattr->nlink) {
4820 invalid |= NFS_INO_INVALID_ATTR;
4821 diff -urNp -x '*.orig' linux-4.4/fs/nfs/nfs3xdr.c linux-4.4/fs/nfs/nfs3xdr.c
4822 --- linux-4.4/fs/nfs/nfs3xdr.c 2016-01-11 00:01:32.000000000 +0100
4823 +++ linux-4.4/fs/nfs/nfs3xdr.c 2021-02-24 16:56:24.566155465 +0100
4825 #include <linux/nfs3.h>
4826 #include <linux/nfs_fs.h>
4827 #include <linux/nfsacl.h>
4828 +#include <linux/vs_tag.h>
4829 #include "internal.h"
4831 #define NFSDBG_FACILITY NFSDBG_XDR
4832 @@ -558,7 +559,8 @@ static __be32 *xdr_decode_nfstime3(__be3
4836 -static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
4837 +static void encode_sattr3(struct xdr_stream *xdr,
4838 + const struct iattr *attr, int tag)
4842 @@ -590,15 +592,19 @@ static void encode_sattr3(struct xdr_str
4846 - if (attr->ia_valid & ATTR_UID) {
4847 + if (attr->ia_valid & ATTR_UID ||
4848 + (tag && (attr->ia_valid & ATTR_TAG))) {
4850 - *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
4851 + *p++ = cpu_to_be32(from_kuid(&init_user_ns,
4852 + TAGINO_KUID(tag, attr->ia_uid, attr->ia_tag)));
4856 - if (attr->ia_valid & ATTR_GID) {
4857 + if (attr->ia_valid & ATTR_GID ||
4858 + (tag && (attr->ia_valid & ATTR_TAG))) {
4860 - *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
4861 + *p++ = cpu_to_be32(from_kgid(&init_user_ns,
4862 + TAGINO_KGID(tag, attr->ia_gid, attr->ia_tag)));
4866 @@ -887,7 +893,7 @@ static void nfs3_xdr_enc_setattr3args(st
4867 const struct nfs3_sattrargs *args)
4869 encode_nfs_fh3(xdr, args->fh);
4870 - encode_sattr3(xdr, args->sattr);
4871 + encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag);
4872 encode_sattrguard3(xdr, args);
4875 @@ -1037,13 +1043,13 @@ static void nfs3_xdr_enc_write3args(stru
4878 static void encode_createhow3(struct xdr_stream *xdr,
4879 - const struct nfs3_createargs *args)
4880 + const struct nfs3_createargs *args, int tag)
4882 encode_uint32(xdr, args->createmode);
4883 switch (args->createmode) {
4884 case NFS3_CREATE_UNCHECKED:
4885 case NFS3_CREATE_GUARDED:
4886 - encode_sattr3(xdr, args->sattr);
4887 + encode_sattr3(xdr, args->sattr, tag);
4889 case NFS3_CREATE_EXCLUSIVE:
4890 encode_createverf3(xdr, args->verifier);
4891 @@ -1058,7 +1064,7 @@ static void nfs3_xdr_enc_create3args(str
4892 const struct nfs3_createargs *args)
4894 encode_diropargs3(xdr, args->fh, args->name, args->len);
4895 - encode_createhow3(xdr, args);
4896 + encode_createhow3(xdr, args, req->rq_task->tk_client->cl_tag);
4900 @@ -1074,7 +1080,7 @@ static void nfs3_xdr_enc_mkdir3args(stru
4901 const struct nfs3_mkdirargs *args)
4903 encode_diropargs3(xdr, args->fh, args->name, args->len);
4904 - encode_sattr3(xdr, args->sattr);
4905 + encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag);
4909 @@ -1091,9 +1097,9 @@ static void nfs3_xdr_enc_mkdir3args(stru
4912 static void encode_symlinkdata3(struct xdr_stream *xdr,
4913 - const struct nfs3_symlinkargs *args)
4914 + const struct nfs3_symlinkargs *args, int tag)
4916 - encode_sattr3(xdr, args->sattr);
4917 + encode_sattr3(xdr, args->sattr, tag);
4918 encode_nfspath3(xdr, args->pages, args->pathlen);
4921 @@ -1102,7 +1108,7 @@ static void nfs3_xdr_enc_symlink3args(st
4922 const struct nfs3_symlinkargs *args)
4924 encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
4925 - encode_symlinkdata3(xdr, args);
4926 + encode_symlinkdata3(xdr, args, req->rq_task->tk_client->cl_tag);
4927 xdr->buf->flags |= XDRBUF_WRITE;
4930 @@ -1131,24 +1137,24 @@ static void nfs3_xdr_enc_symlink3args(st
4933 static void encode_devicedata3(struct xdr_stream *xdr,
4934 - const struct nfs3_mknodargs *args)
4935 + const struct nfs3_mknodargs *args, int tag)
4937 - encode_sattr3(xdr, args->sattr);
4938 + encode_sattr3(xdr, args->sattr, tag);
4939 encode_specdata3(xdr, args->rdev);
4942 static void encode_mknoddata3(struct xdr_stream *xdr,
4943 - const struct nfs3_mknodargs *args)
4944 + const struct nfs3_mknodargs *args, int tag)
4946 encode_ftype3(xdr, args->type);
4947 switch (args->type) {
4950 - encode_devicedata3(xdr, args);
4951 + encode_devicedata3(xdr, args, tag);
4955 - encode_sattr3(xdr, args->sattr);
4956 + encode_sattr3(xdr, args->sattr, tag);
4960 @@ -1163,7 +1169,7 @@ static void nfs3_xdr_enc_mknod3args(stru
4961 const struct nfs3_mknodargs *args)
4963 encode_diropargs3(xdr, args->fh, args->name, args->len);
4964 - encode_mknoddata3(xdr, args);
4965 + encode_mknoddata3(xdr, args, req->rq_task->tk_client->cl_tag);
4969 diff -urNp -x '*.orig' linux-4.4/fs/nfs/super.c linux-4.4/fs/nfs/super.c
4970 --- linux-4.4/fs/nfs/super.c 2021-02-24 16:56:11.785752975 +0100
4971 +++ linux-4.4/fs/nfs/super.c 2021-02-24 16:56:24.569488903 +0100
4973 #include <linux/parser.h>
4974 #include <linux/nsproxy.h>
4975 #include <linux/rcupdate.h>
4976 +#include <linux/vs_tag.h>
4978 #include <asm/uaccess.h>
4980 @@ -102,6 +103,7 @@ enum {
4986 /* Mount options that take string arguments */
4988 @@ -114,6 +116,9 @@ enum {
4989 /* Special mount options */
4990 Opt_userspace, Opt_deprecated, Opt_sloppy,
4992 + /* Linux-VServer tagging options */
4993 + Opt_tag, Opt_notag,
4998 @@ -183,6 +188,10 @@ static const match_table_t nfs_mount_opt
4999 { Opt_fscache_uniq, "fsc=%s" },
5000 { Opt_local_lock, "local_lock=%s" },
5002 + { Opt_tag, "tag" },
5003 + { Opt_notag, "notag" },
5004 + { Opt_tagid, "tagid=%u" },
5006 /* The following needs to be listed after all other options */
5007 { Opt_nfsvers, "v%s" },
5009 @@ -642,6 +651,7 @@ static void nfs_show_mount_options(struc
5010 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
5011 { NFS_MOUNT_UNSHARED, ",nosharecache", "" },
5012 { NFS_MOUNT_NORESVPORT, ",noresvport", "" },
5013 + { NFS_MOUNT_TAGGED, ",tag", "" },
5016 const struct proc_nfs_info *nfs_infop;
5017 @@ -1324,6 +1334,14 @@ static int nfs_parse_mount_options(char
5018 case Opt_nomigration:
5019 mnt->options &= ~NFS_OPTION_MIGRATION;
5021 +#ifndef CONFIG_TAGGING_NONE
5023 + mnt->flags |= NFS_MOUNT_TAGGED;
5026 + mnt->flags &= ~NFS_MOUNT_TAGGED;
5031 * options that take numeric values
5032 @@ -1410,6 +1428,12 @@ static int nfs_parse_mount_options(char
5033 goto out_invalid_value;
5034 mnt->minorversion = option;
5036 +#ifdef CONFIG_PROPAGATE
5039 + nfs_data.flags |= NFS_MOUNT_TAGGED;
5044 * options that take text values
5045 diff -urNp -x '*.orig' linux-4.4/fs/nfsd/auth.c linux-4.4/fs/nfsd/auth.c
5046 --- linux-4.4/fs/nfsd/auth.c 2021-02-24 16:56:11.785752975 +0100
5047 +++ linux-4.4/fs/nfsd/auth.c 2021-02-24 16:56:24.569488903 +0100
5049 /* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
5051 #include <linux/sched.h>
5052 +#include <linux/vs_tag.h>
5056 @@ -35,6 +36,9 @@ int nfsd_setuser(struct svc_rqst *rqstp,
5058 new->fsuid = rqstp->rq_cred.cr_uid;
5059 new->fsgid = rqstp->rq_cred.cr_gid;
5060 + /* FIXME: this desperately needs a tag :)
5061 + new->xid = (vxid_t)INOTAG_TAG(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid, 0);
5064 rqgi = rqstp->rq_cred.cr_group_info;
5066 diff -urNp -x '*.orig' linux-4.4/fs/nfsd/nfs3xdr.c linux-4.4/fs/nfsd/nfs3xdr.c
5067 --- linux-4.4/fs/nfsd/nfs3xdr.c 2021-02-24 16:56:11.785752975 +0100
5068 +++ linux-4.4/fs/nfsd/nfs3xdr.c 2021-02-24 16:56:24.569488903 +0100
5071 #include <linux/namei.h>
5072 #include <linux/sunrpc/svc_xprt.h>
5073 +#include <linux/vs_tag.h>
5077 @@ -98,6 +99,8 @@ static __be32 *
5078 decode_sattr3(__be32 *p, struct iattr *iap)
5081 + kuid_t kuid = GLOBAL_ROOT_UID;
5082 + kgid_t kgid = GLOBAL_ROOT_GID;
5086 @@ -106,15 +109,18 @@ decode_sattr3(__be32 *p, struct iattr *i
5087 iap->ia_mode = ntohl(*p++);
5090 - iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++));
5091 + iap->ia_uid = kuid = make_kuid(&init_user_ns, ntohl(*p++));
5092 if (uid_valid(iap->ia_uid))
5093 iap->ia_valid |= ATTR_UID;
5096 - iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++));
5097 + iap->ia_gid = kgid = make_kgid(&init_user_ns, ntohl(*p++));
5098 if (gid_valid(iap->ia_gid))
5099 iap->ia_valid |= ATTR_GID;
5101 + iap->ia_uid = INOTAG_KUID(DX_TAG_NFSD, kuid, kgid);
5102 + iap->ia_gid = INOTAG_KGID(DX_TAG_NFSD, kuid, kgid);
5103 + iap->ia_tag = INOTAG_KTAG(DX_TAG_NFSD, kuid, kgid, GLOBAL_ROOT_TAG);
5107 @@ -167,8 +173,12 @@ encode_fattr3(struct svc_rqst *rqstp, __
5108 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
5109 *p++ = htonl((u32) (stat->mode & S_IALLUGO));
5110 *p++ = htonl((u32) stat->nlink);
5111 - *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
5112 - *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
5113 + *p++ = htonl((u32) from_kuid(&init_user_ns,
5114 + TAGINO_KUID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
5115 + stat->uid, stat->tag)));
5116 + *p++ = htonl((u32) from_kgid(&init_user_ns,
5117 + TAGINO_KGID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
5118 + stat->gid, stat->tag)));
5119 if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
5120 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
5122 diff -urNp -x '*.orig' linux-4.4/fs/nfsd/nfs4xdr.c linux-4.4/fs/nfsd/nfs4xdr.c
5123 --- linux-4.4/fs/nfsd/nfs4xdr.c 2021-02-24 16:56:11.789086413 +0100
5124 +++ linux-4.4/fs/nfsd/nfs4xdr.c 2021-02-24 16:56:24.569488903 +0100
5126 #include <linux/utsname.h>
5127 #include <linux/pagemap.h>
5128 #include <linux/sunrpc/svcauth_gss.h>
5129 +#include <linux/vs_tag.h>
5133 @@ -2639,12 +2640,16 @@ out_acl:
5134 *p++ = cpu_to_be32(stat.nlink);
5136 if (bmval1 & FATTR4_WORD1_OWNER) {
5137 - status = nfsd4_encode_user(xdr, rqstp, stat.uid);
5138 + status = nfsd4_encode_user(xdr, rqstp,
5139 + TAGINO_KUID(DX_TAG(dentry->d_inode),
5140 + stat.uid, stat.tag));
5144 if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
5145 - status = nfsd4_encode_group(xdr, rqstp, stat.gid);
5146 + status = nfsd4_encode_group(xdr, rqstp,
5147 + TAGINO_KGID(DX_TAG(dentry->d_inode),
5148 + stat.gid, stat.tag));
5152 diff -urNp -x '*.orig' linux-4.4/fs/nfsd/nfsxdr.c linux-4.4/fs/nfsd/nfsxdr.c
5153 --- linux-4.4/fs/nfsd/nfsxdr.c 2021-02-24 16:56:11.789086413 +0100
5154 +++ linux-4.4/fs/nfsd/nfsxdr.c 2021-02-24 16:56:24.569488903 +0100
5159 +#include <linux/vs_tag.h>
5161 #define NFSDDBG_FACILITY NFSDDBG_XDR
5163 @@ -89,6 +90,8 @@ static __be32 *
5164 decode_sattr(__be32 *p, struct iattr *iap)
5167 + kuid_t kuid = GLOBAL_ROOT_UID;
5168 + kgid_t kgid = GLOBAL_ROOT_GID;
5172 @@ -101,15 +104,18 @@ decode_sattr(__be32 *p, struct iattr *ia
5175 if ((tmp = ntohl(*p++)) != (u32)-1) {
5176 - iap->ia_uid = make_kuid(&init_user_ns, tmp);
5177 + iap->ia_uid = kuid = make_kuid(&init_user_ns, tmp);
5178 if (uid_valid(iap->ia_uid))
5179 iap->ia_valid |= ATTR_UID;
5181 if ((tmp = ntohl(*p++)) != (u32)-1) {
5182 - iap->ia_gid = make_kgid(&init_user_ns, tmp);
5183 + iap->ia_gid = kgid = make_kgid(&init_user_ns, tmp);
5184 if (gid_valid(iap->ia_gid))
5185 iap->ia_valid |= ATTR_GID;
5185 iap->ia_valid |= ATTR_GID;
5187 + iap->ia_uid = INOTAG_KUID(DX_TAG_NFSD, kuid, kgid);
5188 + iap->ia_gid = INOTAG_KGID(DX_TAG_NFSD, kuid, kgid);
5189 + iap->ia_tag = INOTAG_KTAG(DX_TAG_NFSD, kuid, kgid, GLOBAL_ROOT_TAG);
5190 if ((tmp = ntohl(*p++)) != (u32)-1) {
5191 iap->ia_valid |= ATTR_SIZE;
5193 @@ -154,8 +160,10 @@ encode_fattr(struct svc_rqst *rqstp, __b
5194 *p++ = htonl(nfs_ftypes[type >> 12]);
5195 *p++ = htonl((u32) stat->mode);
5196 *p++ = htonl((u32) stat->nlink);
5197 - *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
5198 - *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
5199 + *p++ = htonl((u32) from_kuid(&init_user_ns,
5200 + TAGINO_KUID(DX_TAG(dentry->d_inode), stat->uid, stat->tag)));
5201 + *p++ = htonl((u32) from_kgid(&init_user_ns,
5202 + TAGINO_KGID(DX_TAG(dentry->d_inode), stat->gid, stat->tag)));
5204 if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
5205 *p++ = htonl(NFS_MAXPATHLEN);
5206 diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/dlmglue.c linux-4.4/fs/ocfs2/dlmglue.c
5207 --- linux-4.4/fs/ocfs2/dlmglue.c 2021-02-24 16:56:11.799086728 +0100
5208 +++ linux-4.4/fs/ocfs2/dlmglue.c 2021-02-24 16:56:24.569488903 +0100
5209 @@ -2128,6 +2128,7 @@ static void __ocfs2_stuff_meta_lvb(struc
5210 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
5211 lvb->lvb_iuid = cpu_to_be32(i_uid_read(inode));
5212 lvb->lvb_igid = cpu_to_be32(i_gid_read(inode));
5213 + lvb->lvb_itag = cpu_to_be16(i_tag_read(inode));
5214 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
5215 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
5216 lvb->lvb_iatime_packed =
5217 @@ -2178,6 +2179,7 @@ static void ocfs2_refresh_inode_from_lvb
5219 i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
5220 i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
5221 + i_tag_write(inode, be16_to_cpu(lvb->lvb_itag));
5222 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
5223 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
5224 ocfs2_unpack_timespec(&inode->i_atime,
5225 diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/dlmglue.h linux-4.4/fs/ocfs2/dlmglue.h
5226 --- linux-4.4/fs/ocfs2/dlmglue.h 2021-02-24 16:56:11.799086728 +0100
5227 +++ linux-4.4/fs/ocfs2/dlmglue.h 2021-02-24 16:56:24.569488903 +0100
5228 @@ -46,7 +46,8 @@ struct ocfs2_meta_lvb {
5231 __be32 lvb_igeneration;
5232 - __be32 lvb_reserved2;
5234 + __be16 lvb_reserved2;
5237 #define OCFS2_QINFO_LVB_VERSION 1
5238 diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/file.c linux-4.4/fs/ocfs2/file.c
5239 --- linux-4.4/fs/ocfs2/file.c 2021-02-24 16:56:11.802420166 +0100
5240 +++ linux-4.4/fs/ocfs2/file.c 2021-02-24 16:56:24.569488903 +0100
5241 @@ -1151,7 +1151,7 @@ int ocfs2_setattr(struct dentry *dentry,
5242 attr->ia_valid &= ~ATTR_SIZE;
5244 #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
5245 - | ATTR_GID | ATTR_UID | ATTR_MODE)
5246 + | ATTR_GID | ATTR_UID | ATTR_TAG | ATTR_MODE)
5247 if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
5250 diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/inode.c linux-4.4/fs/ocfs2/inode.c
5251 --- linux-4.4/fs/ocfs2/inode.c 2016-01-11 00:01:32.000000000 +0100
5252 +++ linux-4.4/fs/ocfs2/inode.c 2021-02-24 16:56:24.569488903 +0100
5254 #include <linux/highmem.h>
5255 #include <linux/pagemap.h>
5256 #include <linux/quotaops.h>
5257 +#include <linux/vs_tag.h>
5259 #include <asm/byteorder.h>
5261 @@ -78,11 +79,13 @@ void ocfs2_set_inode_flags(struct inode
5263 unsigned int flags = OCFS2_I(inode)->ip_attr;
5265 - inode->i_flags &= ~(S_IMMUTABLE |
5266 + inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
5267 S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
5269 if (flags & OCFS2_IMMUTABLE_FL)
5270 inode->i_flags |= S_IMMUTABLE;
5271 + if (flags & OCFS2_IXUNLINK_FL)
5272 + inode->i_flags |= S_IXUNLINK;
5274 if (flags & OCFS2_SYNC_FL)
5275 inode->i_flags |= S_SYNC;
5276 @@ -92,25 +95,44 @@ void ocfs2_set_inode_flags(struct inode
5277 inode->i_flags |= S_NOATIME;
5278 if (flags & OCFS2_DIRSYNC_FL)
5279 inode->i_flags |= S_DIRSYNC;
5281 + inode->i_vflags &= ~(V_BARRIER | V_COW);
5283 + if (flags & OCFS2_BARRIER_FL)
5284 + inode->i_vflags |= V_BARRIER;
5285 + if (flags & OCFS2_COW_FL)
5286 + inode->i_vflags |= V_COW;
5289 /* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */
5290 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
5292 unsigned int flags = oi->vfs_inode.i_flags;
5293 + unsigned int vflags = oi->vfs_inode.i_vflags;
5295 + oi->ip_attr &= ~(OCFS2_SYNC_FL | OCFS2_APPEND_FL |
5296 + OCFS2_IMMUTABLE_FL | OCFS2_IXUNLINK_FL |
5297 + OCFS2_NOATIME_FL | OCFS2_DIRSYNC_FL |
5298 + OCFS2_BARRIER_FL | OCFS2_COW_FL);
5300 + if (flags & S_IMMUTABLE)
5301 + oi->ip_attr |= OCFS2_IMMUTABLE_FL;
5302 + if (flags & S_IXUNLINK)
5303 + oi->ip_attr |= OCFS2_IXUNLINK_FL;
5305 - oi->ip_attr &= ~(OCFS2_SYNC_FL|OCFS2_APPEND_FL|
5306 - OCFS2_IMMUTABLE_FL|OCFS2_NOATIME_FL|OCFS2_DIRSYNC_FL);
5308 oi->ip_attr |= OCFS2_SYNC_FL;
5309 if (flags & S_APPEND)
5310 oi->ip_attr |= OCFS2_APPEND_FL;
5311 - if (flags & S_IMMUTABLE)
5312 - oi->ip_attr |= OCFS2_IMMUTABLE_FL;
5313 if (flags & S_NOATIME)
5314 oi->ip_attr |= OCFS2_NOATIME_FL;
5315 if (flags & S_DIRSYNC)
5316 oi->ip_attr |= OCFS2_DIRSYNC_FL;
5318 + if (vflags & V_BARRIER)
5319 + oi->ip_attr |= OCFS2_BARRIER_FL;
5320 + if (vflags & V_COW)
5321 + oi->ip_attr |= OCFS2_COW_FL;
5324 struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
5325 @@ -268,6 +290,8 @@ void ocfs2_populate_inode(struct inode *
5326 struct super_block *sb;
5327 struct ocfs2_super *osb;
5334 @@ -296,8 +320,12 @@ void ocfs2_populate_inode(struct inode *
5335 inode->i_generation = le32_to_cpu(fe->i_generation);
5336 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
5337 inode->i_mode = le16_to_cpu(fe->i_mode);
5338 - i_uid_write(inode, le32_to_cpu(fe->i_uid));
5339 - i_gid_write(inode, le32_to_cpu(fe->i_gid));
5340 + uid = le32_to_cpu(fe->i_uid);
5341 + gid = le32_to_cpu(fe->i_gid);
5342 + i_uid_write(inode, INOTAG_UID(DX_TAG(inode), uid, gid));
5343 + i_gid_write(inode, INOTAG_GID(DX_TAG(inode), uid, gid));
5344 + i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), uid, gid,
5345 + /* le16_to_cpu(raw_inode->i_raw_tag) */ 0));
5347 /* Fast symlinks will have i_size but no allocated clusters. */
5348 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) {
5349 diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/inode.h linux-4.4/fs/ocfs2/inode.h
5350 --- linux-4.4/fs/ocfs2/inode.h 2016-01-11 00:01:32.000000000 +0100
5351 +++ linux-4.4/fs/ocfs2/inode.h 2021-02-24 16:56:24.569488903 +0100
5352 @@ -161,6 +161,7 @@ struct buffer_head *ocfs2_bread(struct i
5354 void ocfs2_set_inode_flags(struct inode *inode);
5355 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
5356 +int ocfs2_sync_flags(struct inode *inode, int, int);
5358 static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
5360 diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/ioctl.c linux-4.4/fs/ocfs2/ioctl.c
5361 --- linux-4.4/fs/ocfs2/ioctl.c 2021-02-24 16:56:11.802420166 +0100
5362 +++ linux-4.4/fs/ocfs2/ioctl.c 2021-02-24 16:56:24.572822341 +0100
5363 @@ -76,7 +76,41 @@ static int ocfs2_get_inode_attr(struct i
5367 -static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
5368 +int ocfs2_sync_flags(struct inode *inode, int flags, int vflags)
5370 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5371 + struct buffer_head *bh = NULL;
5372 + handle_t *handle = NULL;
5375 + status = ocfs2_inode_lock(inode, &bh, 1);
5377 + mlog_errno(status);
5380 + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
5381 + if (IS_ERR(handle)) {
5382 + status = PTR_ERR(handle);
5383 + mlog_errno(status);
5387 + inode->i_flags = flags;
5388 + inode->i_vflags = vflags;
5389 + ocfs2_get_inode_flags(OCFS2_I(inode));
5391 + status = ocfs2_mark_inode_dirty(handle, inode, bh);
5393 + mlog_errno(status);
5395 + ocfs2_commit_trans(osb, handle);
5397 + ocfs2_inode_unlock(inode, 1);
5402 +int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
5405 struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
5406 @@ -116,6 +150,11 @@ static int ocfs2_set_inode_attr(struct i
5410 + if (IS_BARRIER(inode)) {
5411 + vxwprintk_task(1, "messing with the barrier.");
5415 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
5416 if (IS_ERR(handle)) {
5417 status = PTR_ERR(handle);
5418 @@ -841,6 +880,7 @@ bail:
5423 long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
5425 struct inode *inode = file_inode(filp);
5426 diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/namei.c linux-4.4/fs/ocfs2/namei.c
5427 --- linux-4.4/fs/ocfs2/namei.c 2021-02-24 16:56:11.802420166 +0100
5428 +++ linux-4.4/fs/ocfs2/namei.c 2021-02-24 16:56:24.572822341 +0100
5430 #include <linux/slab.h>
5431 #include <linux/highmem.h>
5432 #include <linux/quotaops.h>
5433 +#include <linux/vs_tag.h>
5435 #include <cluster/masklog.h>
5437 @@ -516,6 +517,7 @@ static int __ocfs2_mknod_locked(struct i
5438 struct ocfs2_extent_list *fel;
5440 struct ocfs2_inode_info *oi = OCFS2_I(inode);
5445 @@ -553,8 +555,13 @@ static int __ocfs2_mknod_locked(struct i
5446 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
5447 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
5448 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
5449 - fe->i_uid = cpu_to_le32(i_uid_read(inode));
5450 - fe->i_gid = cpu_to_le32(i_gid_read(inode));
5452 + ktag = make_ktag(&init_user_ns, dx_current_fstag(osb->sb));
5453 + fe->i_uid = cpu_to_le32(from_kuid(&init_user_ns,
5454 + TAGINO_KUID(DX_TAG(inode), inode->i_uid, ktag)));
5455 + fe->i_gid = cpu_to_le32(from_kgid(&init_user_ns,
5456 + TAGINO_KGID(DX_TAG(inode), inode->i_gid, ktag)));
5457 + inode->i_tag = ktag; /* is this correct? */
5458 fe->i_mode = cpu_to_le16(inode->i_mode);
5459 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
5460 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
5461 diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/ocfs2.h linux-4.4/fs/ocfs2/ocfs2.h
5462 --- linux-4.4/fs/ocfs2/ocfs2.h 2021-02-24 16:56:11.802420166 +0100
5463 +++ linux-4.4/fs/ocfs2/ocfs2.h 2021-02-24 16:56:24.572822341 +0100
5464 @@ -289,6 +289,7 @@ enum ocfs2_mount_options
5465 OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
5466 OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
5467 OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
5468 + OCFS2_MOUNT_TAGGED = 1 << 18, /* use tagging */
5471 #define OCFS2_OSB_SOFT_RO 0x0001
5472 diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/ocfs2_fs.h linux-4.4/fs/ocfs2/ocfs2_fs.h
5473 --- linux-4.4/fs/ocfs2/ocfs2_fs.h 2021-02-24 16:56:11.802420166 +0100
5474 +++ linux-4.4/fs/ocfs2/ocfs2_fs.h 2021-02-24 16:56:24.572822341 +0100
5475 @@ -275,6 +275,11 @@
5476 #define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
5477 #define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
5479 +#define OCFS2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */
5481 +#define OCFS2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */
5482 +#define OCFS2_COW_FL FS_COW_FL /* Copy on Write marker */
5484 #define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
5485 #define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
5487 diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/super.c linux-4.4/fs/ocfs2/super.c
5488 --- linux-4.4/fs/ocfs2/super.c 2021-02-24 16:56:11.805753604 +0100
5489 +++ linux-4.4/fs/ocfs2/super.c 2021-02-24 16:56:24.572822341 +0100
5490 @@ -193,6 +193,7 @@ enum {
5492 Opt_journal_async_commit,
5494 + Opt_tag, Opt_notag, Opt_tagid,
5498 @@ -226,6 +227,9 @@ static const match_table_t tokens = {
5499 {Opt_dir_resv_level, "dir_resv_level=%u"},
5500 {Opt_journal_async_commit, "journal_async_commit"},
5501 {Opt_err_cont, "errors=continue"},
5503 + {Opt_notag, "notag"},
5504 + {Opt_tagid, "tagid=%u"},
5508 @@ -676,6 +680,13 @@ static int ocfs2_remount(struct super_bl
5512 + if ((osb->s_mount_opt & OCFS2_MOUNT_TAGGED) !=
5513 + (parsed_options.mount_opt & OCFS2_MOUNT_TAGGED)) {
5515 + mlog(ML_ERROR, "Cannot change tagging on remount\n");
5519 /* We're going to/from readonly mode. */
5520 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
5521 /* Disable quota accounting before remounting RO */
5522 @@ -1165,6 +1176,9 @@ static int ocfs2_fill_super(struct super
5524 ocfs2_complete_mount_recovery(osb);
5526 + if (osb->s_mount_opt & OCFS2_MOUNT_TAGGED)
5527 + sb->s_flags |= MS_TAGGED;
5529 if (ocfs2_mount_local(osb))
5530 snprintf(nodestr, sizeof(nodestr), "local");
5532 @@ -1485,6 +1499,20 @@ static int ocfs2_parse_options(struct su
5533 case Opt_journal_async_commit:
5534 mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
5536 +#ifndef CONFIG_TAGGING_NONE
5538 + mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
5541 + mopt->mount_opt &= ~OCFS2_MOUNT_TAGGED;
5544 +#ifdef CONFIG_PROPAGATE
5547 + mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
5552 "Unrecognized mount option \"%s\" "
5553 diff -urNp -x '*.orig' linux-4.4/fs/open.c linux-4.4/fs/open.c
5554 --- linux-4.4/fs/open.c 2021-02-24 16:56:11.805753604 +0100
5555 +++ linux-4.4/fs/open.c 2021-02-24 16:56:24.572822341 +0100
5557 #include <linux/ima.h>
5558 #include <linux/dnotify.h>
5559 #include <linux/compat.h>
5560 +#include <linux/vs_base.h>
5561 +#include <linux/vs_limit.h>
5562 +#include <linux/vs_tag.h>
5563 +#include <linux/vs_cowbl.h>
5564 +#include <linux/vserver/dlimit.h>
5566 #include "internal.h"
5568 @@ -70,6 +75,11 @@ long vfs_truncate(struct path *path, lof
5569 struct inode *inode;
5572 +#ifdef CONFIG_VSERVER_COWBL
5573 + error = cow_check_and_break(path);
5577 inode = path->dentry->d_inode;
5579 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
5580 @@ -567,6 +577,13 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, cons
5581 unsigned int lookup_flags = LOOKUP_FOLLOW;
5583 error = user_path_at(dfd, filename, lookup_flags, &path);
5584 +#ifdef CONFIG_VSERVER_COWBL
5586 + error = cow_check_and_break(&path);
5592 error = chmod_common(&path, mode);
5594 @@ -601,13 +618,15 @@ retry_deleg:
5595 if (!uid_valid(uid))
5597 newattrs.ia_valid |= ATTR_UID;
5598 - newattrs.ia_uid = uid;
5599 + newattrs.ia_uid = make_kuid(&init_user_ns,
5600 + dx_map_uid(user));
5602 if (group != (gid_t) -1) {
5603 if (!gid_valid(gid))
5605 newattrs.ia_valid |= ATTR_GID;
5606 - newattrs.ia_gid = gid;
5607 + newattrs.ia_gid = make_kgid(&init_user_ns,
5608 + dx_map_gid(group));
5610 if (!S_ISDIR(inode->i_mode))
5611 newattrs.ia_valid |=
5612 @@ -645,6 +664,10 @@ retry:
5613 error = mnt_want_write(path.mnt);
5616 +#ifdef CONFIG_VSERVER_COWBL
5617 + error = cow_check_and_break(&path);
5620 error = chown_common(&path, user, group);
5621 mnt_drop_write(path.mnt);
5623 diff -urNp -x '*.orig' linux-4.4/fs/proc/array.c linux-4.4/fs/proc/array.c
5624 --- linux-4.4/fs/proc/array.c 2021-02-24 16:56:11.809087043 +0100
5625 +++ linux-4.4/fs/proc/array.c 2021-02-24 16:56:24.572822341 +0100
5627 #include <linux/tracehook.h>
5628 #include <linux/string_helpers.h>
5629 #include <linux/user_namespace.h>
5630 +#include <linux/vs_context.h>
5631 +#include <linux/vs_network.h>
5633 #include <asm/pgtable.h>
5634 #include <asm/processor.h>
5635 @@ -155,6 +157,9 @@ static inline void task_state(struct seq
5636 ppid = pid_alive(p) ?
5637 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
5639 + if (unlikely(vx_current_initpid(p->pid)))
5642 tracer = ptrace_parent(p);
5644 tpid = task_pid_nr_ns(tracer, ns);
5645 @@ -293,8 +298,8 @@ static inline void task_sig(struct seq_f
5646 render_sigset_t(m, "SigCgt:\t", &caught);
5649 -static void render_cap_t(struct seq_file *m, const char *header,
5651 +void render_cap_t(struct seq_file *m, const char *header,
5652 + struct vx_info *vxi, kernel_cap_t *a)
5656 @@ -321,11 +326,12 @@ static inline void task_cap(struct seq_f
5657 cap_ambient = cred->cap_ambient;
5660 - render_cap_t(m, "CapInh:\t", &cap_inheritable);
5661 - render_cap_t(m, "CapPrm:\t", &cap_permitted);
5662 - render_cap_t(m, "CapEff:\t", &cap_effective);
5663 - render_cap_t(m, "CapBnd:\t", &cap_bset);
5664 - render_cap_t(m, "CapAmb:\t", &cap_ambient);
5665 + /* FIXME: maybe move the p->vx_info masking to __task_cred() ? */
5666 + render_cap_t(m, "CapInh:\t", p->vx_info, &cap_inheritable);
5667 + render_cap_t(m, "CapPrm:\t", p->vx_info, &cap_permitted);
5668 + render_cap_t(m, "CapEff:\t", p->vx_info, &cap_effective);
5669 + render_cap_t(m, "CapBnd:\t", p->vx_info, &cap_bset);
5670 + render_cap_t(m, "CapAmb:\t", p->vx_info, &cap_ambient);
5673 static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
5674 @@ -377,6 +383,43 @@ static void task_cpus_allowed(struct seq
5675 cpumask_pr_args(&task->cpus_allowed));
5678 +int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
5679 + struct pid *pid, struct task_struct *task)
5681 + seq_printf(m, "Proxy:\t%p(%c)\n"
5689 + (task->nsproxy == init_task.nsproxy ? 'I' : '-'),
5690 + atomic_read(&task->nsproxy->count),
5691 + task->nsproxy->uts_ns,
5692 + (task->nsproxy->uts_ns == init_task.nsproxy->uts_ns ? 'I' : '-'),
5693 + task->nsproxy->ipc_ns,
5694 + (task->nsproxy->ipc_ns == init_task.nsproxy->ipc_ns ? 'I' : '-'),
5695 + task->nsproxy->mnt_ns,
5696 + (task->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns ? 'I' : '-'),
5697 + task->nsproxy->pid_ns_for_children,
5698 + (task->nsproxy->pid_ns_for_children ==
5699 + init_task.nsproxy->pid_ns_for_children ? 'I' : '-'),
5700 + task->nsproxy->net_ns,
5701 + (task->nsproxy->net_ns == init_task.nsproxy->net_ns ? 'I' : '-'));
5705 +void task_vs_id(struct seq_file *m, struct task_struct *task)
5707 + if (task_vx_flags(task, VXF_HIDE_VINFO, 0))
5710 + seq_printf(m, "VxID:\t%d\n", vx_task_xid(task));
5711 + seq_printf(m, "NxID:\t%d\n", nx_task_nid(task));
5715 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
5716 struct pid *pid, struct task_struct *task)
5718 @@ -394,6 +437,7 @@ int proc_pid_status(struct seq_file *m,
5719 task_seccomp(m, task);
5720 task_cpus_allowed(m, task);
5721 cpuset_task_status_allowed(m, task);
5722 + task_vs_id(m, task);
5723 task_context_switch_counts(m, task);
5726 @@ -509,6 +553,17 @@ static int do_task_stat(struct seq_file
5727 /* convert nsec -> ticks */
5728 start_time = nsec_to_clock_t(task->real_start_time);
5730 + /* fixup start time for virt uptime */
5731 + if (vx_flags(VXF_VIRT_UPTIME, 0)) {
5732 + unsigned long long bias =
5733 + current->vx_info->cvirt.bias_clock;
5735 + if (start_time > bias)
5736 + start_time -= bias;
5741 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
5742 seq_put_decimal_ll(m, ' ', ppid);
5743 seq_put_decimal_ll(m, ' ', pgid);
5744 diff -urNp -x '*.orig' linux-4.4/fs/proc/base.c linux-4.4/fs/proc/base.c
5745 --- linux-4.4/fs/proc/base.c 2021-02-24 16:56:11.809087043 +0100
5746 +++ linux-4.4/fs/proc/base.c 2021-02-24 16:56:24.572822341 +0100
5748 #include <linux/slab.h>
5749 #include <linux/flex_array.h>
5750 #include <linux/posix-timers.h>
5751 +#include <linux/vs_context.h>
5752 +#include <linux/vs_network.h>
5753 #ifdef CONFIG_HARDWALL
5754 #include <asm/hardwall.h>
5756 @@ -1125,11 +1127,15 @@ static ssize_t oom_adj_write(struct file
5757 oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
5759 if (oom_adj < task->signal->oom_score_adj &&
5760 - !capable(CAP_SYS_RESOURCE)) {
5761 + !vx_capable(CAP_SYS_RESOURCE, VXC_OOM_ADJUST)) {
5766 + /* guests must not disable the oom killer; oom_adj is in score units here */
5767 + if (vx_current_xid() && (oom_adj == OOM_SCORE_ADJ_MIN))
5768 + oom_adj = (OOM_ADJUST_MIN * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
5771 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
5772 * /proc/pid/oom_score_adj instead.
5773 @@ -1694,6 +1700,8 @@ struct inode *proc_pid_make_inode(struct
5774 inode->i_gid = cred->egid;
5777 + /* procfs is xid tagged */
5778 + i_tag_write(inode, (vtag_t)vx_task_xid(task));
5779 security_task_to_inode(task, inode);
5782 @@ -1739,6 +1747,8 @@ int pid_getattr(struct vfsmount *mnt, st
5786 +// static unsigned name_to_int(struct dentry *dentry);
5789 * Exceptional case: normally we are not allowed to unhash a busy
5790 * directory. In this case, however, we can do it - no aliasing problems
5791 @@ -1767,6 +1777,19 @@ int pid_revalidate(struct dentry *dentry
5792 task = get_proc_task(inode);
5795 + unsigned pid = name_to_int(&dentry->d_name);
5797 + if (pid != ~0U && pid != vx_map_pid(task->pid) &&
5798 + pid != __task_pid_nr_ns(task, PIDTYPE_PID,
5799 + task_active_pid_ns(task))) {
5800 + vxdprintk(VXD_CBIT(misc, 10),
5801 + VS_Q("%*s") " dropped by pid_revalidate(%d!=%d)",
5802 + dentry->d_name.len, dentry->d_name.name,
5803 + pid, vx_map_pid(task->pid));
5804 + put_task_struct(task);
5808 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
5809 task_dumpable(task)) {
5811 @@ -2336,6 +2359,13 @@ static struct dentry *proc_pident_lookup
5815 + /* TODO: maybe we can come up with a generic approach? */
5816 + if (task_vx_flags(task, VXF_HIDE_VINFO, 0) &&
5817 + (dentry->d_name.len == 5) &&
5818 + (!memcmp(dentry->d_name.name, "vinfo", 5) ||
5819 + !memcmp(dentry->d_name.name, "ninfo", 5)))
5823 * Yes, it does not scale. And it should not. Don't add
5824 * new entries into /proc/<tgid>/ without very good reasons.
5825 @@ -2778,6 +2808,11 @@ static int proc_pid_personality(struct s
5826 static const struct file_operations proc_task_operations;
5827 static const struct inode_operations proc_task_inode_operations;
5829 +extern int proc_pid_vx_info(struct seq_file *,
5830 + struct pid_namespace *, struct pid *, struct task_struct *);
5831 +extern int proc_pid_nx_info(struct seq_file *,
5832 + struct pid_namespace *, struct pid *, struct task_struct *);
5834 static const struct pid_entry tgid_base_stuff[] = {
5835 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
5836 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
5837 @@ -2842,6 +2877,8 @@ static const struct pid_entry tgid_base_
5838 #ifdef CONFIG_CGROUPS
5839 ONE("cgroup", S_IRUGO, proc_cgroup_show),
5841 + ONE("vinfo", S_IRUGO, proc_pid_vx_info),
5842 + ONE("ninfo", S_IRUGO, proc_pid_nx_info),
5843 ONE("oom_score", S_IRUGO, proc_oom_score),
5844 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
5845 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
5846 @@ -3056,7 +3093,7 @@ retry:
5848 pid = find_ge_pid(iter.tgid, ns);
5850 - iter.tgid = pid_nr_ns(pid, ns);
5851 + iter.tgid = pid_unmapped_nr_ns(pid, ns);
5852 iter.task = pid_task(pid, PIDTYPE_PID);
5853 /* What we to know is if the pid we have find is the
5854 * pid of a thread_group_leader. Testing for task
5855 @@ -3116,8 +3153,10 @@ int proc_pid_readdir(struct file *file,
5856 if (!has_pid_permissions(ns, iter.task, 2))
5859 - len = snprintf(name, sizeof(name), "%d", iter.tgid);
5860 + len = snprintf(name, sizeof(name), "%d", vx_map_tgid(iter.tgid));
5861 ctx->pos = iter.tgid + TGID_OFFSET;
5862 + if (!vx_proc_task_visible(iter.task))
5864 if (!proc_fill_cache(file, ctx, name, len,
5865 proc_pid_instantiate, iter.task, NULL)) {
5866 put_task_struct(iter.task);
5867 @@ -3254,6 +3293,7 @@ static const struct pid_entry tid_base_s
5868 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
5869 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
5871 + ONE("nsproxy", S_IRUGO, proc_pid_nsproxy),
5874 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
5875 @@ -3320,6 +3360,8 @@ static struct dentry *proc_task_lookup(s
5876 tid = name_to_int(&dentry->d_name);
5879 + if (vx_current_initpid(tid))
5882 ns = dentry->d_sb->s_fs_info;
5884 diff -urNp -x '*.orig' linux-4.4/fs/proc/generic.c linux-4.4/fs/proc/generic.c
5885 --- linux-4.4/fs/proc/generic.c 2021-02-24 16:56:11.809087043 +0100
5886 +++ linux-4.4/fs/proc/generic.c 2021-02-24 16:56:24.572822341 +0100
5888 #include <linux/bitops.h>
5889 #include <linux/spinlock.h>
5890 #include <linux/completion.h>
5891 +#include <linux/vserver/inode.h>
5892 #include <asm/uaccess.h>
5894 #include "internal.h"
5895 @@ -66,8 +67,16 @@ static struct proc_dir_entry *pde_subdir
5896 node = node->rb_left;
5897 else if (result > 0)
5898 node = node->rb_right;
5901 + if (!vx_hide_check(0, de->vx_flags)) {
5902 + vxdprintk(VXD_CBIT(misc, 9),
5904 + " hidden in pde_subdir_find()",
5905 + de->namelen, de->name);
5913 @@ -241,6 +250,8 @@ struct dentry *proc_lookup_de(struct pro
5914 return ERR_PTR(-ENOMEM);
5915 d_set_d_op(dentry, &simple_dentry_operations);
5916 d_add(dentry, inode);
5917 + /* generic proc entries belong to the host */
5918 + i_tag_write(inode, 0);
5921 read_unlock(&proc_subdir_lock);
5922 @@ -287,6 +298,12 @@ int proc_readdir_de(struct proc_dir_entr
5924 struct proc_dir_entry *next;
5926 + if (!vx_hide_check(0, de->vx_flags)) {
5927 + vxdprintk(VXD_CBIT(misc, 9),
5928 + VS_Q("%*s") " hidden in proc_readdir_de()",
5929 + de->namelen, de->name);
5932 read_unlock(&proc_subdir_lock);
5933 if (!dir_emit(ctx, de->name, de->namelen,
5934 de->low_ino, de->mode >> 12)) {
5935 @@ -294,6 +311,7 @@ int proc_readdir_de(struct proc_dir_entr
5938 read_lock(&proc_subdir_lock);
5941 next = pde_subdir_next(de);
5943 @@ -387,6 +405,7 @@ static struct proc_dir_entry *__proc_cre
5946 ent->subdir = RB_ROOT;
5947 + ent->vx_flags = IATTR_PROC_DEFAULT;
5948 atomic_set(&ent->count, 1);
5949 spin_lock_init(&ent->pde_unload_lock);
5950 INIT_LIST_HEAD(&ent->pde_openers);
5951 @@ -411,7 +430,8 @@ struct proc_dir_entry *proc_symlink(cons
5957 + ent->vx_flags = IATTR_PROC_SYMLINK;
5961 diff -urNp -x '*.orig' linux-4.4/fs/proc/inode.c linux-4.4/fs/proc/inode.c
5962 --- linux-4.4/fs/proc/inode.c 2021-02-24 16:56:11.809087043 +0100
5963 +++ linux-4.4/fs/proc/inode.c 2021-02-24 16:56:24.576155779 +0100
5964 @@ -431,6 +431,8 @@ struct inode *proc_get_inode(struct supe
5965 inode->i_uid = de->uid;
5966 inode->i_gid = de->gid;
5969 + PROC_I(inode)->vx_flags = de->vx_flags;
5971 inode->i_size = de->size;
5973 diff -urNp -x '*.orig' linux-4.4/fs/proc/internal.h linux-4.4/fs/proc/internal.h
5974 --- linux-4.4/fs/proc/internal.h 2016-01-11 00:01:32.000000000 +0100
5975 +++ linux-4.4/fs/proc/internal.h 2021-02-24 16:56:24.576155779 +0100
5977 #include <linux/spinlock.h>
5978 #include <linux/atomic.h>
5979 #include <linux/binfmts.h>
5980 +#include <linux/vs_pid.h>
5982 struct ctl_table_header;
5984 @@ -34,6 +35,7 @@ struct proc_dir_entry {
5990 const struct inode_operations *proc_iops;
5991 const struct file_operations *proc_fops;
5992 @@ -51,15 +53,22 @@ struct proc_dir_entry {
6000 int (*proc_get_link)(struct dentry *, struct path *);
6001 int (*proc_show)(struct seq_file *m,
6002 struct pid_namespace *ns, struct pid *pid,
6003 struct task_struct *task);
6004 + int (*proc_vs_read)(char *page);
6005 + int (*proc_vxi_read)(struct vx_info *vxi, char *page);
6006 + int (*proc_nxi_read)(struct nx_info *nxi, char *page);
6014 struct proc_dir_entry *pde;
6015 @@ -92,11 +101,16 @@ static inline struct pid *proc_pid(struc
6016 return PROC_I(inode)->pid;
6019 -static inline struct task_struct *get_proc_task(struct inode *inode)
6020 +static inline struct task_struct *get_proc_task_real(struct inode *inode)
6022 return get_pid_task(proc_pid(inode), PIDTYPE_PID);
6025 +static inline struct task_struct *get_proc_task(struct inode *inode)
6027 + return vx_get_proc_task(inode, proc_pid(inode));
6030 static inline int task_dumpable(struct task_struct *task)
6033 @@ -155,6 +169,8 @@ extern int proc_pid_status(struct seq_fi
6034 struct pid *, struct task_struct *);
6035 extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
6036 struct pid *, struct task_struct *);
6037 +extern int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
6038 + struct pid *pid, struct task_struct *task);
6042 diff -urNp -x '*.orig' linux-4.4/fs/proc/loadavg.c linux-4.4/fs/proc/loadavg.c
6043 --- linux-4.4/fs/proc/loadavg.c 2016-01-11 00:01:32.000000000 +0100
6044 +++ linux-4.4/fs/proc/loadavg.c 2021-02-24 16:56:24.576155779 +0100
6047 static int loadavg_proc_show(struct seq_file *m, void *v)
6049 + unsigned long running;
6050 + unsigned int threads;
6051 unsigned long avnrun[3];
6053 get_avenrun(avnrun, FIXED_1/200, 0);
6055 + if (vx_flags(VXF_VIRT_LOAD, 0)) {
6056 + struct vx_info *vxi = current_vx_info();
6058 + running = atomic_read(&vxi->cvirt.nr_running);
6059 + threads = atomic_read(&vxi->cvirt.nr_threads);
6061 + running = nr_running();
6062 + threads = nr_threads;
6065 seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
6066 LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
6067 LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
6068 LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
6069 - nr_running(), nr_threads,
6071 task_active_pid_ns(current)->last_pid);
6074 diff -urNp -x '*.orig' linux-4.4/fs/proc/meminfo.c linux-4.4/fs/proc/meminfo.c
6075 --- linux-4.4/fs/proc/meminfo.c 2021-02-24 16:56:11.809087043 +0100
6076 +++ linux-4.4/fs/proc/meminfo.c 2021-02-24 16:56:24.576155779 +0100
6077 @@ -40,7 +40,8 @@ static int meminfo_proc_show(struct seq_
6079 committed = percpu_counter_read_positive(&vm_committed_as);
6081 - cached = global_page_state(NR_FILE_PAGES) -
6082 + cached = vx_flags(VXF_VIRT_MEM, 0) ?
6083 + vx_vsi_cached(&i) : global_page_state(NR_FILE_PAGES) -
6084 total_swapcache_pages() - i.bufferram;
6087 diff -urNp -x '*.orig' linux-4.4/fs/proc/root.c linux-4.4/fs/proc/root.c
6088 --- linux-4.4/fs/proc/root.c 2021-02-24 16:56:11.809087043 +0100
6089 +++ linux-4.4/fs/proc/root.c 2021-02-24 16:56:24.576155779 +0100
6091 #include <linux/mount.h>
6092 #include <linux/pid_namespace.h>
6093 #include <linux/parser.h>
6094 +#include <linux/vserver/inode.h>
6096 #include "internal.h"
6098 +struct proc_dir_entry *proc_virtual;
6100 +extern void proc_vx_init(void);
6102 static int proc_test_super(struct super_block *sb, void *data)
6104 return sb->s_fs_info == data;
6105 @@ -113,7 +118,8 @@ static struct dentry *proc_mount(struct
6108 /* Does the mounter have privilege over the pid namespace? */
6109 - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
6110 + if (!vx_ns_capable(ns->user_ns,
6111 + CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
6112 return ERR_PTR(-EPERM);
6115 @@ -196,6 +202,7 @@ void __init proc_root_init(void)
6117 proc_mkdir("bus", NULL);
6122 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
6123 @@ -257,6 +264,7 @@ struct proc_dir_entry proc_root = {
6124 .proc_iops = &proc_root_inode_operations,
6125 .proc_fops = &proc_root_operations,
6126 .parent = &proc_root,
6127 + .vx_flags = IATTR_ADMIN | IATTR_WATCH,
6131 diff -urNp -x '*.orig' linux-4.4/fs/proc/self.c linux-4.4/fs/proc/self.c
6132 --- linux-4.4/fs/proc/self.c 2021-02-24 16:56:11.809087043 +0100
6133 +++ linux-4.4/fs/proc/self.c 2021-02-24 16:56:24.576155779 +0100
6135 #include <linux/sched.h>
6136 #include <linux/slab.h>
6137 #include <linux/pid_namespace.h>
6138 +#include <linux/vserver/inode.h>
6139 #include "internal.h"
6142 @@ -59,6 +60,8 @@ int proc_setup_self(struct super_block *
6143 self = d_alloc_name(s->s_root, "self");
6145 struct inode *inode = new_inode(s);
6147 + // self->vx_flags = IATTR_PROC_SYMLINK;
6149 inode->i_ino = self_inum;
6150 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
6151 diff -urNp -x '*.orig' linux-4.4/fs/proc/stat.c linux-4.4/fs/proc/stat.c
6152 --- linux-4.4/fs/proc/stat.c 2016-01-11 00:01:32.000000000 +0100
6153 +++ linux-4.4/fs/proc/stat.c 2021-02-24 16:56:24.576155779 +0100
6155 #include <linux/slab.h>
6156 #include <linux/time.h>
6157 #include <linux/irqnr.h>
6158 +#include <linux/vserver/cvirt.h>
6159 #include <linux/cputime.h>
6160 #include <linux/tick.h>
6161 +#include <linux/cpuset.h>
6163 #ifndef arch_irq_stat_cpu
6164 #define arch_irq_stat_cpu(cpu) 0
6165 @@ -87,14 +89,26 @@ static int show_stat(struct seq_file *p,
6166 u64 sum_softirq = 0;
6167 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
6168 struct timespec boottime;
6169 + cpumask_var_t cpus_allowed;
6170 + bool virt_cpu = vx_flags(VXF_VIRT_CPU, 0);
6172 user = nice = system = idle = iowait =
6173 irq = softirq = steal = 0;
6174 guest = guest_nice = 0;
6175 getboottime(&boottime);
6177 + if (vx_flags(VXF_VIRT_UPTIME, 0))
6178 + vx_vsi_boottime(&boottime);
6181 + cpuset_cpus_allowed(current, cpus_allowed);
6183 jif = boottime.tv_sec;
6185 for_each_possible_cpu(i) {
6186 + if (virt_cpu && !cpumask_test_cpu(i, cpus_allowed))
6189 user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
6190 nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
6191 system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
6192 @@ -131,6 +145,9 @@ static int show_stat(struct seq_file *p,
6195 for_each_online_cpu(i) {
6196 + if (virt_cpu && !cpumask_test_cpu(i, cpus_allowed))
6199 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
6200 user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
6201 nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
6202 diff -urNp -x '*.orig' linux-4.4/fs/proc/uptime.c linux-4.4/fs/proc/uptime.c
6203 --- linux-4.4/fs/proc/uptime.c 2016-01-11 00:01:32.000000000 +0100
6204 +++ linux-4.4/fs/proc/uptime.c 2021-02-24 16:56:24.576155779 +0100
6206 #include <linux/seq_file.h>
6207 #include <linux/time.h>
6208 #include <linux/kernel_stat.h>
6209 +#include <linux/vserver/cvirt.h>
6210 #include <linux/cputime.h>
6212 static int uptime_proc_show(struct seq_file *m, void *v)
6213 @@ -24,6 +25,10 @@ static int uptime_proc_show(struct seq_f
6214 nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
6215 idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
6218 + if (vx_flags(VXF_VIRT_UPTIME, 0))
6219 + vx_vsi_uptime(&uptime, &idle);
6221 seq_printf(m, "%lu.%02lu %lu.%02lu\n",
6222 (unsigned long) uptime.tv_sec,
6223 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
6224 diff -urNp -x '*.orig' linux-4.4/fs/proc_namespace.c linux-4.4/fs/proc_namespace.c
6225 --- linux-4.4/fs/proc_namespace.c 2021-02-24 16:56:11.812420481 +0100
6226 +++ linux-4.4/fs/proc_namespace.c 2021-02-24 16:56:24.576155779 +0100
6227 @@ -46,6 +46,8 @@ static int show_sb_opts(struct seq_file
6228 { MS_DIRSYNC, ",dirsync" },
6229 { MS_MANDLOCK, ",mand" },
6230 { MS_LAZYTIME, ",lazytime" },
6231 + { MS_TAGGED, ",tag" },
6232 + { MS_NOTAGCHECK, ",notagcheck" },
6235 const struct proc_fs_info *fs_infop;
6236 @@ -82,6 +84,38 @@ static inline void mangle(struct seq_fil
6237 seq_escape(m, s, " \t\n\\");
6240 +#ifdef CONFIG_VSERVER_EXTRA_MNT_CHECK
6242 +static int mnt_is_reachable(struct vfsmount *vfsmnt)
6245 + struct dentry *point;
6246 + struct mount *mnt = real_mount(vfsmnt);
6247 + struct mount *root_mnt;
6250 + if (mnt == mnt->mnt_ns->root)
6254 + root = current->fs->root;
6255 + root_mnt = real_mount(root.mnt);
6256 + point = root.dentry;
6258 + while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) {
6259 + point = mnt->mnt_mountpoint;
6260 + mnt = mnt->mnt_parent;
6262 + rcu_read_unlock();
6264 + ret = (mnt == root_mnt) && is_subdir(point, root.dentry);
6269 +#define mnt_is_reachable(v) (1)
6272 static void show_type(struct seq_file *m, struct super_block *sb)
6274 mangle(m, sb->s_type->name);
6275 @@ -99,6 +133,17 @@ static int show_vfsmnt(struct seq_file *
6276 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
6277 struct super_block *sb = mnt_path.dentry->d_sb;
6279 + if (vx_flags(VXF_HIDE_MOUNT, 0))
6281 + if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6284 + if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
6285 + mnt == current->fs->root.mnt) {
6286 + seq_puts(m, "/dev/root / ");
6290 if (sb->s_op->show_devname) {
6291 err = sb->s_op->show_devname(m, mnt_path.dentry);
6293 @@ -112,6 +157,7 @@ static int show_vfsmnt(struct seq_file *
6299 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
6300 err = show_sb_opts(m, sb);
6301 @@ -133,6 +179,11 @@ static int show_mountinfo(struct seq_fil
6302 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
6305 + if (vx_flags(VXF_HIDE_MOUNT, 0))
6307 + if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6310 seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
6311 MAJOR(sb->s_dev), MINOR(sb->s_dev));
6312 if (sb->s_op->show_path)
6313 @@ -193,6 +244,17 @@ static int show_vfsstat(struct seq_file
6314 struct super_block *sb = mnt_path.dentry->d_sb;
6317 + if (vx_flags(VXF_HIDE_MOUNT, 0))
6319 + if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6322 + if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
6323 + mnt == current->fs->root.mnt) {
6324 + seq_puts(m, "device /dev/root mounted on / ");
6329 if (sb->s_op->show_devname) {
6330 seq_puts(m, "device ");
6331 @@ -214,7 +276,7 @@ static int show_vfsstat(struct seq_file
6337 /* file system type */
6338 seq_puts(m, "with fstype ");
6340 diff -urNp -x '*.orig' linux-4.4/fs/quota/dquot.c linux-4.4/fs/quota/dquot.c
6341 --- linux-4.4/fs/quota/dquot.c 2021-02-24 16:56:11.812420481 +0100
6342 +++ linux-4.4/fs/quota/dquot.c 2021-02-24 16:56:24.576155779 +0100
6343 @@ -1644,6 +1644,9 @@ int __dquot_alloc_space(struct inode *in
6344 int reserve = flags & DQUOT_SPACE_RESERVE;
6345 struct dquot **dquots;
6347 + if ((ret = dl_alloc_space(inode, number)))
6350 if (!dquot_active(inode)) {
6351 inode_incr_space(inode, number, reserve);
6353 @@ -1696,6 +1699,9 @@ int dquot_alloc_inode(struct inode *inod
6354 struct dquot_warn warn[MAXQUOTAS];
6355 struct dquot * const *dquots;
6357 + if ((ret = dl_alloc_inode(inode)))
6360 if (!dquot_active(inode))
6362 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
6363 @@ -1798,6 +1804,8 @@ void __dquot_free_space(struct inode *in
6364 struct dquot **dquots;
6365 int reserve = flags & DQUOT_SPACE_RESERVE, index;
6367 + dl_free_space(inode, number);
6369 if (!dquot_active(inode)) {
6370 inode_decr_space(inode, number, reserve);
6372 @@ -1842,6 +1850,8 @@ void dquot_free_inode(struct inode *inod
6373 struct dquot * const *dquots;
6376 + dl_free_inode(inode);
6378 if (!dquot_active(inode))
6381 diff -urNp -x '*.orig' linux-4.4/fs/quota/quota.c linux-4.4/fs/quota/quota.c
6382 --- linux-4.4/fs/quota/quota.c 2021-02-24 16:56:11.812420481 +0100
6383 +++ linux-4.4/fs/quota/quota.c 2021-02-24 16:56:24.576155779 +0100
6385 #include <linux/fs.h>
6386 #include <linux/namei.h>
6387 #include <linux/slab.h>
6388 +#include <linux/vs_context.h>
6389 #include <asm/current.h>
6390 #include <linux/uaccess.h>
6391 #include <linux/kernel.h>
6392 @@ -39,7 +40,7 @@ static int check_quotactl_permission(str
6396 - if (!capable(CAP_SYS_ADMIN))
6397 + if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
6401 @@ -704,6 +705,46 @@ static int do_quotactl(struct super_bloc
6405 +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6407 +#include <linux/vroot.h>
6408 +#include <linux/major.h>
6409 +#include <linux/module.h>
6410 +#include <linux/kallsyms.h>
6411 +#include <linux/vserver/debug.h>
6413 +static vroot_grb_func *vroot_get_real_bdev = NULL;
6415 +static DEFINE_SPINLOCK(vroot_grb_lock);
6417 +int register_vroot_grb(vroot_grb_func *func) {
6420 + spin_lock(&vroot_grb_lock);
6421 + if (!vroot_get_real_bdev) {
6422 + vroot_get_real_bdev = func;
6425 + spin_unlock(&vroot_grb_lock);
6428 +EXPORT_SYMBOL(register_vroot_grb);
6430 +int unregister_vroot_grb(vroot_grb_func *func) {
6431 + int ret = -EINVAL;
6433 + spin_lock(&vroot_grb_lock);
6434 + if (vroot_get_real_bdev) {
6435 + vroot_get_real_bdev = NULL;
6438 + spin_unlock(&vroot_grb_lock);
6441 +EXPORT_SYMBOL(unregister_vroot_grb);
6445 /* Return 1 if 'cmd' will block on frozen filesystem */
6446 static int quotactl_cmd_write(int cmd)
6448 @@ -739,6 +780,22 @@ static struct super_block *quotactl_bloc
6451 return ERR_CAST(bdev);
6452 +#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6453 + if (bdev && bdev->bd_inode &&
6454 + imajor(bdev->bd_inode) == VROOT_MAJOR) {
6455 + struct block_device *bdnew = (void *)-EINVAL;
6457 + if (vroot_get_real_bdev)
6458 + bdnew = vroot_get_real_bdev(bdev);
6460 + vxdprintk(VXD_CBIT(misc, 0),
6461 + "vroot_get_real_bdev not set");
6463 + if (IS_ERR(bdnew))
6464 + return ERR_PTR(PTR_ERR(bdnew));
6468 if (quotactl_cmd_write(cmd))
6469 sb = get_super_thawed(bdev);
6471 diff -urNp -x '*.orig' linux-4.4/fs/stat.c linux-4.4/fs/stat.c
6472 --- linux-4.4/fs/stat.c 2021-02-24 16:56:11.822420796 +0100
6473 +++ linux-4.4/fs/stat.c 2021-02-24 16:56:24.576155779 +0100
6474 @@ -26,6 +26,7 @@ void generic_fillattr(struct inode *inod
6475 stat->nlink = inode->i_nlink;
6476 stat->uid = inode->i_uid;
6477 stat->gid = inode->i_gid;
6478 + stat->tag = inode->i_tag;
6479 stat->rdev = inode->i_rdev;
6480 stat->size = i_size_read(inode);
6481 stat->atime = inode->i_atime;
6482 diff -urNp -x '*.orig' linux-4.4/fs/statfs.c linux-4.4/fs/statfs.c
6483 --- linux-4.4/fs/statfs.c 2016-01-11 00:01:32.000000000 +0100
6484 +++ linux-4.4/fs/statfs.c 2021-02-24 16:56:24.576155779 +0100
6486 #include <linux/statfs.h>
6487 #include <linux/security.h>
6488 #include <linux/uaccess.h>
6489 +#include <linux/vs_base.h>
6490 +#include <linux/vs_dlimit.h>
6491 #include "internal.h"
6493 static int flags_by_mnt(int mnt_flags)
6494 @@ -60,6 +62,8 @@ static int statfs_by_dentry(struct dentr
6495 retval = dentry->d_sb->s_op->statfs(dentry, buf);
6496 if (retval == 0 && buf->f_frsize == 0)
6497 buf->f_frsize = buf->f_bsize;
6498 + if (!vx_check(0, VS_ADMIN|VS_WATCH))
6499 + vx_vsi_statfs(dentry->d_sb, buf);
6503 diff -urNp -x '*.orig' linux-4.4/fs/super.c linux-4.4/fs/super.c
6504 --- linux-4.4/fs/super.c 2021-02-24 16:56:11.822420796 +0100
6505 +++ linux-4.4/fs/super.c 2021-02-24 16:56:24.576155779 +0100
6507 #include <linux/cleancache.h>
6508 #include <linux/fsnotify.h>
6509 #include <linux/lockdep.h>
6510 +#include <linux/magic.h>
6511 +#include <linux/vs_context.h>
6512 #include "internal.h"
6515 @@ -1153,6 +1155,13 @@ mount_fs(struct file_system_type *type,
6517 sb->s_flags |= MS_BORN;
6520 + if (!vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT) &&
6522 + (sb->s_magic != PROC_SUPER_MAGIC) &&
6523 + (sb->s_magic != DEVPTS_SUPER_MAGIC))
6526 error = security_sb_kern_mount(sb, flags, secdata);
6529 diff -urNp -x '*.orig' linux-4.4/fs/utimes.c linux-4.4/fs/utimes.c
6530 --- linux-4.4/fs/utimes.c 2021-02-24 16:56:11.829087673 +0100
6531 +++ linux-4.4/fs/utimes.c 2021-02-24 16:56:24.576155779 +0100
6533 #include <linux/stat.h>
6534 #include <linux/utime.h>
6535 #include <linux/syscalls.h>
6536 +#include <linux/mount.h>
6537 +#include <linux/vs_cowbl.h>
6538 #include <asm/uaccess.h>
6539 #include <asm/unistd.h>
6541 @@ -52,13 +54,19 @@ static int utimes_common(struct path *pa
6544 struct iattr newattrs;
6545 - struct inode *inode = path->dentry->d_inode;
6546 struct inode *delegated_inode = NULL;
6547 + struct inode *inode;
6549 + error = cow_check_and_break(path);
6553 error = mnt_want_write(path->mnt);
6557 + inode = path->dentry->d_inode;
6559 if (times && times[0].tv_nsec == UTIME_NOW &&
6560 times[1].tv_nsec == UTIME_NOW)
6562 diff -urNp -x '*.orig' linux-4.4/fs/xattr.c linux-4.4/fs/xattr.c
6563 --- linux-4.4/fs/xattr.c 2021-02-24 16:56:11.829087673 +0100
6564 +++ linux-4.4/fs/xattr.c 2021-02-24 16:56:24.579489218 +0100
6566 #include <linux/audit.h>
6567 #include <linux/vmalloc.h>
6568 #include <linux/posix_acl_xattr.h>
6569 +#include <linux/mount.h>
6571 #include <asm/uaccess.h>
6573 @@ -52,7 +53,7 @@ xattr_permission(struct inode *inode, co
6574 * The trusted.* namespace can only be accessed by privileged users.
6576 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
6577 - if (!capable(CAP_SYS_ADMIN))
6578 + if (!vx_capable(CAP_SYS_ADMIN, VXC_FS_TRUSTED))
6579 return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
6582 diff -urNp -x '*.orig' linux-4.4/include/linux/capability.h linux-4.4/include/linux/capability.h
6583 --- linux-4.4/include/linux/capability.h 2021-02-24 16:56:11.855755179 +0100
6584 +++ linux-4.4/include/linux/capability.h 2021-02-24 16:56:24.579489218 +0100
6585 @@ -77,7 +77,8 @@ extern const kernel_cap_t __cap_init_eff
6586 #else /* HAND-CODED capability initializers */
6588 #define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1)
6589 -#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1)
6590 +#define CAP_LAST_U32_VALID_MASK ((CAP_TO_MASK(CAP_LAST_CAP + 1) -1) \
6591 + | CAP_TO_MASK(CAP_CONTEXT))
6593 # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }})
6594 # define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }})
6595 diff -urNp -x '*.orig' linux-4.4/include/linux/cred.h linux-4.4/include/linux/cred.h
6596 --- linux-4.4/include/linux/cred.h 2021-02-24 16:56:11.859088617 +0100
6597 +++ linux-4.4/include/linux/cred.h 2021-02-24 16:56:24.579489218 +0100
6598 @@ -165,6 +165,7 @@ extern void exit_creds(struct task_struc
6599 extern int copy_creds(struct task_struct *, unsigned long);
6600 extern const struct cred *get_task_cred(struct task_struct *);
6601 extern struct cred *cred_alloc_blank(void);
6602 +extern struct cred *__prepare_creds(const struct cred *);
6603 extern struct cred *prepare_creds(void);
6604 extern struct cred *prepare_exec_creds(void);
6605 extern int commit_creds(struct cred *);
6606 @@ -225,6 +226,31 @@ static inline bool cap_ambient_invariant
6607 cred->cap_inheritable));
6610 +static inline void set_cred_subscribers(struct cred *cred, int n)
6612 +#ifdef CONFIG_DEBUG_CREDENTIALS
6613 + atomic_set(&cred->subscribers, n);
6617 +static inline int read_cred_subscribers(const struct cred *cred)
6619 +#ifdef CONFIG_DEBUG_CREDENTIALS
6620 + return atomic_read(&cred->subscribers);
6626 +static inline void alter_cred_subscribers(const struct cred *_cred, int n)
6628 +#ifdef CONFIG_DEBUG_CREDENTIALS
6629 + struct cred *cred = (struct cred *) _cred;
6631 + atomic_add(n, &cred->subscribers);
6636 * get_new_cred - Get a reference on a new set of credentials
6637 * @cred: The new credentials to reference
6638 diff -urNp -x '*.orig' linux-4.4/include/linux/dcache.h linux-4.4/include/linux/dcache.h
6639 --- linux-4.4/include/linux/dcache.h 2021-02-24 16:56:11.859088617 +0100
6640 +++ linux-4.4/include/linux/dcache.h 2021-02-24 16:56:24.579489218 +0100
6642 #include <linux/cache.h>
6643 #include <linux/rcupdate.h>
6644 #include <linux/lockref.h>
6645 +// #include <linux/vs_limit.h>
6649 @@ -352,8 +353,10 @@ extern char *dentry_path(struct dentry *
6651 static inline struct dentry *dget_dlock(struct dentry *dentry)
6655 dentry->d_lockref.count++;
6656 + // vx_dentry_inc(dentry);
6661 diff -urNp -x '*.orig' linux-4.4/include/linux/devpts_fs.h linux-4.4/include/linux/devpts_fs.h
6662 --- linux-4.4/include/linux/devpts_fs.h 2021-02-24 16:56:11.859088617 +0100
6663 +++ linux-4.4/include/linux/devpts_fs.h 2021-02-24 16:56:24.579489218 +0100
6664 @@ -35,5 +35,4 @@ void devpts_pty_kill(struct inode *inode
6669 #endif /* _LINUX_DEVPTS_FS_H */
6670 diff -urNp -x '*.orig' linux-4.4/include/linux/fs.h linux-4.4/include/linux/fs.h
6671 --- linux-4.4/include/linux/fs.h 2021-02-24 16:56:11.862422056 +0100
6672 +++ linux-4.4/include/linux/fs.h 2021-02-24 16:56:24.579489218 +0100
6673 @@ -232,6 +232,7 @@ typedef void (dax_iodone_t)(struct buffe
6674 #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */
6675 #define ATTR_TIMES_SET (1 << 16)
6676 #define ATTR_TOUCH (1 << 17)
6677 +#define ATTR_TAG (1 << 18)
6680 * Whiteout is represented by a char device. The following constants define the
6681 @@ -254,6 +255,7 @@ struct iattr {
6687 struct timespec ia_atime;
6688 struct timespec ia_mtime;
6689 @@ -592,7 +594,9 @@ struct inode {
6690 unsigned short i_opflags;
6693 - unsigned int i_flags;
6695 + unsigned short i_flags;
6696 + unsigned short i_vflags;
6698 #ifdef CONFIG_FS_POSIX_ACL
6699 struct posix_acl *i_acl;
6700 @@ -621,6 +625,7 @@ struct inode {
6701 unsigned int __i_nlink;
6706 struct timespec i_atime;
6707 struct timespec i_mtime;
6708 @@ -820,6 +825,11 @@ static inline gid_t i_gid_read(const str
6709 return from_kgid(&init_user_ns, inode->i_gid);
6712 +static inline vtag_t i_tag_read(const struct inode *inode)
6714 + return from_ktag(&init_user_ns, inode->i_tag);
6717 static inline void i_uid_write(struct inode *inode, uid_t uid)
6719 inode->i_uid = make_kuid(&init_user_ns, uid);
6720 @@ -830,14 +840,19 @@ static inline void i_gid_write(struct in
6721 inode->i_gid = make_kgid(&init_user_ns, gid);
6724 +static inline void i_tag_write(struct inode *inode, vtag_t tag)
6726 + inode->i_tag = make_ktag(&init_user_ns, tag);
6729 static inline unsigned iminor(const struct inode *inode)
6731 - return MINOR(inode->i_rdev);
6732 + return MINOR(inode->i_mdev);
6735 static inline unsigned imajor(const struct inode *inode)
6737 - return MAJOR(inode->i_rdev);
6738 + return MAJOR(inode->i_mdev);
6741 extern struct block_device *I_BDEV(struct inode *inode);
6742 @@ -894,6 +909,7 @@ struct file {
6744 struct fown_struct f_owner;
6745 const struct cred *f_cred;
6747 struct file_ra_state f_ra;
6750 @@ -1028,6 +1044,7 @@ struct file_lock {
6751 struct file *fl_file;
6756 struct fasync_struct * fl_fasync; /* for lease break notifications */
6757 /* for lease breaks: */
6758 @@ -1705,6 +1722,7 @@ struct inode_operations {
6759 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
6760 ssize_t (*listxattr) (struct dentry *, char *, size_t);
6761 int (*removexattr) (struct dentry *, const char *);
6762 + int (*sync_flags) (struct inode *, int, int);
6763 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
6765 int (*update_time)(struct inode *, struct timespec *, int);
6766 @@ -1719,6 +1737,7 @@ ssize_t rw_copy_check_uvector(int type,
6767 unsigned long nr_segs, unsigned long fast_segs,
6768 struct iovec *fast_pointer,
6769 struct iovec **ret_pointer);
6770 +ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t);
6772 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
6773 extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *);
6774 @@ -1784,6 +1803,14 @@ struct super_operations {
6776 #define S_DAX 0 /* Make all the DAX code disappear */
6778 +#define S_IXUNLINK 16384 /* Immutable Invert on unlink */
6780 +/* Linux-VServer related Inode flags */
6784 +#define V_BARRIER 4 /* Barrier for chroot() */
6785 +#define V_COW 8 /* Copy on Write */
6788 * Note that nosuid etc flags are inode-specific: setting some file-system
6789 @@ -1808,10 +1835,13 @@ struct super_operations {
6790 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
6791 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
6792 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
6793 +#define IS_TAGGED(inode) __IS_FLG(inode, MS_TAGGED)
6795 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
6796 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
6797 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
6798 +#define IS_IXUNLINK(inode) ((inode)->i_flags & S_IXUNLINK)
6799 +#define IS_IXORUNLINK(inode) ((IS_IXUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode))
6800 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL)
6802 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
6803 @@ -1826,6 +1856,16 @@ struct super_operations {
6804 #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
6805 (inode)->i_rdev == WHITEOUT_DEV)
6807 +#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_vflags & V_BARRIER))
6809 +#ifdef CONFIG_VSERVER_COWBL
6810 +# define IS_COW(inode) (IS_IXUNLINK(inode) && IS_IMMUTABLE(inode))
6811 +# define IS_COW_LINK(inode) (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1))
6813 +# define IS_COW(inode) (0)
6814 +# define IS_COW_LINK(inode) (0)
6818 * Inode state bits. Protected by inode->i_lock
6820 @@ -2086,6 +2126,9 @@ extern struct kobject *fs_kobj;
6821 extern int locks_mandatory_locked(struct file *);
6822 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
6824 +#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */
6825 +#define ATTR_FLAG_IXUNLINK 1024 /* Immutable invert on unlink */
6828 * Candidates for mandatory locking have the setgid bit set
6829 * but no group execute bit - an otherwise meaningless combination.
6830 @@ -2842,6 +2885,7 @@ extern int dcache_dir_open(struct inode
6831 extern int dcache_dir_close(struct inode *, struct file *);
6832 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
6833 extern int dcache_readdir(struct file *, struct dir_context *);
6834 +extern int dcache_readdir_filter(struct file *, struct dir_context *, int (*)(struct dentry *));
6835 extern int simple_setattr(struct dentry *, struct iattr *);
6836 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
6837 extern int simple_statfs(struct dentry *, struct kstatfs *);
6838 diff -urNp -x '*.orig' linux-4.4/include/linux/init_task.h linux-4.4/include/linux/init_task.h
6839 --- linux-4.4/include/linux/init_task.h 2021-02-24 16:56:11.865755494 +0100
6840 +++ linux-4.4/include/linux/init_task.h 2021-02-24 16:56:24.579489218 +0100
6841 @@ -269,6 +269,10 @@ extern struct task_group root_task_group
6843 INIT_NUMA_BALANCING(tsk) \
6846 + .vx_info = NULL, \
6848 + .nx_info = NULL, \
6852 diff -urNp -x '*.orig' linux-4.4/include/linux/ipc.h linux-4.4/include/linux/ipc.h
6853 --- linux-4.4/include/linux/ipc.h 2016-01-11 00:01:32.000000000 +0100
6854 +++ linux-4.4/include/linux/ipc.h 2021-02-24 16:56:24.579489218 +0100
6855 @@ -16,6 +16,7 @@ struct kern_ipc_perm
6863 diff -urNp -x '*.orig' linux-4.4/include/linux/memcontrol.h linux-4.4/include/linux/memcontrol.h
6864 --- linux-4.4/include/linux/memcontrol.h 2021-02-24 16:56:11.872422371 +0100
6865 +++ linux-4.4/include/linux/memcontrol.h 2021-02-24 16:56:24.579489218 +0100
6866 @@ -113,6 +113,7 @@ struct cg_proto {
6867 struct mem_cgroup *memcg;
6872 struct mem_cgroup_stat_cpu {
6873 long count[MEM_CGROUP_STAT_NSTATS];
6874 @@ -338,6 +339,12 @@ static inline bool mem_cgroup_is_descend
6875 return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
6878 +extern unsigned long mem_cgroup_mem_usage_pages(struct mem_cgroup *memcg);
6879 +extern unsigned long mem_cgroup_mem_limit_pages(struct mem_cgroup *memcg);
6880 +extern unsigned long mem_cgroup_memsw_usage_pages(struct mem_cgroup *memcg);
6881 +extern unsigned long mem_cgroup_memsw_limit_pages(struct mem_cgroup *memcg);
6882 +extern void dump_mem_cgroup(struct mem_cgroup *memcg);
6884 static inline bool mm_match_cgroup(struct mm_struct *mm,
6885 struct mem_cgroup *memcg)
6887 diff -urNp -x '*.orig' linux-4.4/include/linux/mount.h linux-4.4/include/linux/mount.h
6888 --- linux-4.4/include/linux/mount.h 2021-02-24 16:56:11.875755809 +0100
6889 +++ linux-4.4/include/linux/mount.h 2021-02-24 16:56:24.579489218 +0100
6890 @@ -63,6 +63,9 @@ struct mnt_namespace;
6891 #define MNT_MARKED 0x4000000
6892 #define MNT_UMOUNT 0x8000000
6894 +#define MNT_TAGID 0x10000
6895 +#define MNT_NOTAG 0x20000
6898 struct dentry *mnt_root; /* root of the mounted tree */
6899 struct super_block *mnt_sb; /* pointer to superblock */
6900 diff -urNp -x '*.orig' linux-4.4/include/linux/net.h linux-4.4/include/linux/net.h
6901 --- linux-4.4/include/linux/net.h 2021-02-24 16:56:11.879089247 +0100
6902 +++ linux-4.4/include/linux/net.h 2021-02-24 16:56:24.582822656 +0100
6903 @@ -43,6 +43,7 @@ struct net;
6904 #define SOCK_NOSPACE 2
6905 #define SOCK_PASSCRED 3
6906 #define SOCK_PASSSEC 4
6907 +#define SOCK_USER_SOCKET 5
6909 #ifndef ARCH_HAS_SOCKET_TYPES
6911 diff -urNp -x '*.orig' linux-4.4/include/linux/netdevice.h linux-4.4/include/linux/netdevice.h
6912 --- linux-4.4/include/linux/netdevice.h 2021-02-24 16:56:12.542443470 +0100
6913 +++ linux-4.4/include/linux/netdevice.h 2021-02-24 16:56:24.579489218 +0100
6914 @@ -2311,6 +2311,7 @@ static inline int dev_recursion_level(vo
6916 struct net_device *dev_get_by_index(struct net *net, int ifindex);
6917 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
6918 +struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex);
6919 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
6920 int netdev_get_name(struct net *net, char *name, int ifindex);
6921 int dev_restart(struct net_device *dev);
6922 diff -urNp -x '*.orig' linux-4.4/include/linux/nsproxy.h linux-4.4/include/linux/nsproxy.h
6923 --- linux-4.4/include/linux/nsproxy.h 2016-01-11 00:01:32.000000000 +0100
6924 +++ linux-4.4/include/linux/nsproxy.h 2021-02-24 16:56:24.582822656 +0100
6927 #include <linux/spinlock.h>
6928 #include <linux/sched.h>
6929 +#include <linux/vserver/debug.h>
6931 struct mnt_namespace;
6932 struct uts_namespace;
6933 @@ -63,6 +64,7 @@ extern struct nsproxy init_nsproxy;
6936 int copy_namespaces(unsigned long flags, struct task_struct *tsk);
6937 +struct nsproxy *copy_nsproxy(struct nsproxy *orig);
6938 void exit_task_namespaces(struct task_struct *tsk);
6939 void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
6940 void free_nsproxy(struct nsproxy *ns);
6941 @@ -70,16 +72,26 @@ int unshare_nsproxy_namespaces(unsigned
6942 struct cred *, struct fs_struct *);
6943 int __init nsproxy_cache_init(void);
6945 -static inline void put_nsproxy(struct nsproxy *ns)
6946 +#define get_nsproxy(n) __get_nsproxy(n, __FILE__, __LINE__)
6948 +static inline void __get_nsproxy(struct nsproxy *ns,
6949 + const char *_file, int _line)
6951 - if (atomic_dec_and_test(&ns->count)) {
6954 + vxlprintk(VXD_CBIT(space, 0), "get_nsproxy(%p[%u])",
6955 + ns, atomic_read(&ns->count), _file, _line);
6956 + atomic_inc(&ns->count);
6959 -static inline void get_nsproxy(struct nsproxy *ns)
6960 +#define put_nsproxy(n) __put_nsproxy(n, __FILE__, __LINE__)
6962 +static inline void __put_nsproxy(struct nsproxy *ns,
6963 + const char *_file, int _line)
6965 - atomic_inc(&ns->count);
6966 + vxlprintk(VXD_CBIT(space, 0), "put_nsproxy(%p[%u])",
6967 + ns, atomic_read(&ns->count), _file, _line);
6968 + if (atomic_dec_and_test(&ns->count)) {
6974 diff -urNp -x '*.orig' linux-4.4/include/linux/pid.h linux-4.4/include/linux/pid.h
6975 --- linux-4.4/include/linux/pid.h 2021-02-24 16:56:11.882422685 +0100
6976 +++ linux-4.4/include/linux/pid.h 2021-02-24 16:56:24.582822656 +0100
6977 @@ -10,7 +10,8 @@ enum pid_type
6980 /* only valid to __task_pid_nr_ns() */
6987 @@ -172,6 +173,7 @@ static inline pid_t pid_nr(struct pid *p
6990 pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
6991 +pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns);
6992 pid_t pid_vnr(struct pid *pid);
6994 #define do_each_pid_task(pid, type, task) \
6995 diff -urNp -x '*.orig' linux-4.4/include/linux/quotaops.h linux-4.4/include/linux/quotaops.h
6996 --- linux-4.4/include/linux/quotaops.h 2021-02-24 16:56:11.885756124 +0100
6997 +++ linux-4.4/include/linux/quotaops.h 2021-02-24 16:56:24.582822656 +0100
6999 #define _LINUX_QUOTAOPS_
7001 #include <linux/fs.h>
7002 +#include <linux/vs_dlimit.h>
7004 #define DQUOT_SPACE_WARN 0x1
7005 #define DQUOT_SPACE_RESERVE 0x2
7006 @@ -221,11 +222,12 @@ static inline void dquot_drop(struct ino
7008 static inline int dquot_alloc_inode(struct inode *inode)
7011 + return dl_alloc_inode(inode);
7014 static inline void dquot_free_inode(struct inode *inode)
7016 + dl_free_inode(inode);
7019 static inline int dquot_transfer(struct inode *inode, struct iattr *iattr)
7020 @@ -236,6 +238,10 @@ static inline int dquot_transfer(struct
7021 static inline int __dquot_alloc_space(struct inode *inode, qsize_t number,
7026 + if ((ret = dl_alloc_space(inode, number)))
7028 if (!(flags & DQUOT_SPACE_RESERVE))
7029 inode_add_bytes(inode, number);
7031 @@ -246,6 +252,7 @@ static inline void __dquot_free_space(st
7033 if (!(flags & DQUOT_SPACE_RESERVE))
7034 inode_sub_bytes(inode, number);
7035 + dl_free_space(inode, number);
7038 static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
7039 diff -urNp -x '*.orig' linux-4.4/include/linux/sched.h linux-4.4/include/linux/sched.h
7040 --- linux-4.4/include/linux/sched.h 2021-02-24 16:56:11.889089562 +0100
7041 +++ linux-4.4/include/linux/sched.h 2021-02-24 16:56:24.582822656 +0100
7042 @@ -1611,6 +1611,14 @@ struct task_struct {
7044 struct seccomp seccomp;
7046 +/* vserver context data */
7047 + struct vx_info *vx_info;
7048 + struct nx_info *nx_info;
7054 /* Thread group tracking */
7057 @@ -1940,6 +1948,11 @@ struct pid_namespace;
7058 pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
7059 struct pid_namespace *ns);
7061 +#include <linux/vserver/base.h>
7062 +#include <linux/vserver/context.h>
7063 +#include <linux/vserver/debug.h>
7064 +#include <linux/vserver/pid.h>
7066 static inline pid_t task_pid_nr(struct task_struct *tsk)
7069 @@ -1953,7 +1966,8 @@ static inline pid_t task_pid_nr_ns(struc
7071 static inline pid_t task_pid_vnr(struct task_struct *tsk)
7073 - return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
7074 + /* vserver: the pid from __task_pid_nr_ns() is passed through vx_map_pid() */
7075 + return vx_map_pid(__task_pid_nr_ns(tsk, PIDTYPE_PID, NULL));
7079 diff -urNp -x '*.orig' linux-4.4/include/linux/shmem_fs.h linux-4.4/include/linux/shmem_fs.h
7080 --- linux-4.4/include/linux/shmem_fs.h 2021-02-24 16:56:11.889089562 +0100
7081 +++ linux-4.4/include/linux/shmem_fs.h 2021-02-24 16:56:24.582822656 +0100
7084 /* inode in-kernel data */
7086 +#define TMPFS_SUPER_MAGIC 0x01021994
7089 struct shmem_inode_info {
7091 unsigned int seals; /* shmem seals */
7092 diff -urNp -x '*.orig' linux-4.4/include/linux/stat.h linux-4.4/include/linux/stat.h
7093 --- linux-4.4/include/linux/stat.h 2016-01-11 00:01:32.000000000 +0100
7094 +++ linux-4.4/include/linux/stat.h 2021-02-24 16:56:24.582822656 +0100
7095 @@ -25,6 +25,7 @@ struct kstat {
7102 struct timespec atime;
7103 diff -urNp -x '*.orig' linux-4.4/include/linux/sunrpc/auth.h linux-4.4/include/linux/sunrpc/auth.h
7104 --- linux-4.4/include/linux/sunrpc/auth.h 2016-01-11 00:01:32.000000000 +0100
7105 +++ linux-4.4/include/linux/sunrpc/auth.h 2021-02-24 16:56:24.582822656 +0100
7106 @@ -40,6 +40,7 @@ enum {
7111 struct group_info *group_info;
7112 const char *principal;
7113 unsigned long ac_flags;
7114 diff -urNp -x '*.orig' linux-4.4/include/linux/sunrpc/clnt.h linux-4.4/include/linux/sunrpc/clnt.h
7115 --- linux-4.4/include/linux/sunrpc/clnt.h 2021-02-24 16:56:11.892423000 +0100
7116 +++ linux-4.4/include/linux/sunrpc/clnt.h 2021-02-24 16:56:24.582822656 +0100
7117 @@ -51,7 +51,8 @@ struct rpc_clnt {
7118 cl_discrtry : 1,/* disconnect before retry */
7119 cl_noretranstimeo: 1,/* No retransmit timeouts */
7120 cl_autobind : 1,/* use getport() */
7121 - cl_chatty : 1;/* be verbose */
7122 + cl_chatty : 1,/* be verbose */
7123 + cl_tag : 1;/* context tagging */
7125 struct rpc_rtt * cl_rtt; /* RTO estimator data */
7126 const struct rpc_timeout *cl_timeout; /* Timeout strategy */
7127 diff -urNp -x '*.orig' linux-4.4/include/linux/types.h linux-4.4/include/linux/types.h
7128 --- linux-4.4/include/linux/types.h 2016-01-11 00:01:32.000000000 +0100
7129 +++ linux-4.4/include/linux/types.h 2021-02-24 16:56:24.582822656 +0100
7130 @@ -32,6 +32,9 @@ typedef __kernel_uid32_t uid_t;
7131 typedef __kernel_gid32_t gid_t;
7132 typedef __kernel_uid16_t uid16_t;
7133 typedef __kernel_gid16_t gid16_t;
7134 +typedef unsigned int vxid_t;
7135 +typedef unsigned int vnid_t;
7136 +typedef unsigned int vtag_t;
7138 typedef unsigned long uintptr_t;
7140 diff -urNp -x '*.orig' linux-4.4/include/linux/uidgid.h linux-4.4/include/linux/uidgid.h
7141 --- linux-4.4/include/linux/uidgid.h 2016-01-11 00:01:32.000000000 +0100
7142 +++ linux-4.4/include/linux/uidgid.h 2021-02-24 16:56:24.582822656 +0100
7143 @@ -21,13 +21,17 @@ typedef struct {
7156 #define KUIDT_INIT(value) (kuid_t){ value }
7157 #define KGIDT_INIT(value) (kgid_t){ value }
7158 +#define KTAGT_INIT(value) (ktag_t){ value }
7160 #ifdef CONFIG_MULTIUSER
7161 static inline uid_t __kuid_val(kuid_t uid)
7162 @@ -51,11 +55,18 @@ static inline gid_t __kgid_val(kgid_t gi
7166 +static inline vtag_t __ktag_val(ktag_t tag)
7171 #define GLOBAL_ROOT_UID KUIDT_INIT(0)
7172 #define GLOBAL_ROOT_GID KGIDT_INIT(0)
7173 +#define GLOBAL_ROOT_TAG KTAGT_INIT(0)
7175 #define INVALID_UID KUIDT_INIT(-1)
7176 #define INVALID_GID KGIDT_INIT(-1)
7177 +#define INVALID_TAG KTAGT_INIT(-1)
7179 static inline bool uid_eq(kuid_t left, kuid_t right)
7181 @@ -67,6 +78,11 @@ static inline bool gid_eq(kgid_t left, k
7182 return __kgid_val(left) == __kgid_val(right);
7185 +static inline bool tag_eq(ktag_t left, ktag_t right)
7187 + return __ktag_val(left) == __ktag_val(right);
7190 static inline bool uid_gt(kuid_t left, kuid_t right)
7192 return __kuid_val(left) > __kuid_val(right);
7193 @@ -117,13 +133,21 @@ static inline bool gid_valid(kgid_t gid)
7194 return __kgid_val(gid) != (gid_t) -1;
7197 +static inline bool tag_valid(ktag_t tag)
7199 + return !tag_eq(tag, INVALID_TAG);
7202 #ifdef CONFIG_USER_NS
7204 extern kuid_t make_kuid(struct user_namespace *from, uid_t uid);
7205 extern kgid_t make_kgid(struct user_namespace *from, gid_t gid);
7206 +extern ktag_t make_ktag(struct user_namespace *from, vtag_t tag);
7208 extern uid_t from_kuid(struct user_namespace *to, kuid_t uid);
7209 extern gid_t from_kgid(struct user_namespace *to, kgid_t gid);
7210 +extern vtag_t from_ktag(struct user_namespace *to, ktag_t tag);
7212 extern uid_t from_kuid_munged(struct user_namespace *to, kuid_t uid);
7213 extern gid_t from_kgid_munged(struct user_namespace *to, kgid_t gid);
7215 @@ -149,6 +173,11 @@ static inline kgid_t make_kgid(struct us
7216 return KGIDT_INIT(gid);
7219 +static inline ktag_t make_ktag(struct user_namespace *from, vtag_t tag)
7221 + return KTAGT_INIT(tag);
7224 static inline uid_t from_kuid(struct user_namespace *to, kuid_t kuid)
7226 return __kuid_val(kuid);
7227 @@ -159,6 +188,11 @@ static inline gid_t from_kgid(struct use
7228 return __kgid_val(kgid);
7231 +static inline vtag_t from_ktag(struct user_namespace *to, ktag_t ktag)
7233 + return __ktag_val(ktag);
7236 static inline uid_t from_kuid_munged(struct user_namespace *to, kuid_t kuid)
7238 uid_t uid = from_kuid(to, kuid);
7239 diff -urNp -x '*.orig' linux-4.4/include/linux/vroot.h linux-4.4/include/linux/vroot.h
7240 --- linux-4.4/include/linux/vroot.h 1970-01-01 01:00:00.000000000 +0100
7241 +++ linux-4.4/include/linux/vroot.h 2021-02-24 16:56:24.582822656 +0100
7245 + * include/linux/vroot.h
7247 + * written by Herbert Pötzl, 9/11/2002
7248 + * ported to 2.6 by Herbert Pötzl, 30/12/2004
7250 + * Copyright (C) 2002-2007 by Herbert Pötzl.
7251 + * Redistribution of this file is permitted under the
7252 + * GNU General Public License.
7255 +#ifndef _LINUX_VROOT_H
7256 +#define _LINUX_VROOT_H
7261 +/* Possible states of device */
7267 +struct vroot_device {
7271 + struct semaphore vr_ctl_mutex;
7272 + struct block_device *vr_device;
7277 +typedef struct block_device *(vroot_grb_func)(struct block_device *);
7279 +extern int register_vroot_grb(vroot_grb_func *);
7280 +extern int unregister_vroot_grb(vroot_grb_func *);
7282 +#endif /* __KERNEL__ */
7284 +#define MAX_VROOT_DEFAULT 8
7287 + * IOCTL commands --- we will commandeer 0x56 ('V')
7290 +#define VROOT_SET_DEV 0x5600
7291 +#define VROOT_CLR_DEV 0x5601
7293 +#endif /* _LINUX_VROOT_H */
7294 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_base.h linux-4.4/include/linux/vs_base.h
7295 --- linux-4.4/include/linux/vs_base.h 1970-01-01 01:00:00.000000000 +0100
7296 +++ linux-4.4/include/linux/vs_base.h 2021-02-24 16:56:24.582822656 +0100
7301 +#include "vserver/base.h"
7302 +#include "vserver/check.h"
7303 +#include "vserver/debug.h"
7306 +#warning duplicate inclusion
7308 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_context.h linux-4.4/include/linux/vs_context.h
7309 --- linux-4.4/include/linux/vs_context.h 1970-01-01 01:00:00.000000000 +0100
7310 +++ linux-4.4/include/linux/vs_context.h 2021-02-24 16:56:24.582822656 +0100
7312 +#ifndef _VS_CONTEXT_H
7313 +#define _VS_CONTEXT_H
7315 +#include "vserver/base.h"
7316 +#include "vserver/check.h"
7317 +#include "vserver/context.h"
7318 +#include "vserver/history.h"
7319 +#include "vserver/debug.h"
7321 +#include <linux/sched.h>
7324 +#define get_vx_info(i) __get_vx_info(i, __FILE__, __LINE__, __HERE__)
7326 +static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
7327 + const char *_file, int _line, void *_here)
7332 + vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])",
7333 + vxi, vxi ? vxi->vx_id : 0,
7334 + vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7336 + __vxh_get_vx_info(vxi, _here);
7338 + atomic_inc(&vxi->vx_usecnt);
7343 +extern void free_vx_info(struct vx_info *);
7345 +#define put_vx_info(i) __put_vx_info(i, __FILE__, __LINE__, __HERE__)
7347 +static inline void __put_vx_info(struct vx_info *vxi,
7348 + const char *_file, int _line, void *_here)
7353 + vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])",
7354 + vxi, vxi ? vxi->vx_id : 0,
7355 + vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7357 + __vxh_put_vx_info(vxi, _here);
7359 + if (atomic_dec_and_test(&vxi->vx_usecnt))
7360 + free_vx_info(vxi);
7364 +#define init_vx_info(p, i) \
7365 + __init_vx_info(p, i, __FILE__, __LINE__, __HERE__)
7367 +static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi,
7368 + const char *_file, int _line, void *_here)
7371 + vxlprintk(VXD_CBIT(xid, 3),
7372 + "init_vx_info(%p[#%d.%d])",
7373 + vxi, vxi ? vxi->vx_id : 0,
7374 + vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7376 + __vxh_init_vx_info(vxi, vxp, _here);
7378 + atomic_inc(&vxi->vx_usecnt);
7384 +#define set_vx_info(p, i) \
7385 + __set_vx_info(p, i, __FILE__, __LINE__, __HERE__)
7387 +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
7388 + const char *_file, int _line, void *_here)
7390 + struct vx_info *vxo;
7395 + vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])",
7396 + vxi, vxi ? vxi->vx_id : 0,
7397 + vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7399 + __vxh_set_vx_info(vxi, vxp, _here);
7401 + atomic_inc(&vxi->vx_usecnt);
7402 + vxo = xchg(vxp, vxi);
7407 +#define clr_vx_info(p) __clr_vx_info(p, __FILE__, __LINE__, __HERE__)
7409 +static inline void __clr_vx_info(struct vx_info **vxp,
7410 + const char *_file, int _line, void *_here)
7412 + struct vx_info *vxo;
7414 + vxo = xchg(vxp, NULL);
7418 + vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])",
7419 + vxo, vxo ? vxo->vx_id : 0,
7420 + vxo ? atomic_read(&vxo->vx_usecnt) : 0,
7422 + __vxh_clr_vx_info(vxo, vxp, _here);
7424 + if (atomic_dec_and_test(&vxo->vx_usecnt))
7425 + free_vx_info(vxo);
7429 +#define claim_vx_info(v, p) \
7430 + __claim_vx_info(v, p, __FILE__, __LINE__, __HERE__)
7432 +static inline void __claim_vx_info(struct vx_info *vxi,
7433 + struct task_struct *task,
7434 + const char *_file, int _line, void *_here)
7436 + vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p",
7437 + vxi, vxi ? vxi->vx_id : 0,
7438 + vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7439 + vxi ? atomic_read(&vxi->vx_tasks) : 0,
7440 + task, _file, _line);
7441 + __vxh_claim_vx_info(vxi, task, _here);
7443 + atomic_inc(&vxi->vx_tasks);
7447 +extern void unhash_vx_info(struct vx_info *);
7449 +#define release_vx_info(v, p) \
7450 + __release_vx_info(v, p, __FILE__, __LINE__, __HERE__)
7452 +static inline void __release_vx_info(struct vx_info *vxi,
7453 + struct task_struct *task,
7454 + const char *_file, int _line, void *_here)
7456 + vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p",
7457 + vxi, vxi ? vxi->vx_id : 0,
7458 + vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7459 + vxi ? atomic_read(&vxi->vx_tasks) : 0,
7460 + task, _file, _line);
7461 + __vxh_release_vx_info(vxi, task, _here);
7465 + if (atomic_dec_and_test(&vxi->vx_tasks))
7466 + unhash_vx_info(vxi);
7470 +#define task_get_vx_info(p) \
7471 + __task_get_vx_info(p, __FILE__, __LINE__, __HERE__)
7473 +static inline struct vx_info *__task_get_vx_info(struct task_struct *p,
7474 + const char *_file, int _line, void *_here)
7476 + struct vx_info *vxi;
7479 + vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)",
7481 + vxi = __get_vx_info(p->vx_info, _file, _line, _here);
7487 +static inline void __wakeup_vx_info(struct vx_info *vxi)
7489 + if (waitqueue_active(&vxi->vx_wait))
7490 + wake_up_interruptible(&vxi->vx_wait);
7494 +#define enter_vx_info(v, s) __enter_vx_info(v, s, __FILE__, __LINE__)
7496 +static inline void __enter_vx_info(struct vx_info *vxi,
7497 + struct vx_info_save *vxis, const char *_file, int _line)
7499 + vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]",
7500 + vxi, vxi ? vxi->vx_id : 0, vxis, current,
7501 + current->xid, current->vx_info, _file, _line);
7502 + vxis->vxi = xchg(&current->vx_info, vxi); /* install vxi, keep old pointer in vxis */
7503 + vxis->xid = current->xid;
7504 + current->xid = vxi ? vxi->vx_id : 0;
7507 +#define leave_vx_info(s) __leave_vx_info(s, __FILE__, __LINE__)
7509 +static inline void __leave_vx_info(struct vx_info_save *vxis,
7510 + const char *_file, int _line)
7512 + vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]",
7513 + vxis, vxis->xid, vxis->vxi, current,
7514 + current->xid, current->vx_info, _file, _line);
7515 + (void)xchg(&current->vx_info, vxis->vxi); /* put back the pointer held in vxis */
7516 + current->xid = vxis->xid;
7520 +static inline void __enter_vx_admin(struct vx_info_save *vxis)
7522 + vxis->vxi = xchg(&current->vx_info, NULL); /* stash the pointer, leave NULL behind */
7523 + vxis->xid = xchg(&current->xid, (vxid_t)0); /* stash the xid, leave 0 behind */
7526 +static inline void __leave_vx_admin(struct vx_info_save *vxis)
7528 + (void)xchg(&current->xid, vxis->xid); /* write back the xid held in vxis */
7529 + (void)xchg(&current->vx_info, vxis->vxi); /* write back the pointer held in vxis */
7532 +#define task_is_init(p) \
7533 + __task_is_init(p, __FILE__, __LINE__, __HERE__)
7535 +static inline int __task_is_init(struct task_struct *p,
7536 + const char *_file, int _line, void *_here)
7538 + int is_init = is_global_init(p);
7542 + is_init = p->vx_info->vx_initpid == p->pid;
7547 +extern void exit_vx_info(struct task_struct *, int);
7548 +extern void exit_vx_info_early(struct task_struct *, int);
7552 +#warning duplicate inclusion
7554 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_cowbl.h linux-4.4/include/linux/vs_cowbl.h
7555 --- linux-4.4/include/linux/vs_cowbl.h 1970-01-01 01:00:00.000000000 +0100
7556 +++ linux-4.4/include/linux/vs_cowbl.h 2021-02-24 16:56:24.582822656 +0100
7558 +#ifndef _VS_COWBL_H
7559 +#define _VS_COWBL_H
7561 +#include <linux/fs.h>
7562 +#include <linux/dcache.h>
7563 +#include <linux/namei.h>
7564 +#include <linux/slab.h>
7566 +extern struct dentry *cow_break_link(const char *pathname);
7568 +static inline int cow_check_and_break(struct path *path)
7570 + struct inode *inode = path->dentry->d_inode;
7573 + /* do we need this check? */
7574 + if (IS_RDONLY(inode))
7577 + if (IS_COW(inode)) {
7578 + if (IS_COW_LINK(inode)) {
7579 + struct dentry *new_dentry, *old_dentry = path->dentry;
7582 + buf = kmalloc(PATH_MAX, GFP_KERNEL);
7586 + pp = d_path(path, buf, PATH_MAX);
7587 + new_dentry = cow_break_link(pp);
7589 + if (!IS_ERR(new_dentry)) {
7590 + path->dentry = new_dentry;
7593 + error = PTR_ERR(new_dentry);
7595 + inode->i_flags &= ~(S_IXUNLINK | S_IMMUTABLE);
7596 + inode->i_ctime = CURRENT_TIME;
7597 + mark_inode_dirty(inode);
7604 +#warning duplicate inclusion
7606 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_cvirt.h linux-4.4/include/linux/vs_cvirt.h
7607 --- linux-4.4/include/linux/vs_cvirt.h 1970-01-01 01:00:00.000000000 +0100
7608 +++ linux-4.4/include/linux/vs_cvirt.h 2021-02-24 16:56:24.582822656 +0100
7610 +#ifndef _VS_CVIRT_H
7611 +#define _VS_CVIRT_H
7613 +#include "vserver/cvirt.h"
7614 +#include "vserver/context.h"
7615 +#include "vserver/base.h"
7616 +#include "vserver/check.h"
7617 +#include "vserver/debug.h"
7620 +static inline void vx_activate_task(struct task_struct *p)
7622 + struct vx_info *vxi;
7624 + if ((vxi = p->vx_info)) {
7625 + vx_update_load(vxi);
7626 + atomic_inc(&vxi->cvirt.nr_running);
7630 +static inline void vx_deactivate_task(struct task_struct *p)
7632 + struct vx_info *vxi;
7634 + if ((vxi = p->vx_info)) {
7635 + vx_update_load(vxi);
7636 + atomic_dec(&vxi->cvirt.nr_running);
7640 +static inline void vx_uninterruptible_inc(struct task_struct *p)
7642 + struct vx_info *vxi;
7644 + if ((vxi = p->vx_info))
7645 + atomic_inc(&vxi->cvirt.nr_uninterruptible);
7648 +static inline void vx_uninterruptible_dec(struct task_struct *p)
7650 + struct vx_info *vxi;
7652 + if ((vxi = p->vx_info))
7653 + atomic_dec(&vxi->cvirt.nr_uninterruptible);
7658 +#warning duplicate inclusion
7660 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_device.h linux-4.4/include/linux/vs_device.h
7661 --- linux-4.4/include/linux/vs_device.h 1970-01-01 01:00:00.000000000 +0100
7662 +++ linux-4.4/include/linux/vs_device.h 2021-02-24 16:56:24.582822656 +0100
7664 +#ifndef _VS_DEVICE_H
7665 +#define _VS_DEVICE_H
7667 +#include "vserver/base.h"
7668 +#include "vserver/device.h"
7669 +#include "vserver/debug.h"
7672 +#ifdef CONFIG_VSERVER_DEVICE
7674 +int vs_map_device(struct vx_info *, dev_t, dev_t *, umode_t);
7676 +#define vs_device_perm(v, d, m, p) \
7677 + ((vs_map_device(current_vx_info(), d, NULL, m) & (p)) == (p))
7682 +int vs_map_device(struct vx_info *vxi,
7683 + dev_t device, dev_t *target, umode_t mode)
7690 +#define vs_device_perm(v, d, m, p) ((p) == (p))
7695 +#define vs_map_chrdev(d, t, p) \
7696 + ((vs_map_device(current_vx_info(), d, t, S_IFCHR) & (p)) == (p))
7697 +#define vs_map_blkdev(d, t, p) \
7698 + ((vs_map_device(current_vx_info(), d, t, S_IFBLK) & (p)) == (p))
7700 +#define vs_chrdev_perm(d, p) \
7701 + vs_device_perm(current_vx_info(), d, S_IFCHR, p)
7702 +#define vs_blkdev_perm(d, p) \
7703 + vs_device_perm(current_vx_info(), d, S_IFBLK, p)
7707 +#warning duplicate inclusion
7709 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_dlimit.h linux-4.4/include/linux/vs_dlimit.h
7710 --- linux-4.4/include/linux/vs_dlimit.h 1970-01-01 01:00:00.000000000 +0100
7711 +++ linux-4.4/include/linux/vs_dlimit.h 2021-02-24 16:56:24.582822656 +0100
7713 +#ifndef _VS_DLIMIT_H
7714 +#define _VS_DLIMIT_H
7716 +#include <linux/fs.h>
7718 +#include "vserver/dlimit.h"
7719 +#include "vserver/base.h"
7720 +#include "vserver/debug.h"
7723 +#define get_dl_info(i) __get_dl_info(i, __FILE__, __LINE__)
7725 +static inline struct dl_info *__get_dl_info(struct dl_info *dli,
7726 + const char *_file, int _line)
7730 + vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])",
7731 + dli, dli ? dli->dl_tag : 0,
7732 + dli ? atomic_read(&dli->dl_usecnt) : 0,
7734 + atomic_inc(&dli->dl_usecnt);
7739 +#define free_dl_info(i) \
7740 + call_rcu(&(i)->dl_rcu, rcu_free_dl_info)
7742 +#define put_dl_info(i) __put_dl_info(i, __FILE__, __LINE__)
7744 +static inline void __put_dl_info(struct dl_info *dli,
7745 + const char *_file, int _line)
7749 + vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])",
7750 + dli, dli ? dli->dl_tag : 0,
7751 + dli ? atomic_read(&dli->dl_usecnt) : 0,
7753 + if (atomic_dec_and_test(&dli->dl_usecnt))
7754 + free_dl_info(dli);
7758 +#define __dlimit_char(d) ((d) ? '*' : ' ')
7760 +static inline int __dl_alloc_space(struct super_block *sb,
7761 + vtag_t tag, dlsize_t nr, const char *file, int line)
7763 + struct dl_info *dli = NULL;
7768 + dli = locate_dl_info(sb, tag);
7772 + spin_lock(&dli->dl_lock);
7773 + ret = (dli->dl_space_used + nr > dli->dl_space_total);
7775 + dli->dl_space_used += nr;
7776 + spin_unlock(&dli->dl_lock);
7779 + vxlprintk(VXD_CBIT(dlim, 1),
7780 + "ALLOC (%p,#%d)%c %lld bytes (%d)",
7781 + sb, tag, __dlimit_char(dli), (long long)nr,
7783 + return ret ? -ENOSPC : 0;
7786 +static inline void __dl_free_space(struct super_block *sb,
7787 + vtag_t tag, dlsize_t nr, const char *_file, int _line)
7789 + struct dl_info *dli = NULL;
7793 + dli = locate_dl_info(sb, tag);
7797 + spin_lock(&dli->dl_lock);
7798 + if (dli->dl_space_used > nr)
7799 + dli->dl_space_used -= nr;
7801 + dli->dl_space_used = 0;
7802 + spin_unlock(&dli->dl_lock);
7805 + vxlprintk(VXD_CBIT(dlim, 1),
7806 + "FREE (%p,#%d)%c %lld bytes",
7807 + sb, tag, __dlimit_char(dli), (long long)nr,
7811 +static inline int __dl_alloc_inode(struct super_block *sb,
7812 + vtag_t tag, const char *_file, int _line)
7814 + struct dl_info *dli;
7817 + dli = locate_dl_info(sb, tag);
7821 + spin_lock(&dli->dl_lock);
7822 + dli->dl_inodes_used++;
7823 + ret = (dli->dl_inodes_used > dli->dl_inodes_total);
7824 + spin_unlock(&dli->dl_lock);
7827 + vxlprintk(VXD_CBIT(dlim, 0),
7828 + "ALLOC (%p,#%d)%c inode (%d)",
7829 + sb, tag, __dlimit_char(dli), ret, _file, _line);
7830 + return ret ? -ENOSPC : 0;
7833 +static inline void __dl_free_inode(struct super_block *sb,
7834 + vtag_t tag, const char *_file, int _line)
7836 + struct dl_info *dli;
7838 + dli = locate_dl_info(sb, tag);
7842 + spin_lock(&dli->dl_lock);
7843 + if (dli->dl_inodes_used > 1)
7844 + dli->dl_inodes_used--;
7846 + dli->dl_inodes_used = 0;
7847 + spin_unlock(&dli->dl_lock);
7850 + vxlprintk(VXD_CBIT(dlim, 0),
7851 + "FREE (%p,#%d)%c inode",
7852 + sb, tag, __dlimit_char(dli), _file, _line);
7855 +static inline void __dl_adjust_block(struct super_block *sb, vtag_t tag,
7856 + unsigned long long *free_blocks, unsigned long long *root_blocks,
7857 + const char *_file, int _line)
7859 + struct dl_info *dli;
7860 + uint64_t broot, bfree;
7862 + dli = locate_dl_info(sb, tag);
7866 + spin_lock(&dli->dl_lock);
7867 + broot = (dli->dl_space_total -
7868 + (dli->dl_space_total >> 10) * dli->dl_nrlmult)
7869 + >> sb->s_blocksize_bits;
7870 + bfree = (dli->dl_space_total - dli->dl_space_used)
7871 + >> sb->s_blocksize_bits;
7872 + spin_unlock(&dli->dl_lock);
7874 + vxlprintk(VXD_CBIT(dlim, 2),
7875 + "ADJUST: %lld,%lld on %lld,%lld [mult=%d]",
7876 + (long long)bfree, (long long)broot,
7877 + free_blocks ? (long long)*free_blocks : 0LL, root_blocks ? (long long)*root_blocks : 0LL, dli->dl_nrlmult, /* either pointer may be NULL (checked below) */
7879 + if (free_blocks) {
7880 + if (*free_blocks > bfree)
7881 + *free_blocks = bfree;
7883 + if (root_blocks) {
7884 + if (*root_blocks > broot)
7885 + *root_blocks = broot;
7890 +#define dl_prealloc_space(in, bytes) \
7891 + __dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
7892 + __FILE__, __LINE__ )
7894 +#define dl_alloc_space(in, bytes) \
7895 + __dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
7896 + __FILE__, __LINE__ )
7898 +#define dl_reserve_space(in, bytes) \
7899 + __dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
7900 + __FILE__, __LINE__ )
7902 +#define dl_claim_space(in, bytes) (0)
7904 +#define dl_release_space(in, bytes) \
7905 + __dl_free_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
7906 + __FILE__, __LINE__ )
7908 +#define dl_free_space(in, bytes) \
7909 + __dl_free_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
7910 + __FILE__, __LINE__ )
7914 +#define dl_alloc_inode(in) \
7915 + __dl_alloc_inode((in)->i_sb, i_tag_read(in), __FILE__, __LINE__ )
7917 +#define dl_free_inode(in) \
7918 + __dl_free_inode((in)->i_sb, i_tag_read(in), __FILE__, __LINE__ )
7921 +#define dl_adjust_block(sb, tag, fb, rb) \
7922 + __dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__ )
7926 +#warning duplicate inclusion
7928 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_inet.h linux-4.4/include/linux/vs_inet.h
7929 --- linux-4.4/include/linux/vs_inet.h 1970-01-01 01:00:00.000000000 +0100
7930 +++ linux-4.4/include/linux/vs_inet.h 2021-02-24 16:56:24.589489533 +0100
7935 +#include "vserver/base.h"
7936 +#include "vserver/network.h"
7937 +#include "vserver/debug.h"
7939 +#define IPI_LOOPBACK htonl(INADDR_LOOPBACK)
7941 +#define NXAV4(a) NIPQUAD((a)->ip[0]), NIPQUAD((a)->ip[1]), \
7942 + NIPQUAD((a)->mask), (a)->type
7943 +#define NXAV4_FMT "[" NIPQUAD_FMT "-" NIPQUAD_FMT "/" NIPQUAD_FMT ":%04x]"
7945 +#define NIPQUAD(addr) \
7946 + ((unsigned char *)&addr)[0], \
7947 + ((unsigned char *)&addr)[1], \
7948 + ((unsigned char *)&addr)[2], \
7949 + ((unsigned char *)&addr)[3]
7951 +#define NIPQUAD_FMT "%u.%u.%u.%u"
7955 +int v4_addr_match(struct nx_addr_v4 *nxa, __be32 addr, uint16_t tmask)
7957 + __be32 ip = nxa->ip[0].s_addr;
7958 + __be32 mask = nxa->mask.s_addr;
7959 + __be32 bcast = ip | ~mask;
7962 + switch (nxa->type & tmask) {
7963 + case NXA_TYPE_MASK:
7964 + ret = (ip == (addr & mask));
7966 + case NXA_TYPE_ADDR:
7970 + /* fall through to broadcast */
7971 + case NXA_MOD_BCAST:
7972 + ret = ((tmask & NXA_MOD_BCAST) && (addr == bcast));
7974 + case NXA_TYPE_RANGE:
7975 + ret = ((nxa->ip[0].s_addr <= addr) &&
7976 + (nxa->ip[1].s_addr > addr));
7978 + case NXA_TYPE_ANY:
7983 + vxdprintk(VXD_CBIT(net, 0),
7984 + "v4_addr_match(%p" NXAV4_FMT "," NIPQUAD_FMT ",%04x) = %d",
7985 + nxa, NXAV4(nxa), NIPQUAD(addr), tmask, ret);
7990 +int v4_addr_in_nx_info(struct nx_info *nxi, __be32 addr, uint16_t tmask)
7992 + struct nx_addr_v4 *nxa;
7993 + unsigned long irqflags;
8000 + /* allow 127.0.0.1 when remapping lback */
8001 + if ((tmask & NXA_LOOPBACK) &&
8002 + (addr == IPI_LOOPBACK) &&
8003 + nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
8006 + /* check for lback address */
8007 + if ((tmask & NXA_MOD_LBACK) &&
8008 + (nxi->v4_lback.s_addr == addr))
8011 + /* check for broadcast address */
8012 + if ((tmask & NXA_MOD_BCAST) &&
8013 + (nxi->v4_bcast.s_addr == addr))
8017 + /* check for v4 addresses */
8018 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
8019 + for (nxa = &nxi->v4; nxa; nxa = nxa->next)
8020 + if (v4_addr_match(nxa, addr, tmask))
8024 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
8026 + vxdprintk(VXD_CBIT(net, 0),
8027 + "v4_addr_in_nx_info(%p[#%u]," NIPQUAD_FMT ",%04x) = %d",
8028 + nxi, nxi ? nxi->nx_id : 0, NIPQUAD(addr), tmask, ret);
8033 +int v4_nx_addr_match(struct nx_addr_v4 *nxa, struct nx_addr_v4 *addr, uint16_t mask)
8035 + /* FIXME: needs full range checks */
8036 + return v4_addr_match(nxa, addr->ip[0].s_addr, mask);
8040 +int v4_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v4 *nxa, uint16_t mask)
8042 + struct nx_addr_v4 *ptr;
8043 + unsigned long irqflags;
8046 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
8047 + for (ptr = &nxi->v4; ptr; ptr = ptr->next)
8048 + if (v4_nx_addr_match(ptr, nxa, mask))
8052 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
8056 +#include <net/inet_sock.h>
8059 + * Check if a given address matches for a socket
8061 + * nxi: the socket's nx_info if any
8062 + * addr: to be verified address
8065 +int v4_sock_addr_match (
8066 + struct nx_info *nxi,
8067 + struct inet_sock *inet,
8070 + __be32 saddr = inet->inet_rcv_saddr;
8071 + __be32 bcast = nxi ? nxi->v4_bcast.s_addr : INADDR_BROADCAST;
8073 + if (addr && (saddr == addr || bcast == addr))
8076 + return v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND);
8081 +/* inet related checks and helpers */
8090 +#include <linux/netdevice.h>
8091 +#include <linux/inetdevice.h>
8092 +#include <net/inet_sock.h>
8093 +#include <net/inet_timewait_sock.h>
8096 +int dev_in_nx_info(struct net_device *, struct nx_info *);
8097 +int v4_dev_in_nx_info(struct net_device *, struct nx_info *);
8098 +int nx_v4_addr_conflict(struct nx_info *, struct nx_info *);
8102 + * check if address is covered by socket
8104 + * sk: the socket to check against
8105 + * addr: the address in question (must be != 0)
8109 +int __v4_addr_match_socket(const struct sock *sk, struct nx_addr_v4 *nxa)
8111 + struct nx_info *nxi = sk->sk_nx_info;
8112 + __be32 saddr = sk->sk_rcv_saddr;
8114 + vxdprintk(VXD_CBIT(net, 5),
8115 + "__v4_addr_in_socket(%p," NXAV4_FMT ") %p:" NIPQUAD_FMT " %p;%lx",
8116 + sk, NXAV4(nxa), nxi, NIPQUAD(saddr), sk->sk_socket,
8117 + (sk->sk_socket?sk->sk_socket->flags:0));
8119 + if (saddr) { /* direct address match */
8120 + return v4_addr_match(nxa, saddr, -1);
8121 + } else if (nxi) { /* match against nx_info */
8122 + return v4_nx_addr_in_nx_info(nxi, nxa, -1);
8123 + } else { /* unrestricted any socket */
8131 +int nx_dev_visible(struct nx_info *nxi, struct net_device *dev)
8133 + vxdprintk(VXD_CBIT(net, 1),
8134 + "nx_dev_visible(%p[#%u],%p " VS_Q("%s") ") %d",
8135 + nxi, nxi ? nxi->nx_id : 0, dev, dev->name,
8136 + nxi ? dev_in_nx_info(dev, nxi) : 0);
8138 + if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
8140 + if (dev_in_nx_info(dev, nxi))
8147 +int v4_ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
8153 + return v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW);
8157 +int nx_v4_ifa_visible(struct nx_info *nxi, struct in_ifaddr *ifa)
8159 + vxdprintk(VXD_CBIT(net, 1), "nx_v4_ifa_visible(%p[#%u],%p) %d",
8160 + nxi, nxi ? nxi->nx_id : 0, ifa,
8161 + nxi ? v4_ifa_in_nx_info(ifa, nxi) : 0);
8163 + if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
8165 + if (v4_ifa_in_nx_info(ifa, nxi))
8171 +struct nx_v4_sock_addr {
8172 + __be32 saddr; /* Address used for validation */
8173 + __be32 baddr; /* Address used for socket bind */
8177 +int v4_map_sock_addr(struct inet_sock *inet, struct sockaddr_in *addr,
8178 + struct nx_v4_sock_addr *nsa)
8180 + struct sock *sk = &inet->sk;
8181 + struct nx_info *nxi = sk->sk_nx_info;
8182 + __be32 saddr = addr->sin_addr.s_addr;
8183 + __be32 baddr = saddr;
8185 + vxdprintk(VXD_CBIT(net, 3),
8186 + "inet_bind(%p)* %p,%p;%lx " NIPQUAD_FMT,
8187 + sk, sk->sk_nx_info, sk->sk_socket,
8188 + (sk->sk_socket ? sk->sk_socket->flags : 0),
8192 + if (saddr == INADDR_ANY) {
8193 + if (nx_info_flags(nxi, NXF_SINGLE_IP, 0))
8194 + baddr = nxi->v4.ip[0].s_addr;
8195 + } else if (saddr == IPI_LOOPBACK) {
8196 + if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
8197 + baddr = nxi->v4_lback.s_addr;
8198 + } else if (!ipv4_is_multicast(saddr) ||
8199 + !nx_info_ncaps(nxi, NXC_MULTICAST)) {
8200 + /* normal address bind */
8201 + if (!v4_addr_in_nx_info(nxi, saddr, NXA_MASK_BIND))
8202 + return -EADDRNOTAVAIL;
8206 + vxdprintk(VXD_CBIT(net, 3),
8207 + "inet_bind(%p) " NIPQUAD_FMT ", " NIPQUAD_FMT,
8208 + sk, NIPQUAD(saddr), NIPQUAD(baddr));
8210 + nsa->saddr = saddr;
8211 + nsa->baddr = baddr;
8216 +void v4_set_sock_addr(struct inet_sock *inet, struct nx_v4_sock_addr *nsa)
8218 + inet->inet_saddr = nsa->baddr;
8219 + inet->inet_rcv_saddr = nsa->baddr;
8224 + * helper to simplify inet_lookup_listener
8226 + * nxi: the socket's nx_info if any
8227 + * addr: to be verified address
8228 + * saddr: socket address
8230 +static inline int v4_inet_addr_match (
8231 + struct nx_info *nxi,
8235 + if (addr && (saddr == addr))
8238 + return nxi ? v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND) : 1;
8242 +static inline __be32 nx_map_sock_lback(struct nx_info *nxi, __be32 addr)
8244 + if (nx_info_flags(nxi, NXF_HIDE_LBACK, 0) &&
8245 + (addr == nxi->v4_lback.s_addr))
8246 + return IPI_LOOPBACK;
8251 +int nx_info_has_v4(struct nx_info *nxi)
8257 + if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
8262 +#else /* CONFIG_INET */
8265 +int nx_dev_visible(struct nx_info *n, struct net_device *d)
8271 +int nx_v4_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
8277 +int v4_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
8283 +int nx_info_has_v4(struct nx_info *nxi)
8288 +#endif /* CONFIG_INET */
8290 +#define current_nx_info_has_v4() \
8291 + nx_info_has_v4(current_nx_info())
8294 +// #warning duplicate inclusion
8296 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_inet6.h linux-4.4/include/linux/vs_inet6.h
8297 --- linux-4.4/include/linux/vs_inet6.h 1970-01-01 01:00:00.000000000 +0100
8298 +++ linux-4.4/include/linux/vs_inet6.h 2021-02-24 16:56:24.589489533 +0100
8300 +#ifndef _VS_INET6_H
8301 +#define _VS_INET6_H
8303 +#include "vserver/base.h"
8304 +#include "vserver/network.h"
8305 +#include "vserver/debug.h"
8307 +#include <net/ipv6.h>
8309 +#define NXAV6(a) &(a)->ip, &(a)->mask, (a)->prefix, (a)->type
8310 +#define NXAV6_FMT "[%pI6/%pI6/%d:%04x]"
8316 +int v6_addr_match(struct nx_addr_v6 *nxa,
8317 + const struct in6_addr *addr, uint16_t mask)
8321 + switch (nxa->type & mask) {
8322 + case NXA_TYPE_MASK:
8323 + ret = ipv6_masked_addr_cmp(&nxa->ip, &nxa->mask, addr);
8325 + case NXA_TYPE_ADDR:
8326 + ret = ipv6_addr_equal(&nxa->ip, addr);
8328 + case NXA_TYPE_ANY:
8332 + vxdprintk(VXD_CBIT(net, 0),
8333 + "v6_addr_match(%p" NXAV6_FMT ",%pI6,%04x) = %d",
8334 + nxa, NXAV6(nxa), addr, mask, ret);
8339 +int v6_addr_in_nx_info(struct nx_info *nxi,
8340 + const struct in6_addr *addr, uint16_t mask)
8342 + struct nx_addr_v6 *nxa;
8343 + unsigned long irqflags;
8349 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
8350 + for (nxa = &nxi->v6; nxa; nxa = nxa->next)
8351 + if (v6_addr_match(nxa, addr, mask))
8355 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
8357 + vxdprintk(VXD_CBIT(net, 0),
8358 + "v6_addr_in_nx_info(%p[#%u],%pI6,%04x) = %d",
8359 + nxi, nxi ? nxi->nx_id : 0, addr, mask, ret);
8364 +int v6_nx_addr_match(struct nx_addr_v6 *nxa, struct nx_addr_v6 *addr, uint16_t mask)
8366 + /* FIXME: needs full range checks */
8367 + return v6_addr_match(nxa, &addr->ip, mask);
8371 +int v6_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v6 *nxa, uint16_t mask)
8373 + struct nx_addr_v6 *ptr;
8374 + unsigned long irqflags;
8377 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
8378 + for (ptr = &nxi->v6; ptr; ptr = ptr->next)
8379 + if (v6_nx_addr_match(ptr, nxa, mask))
8383 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
8389 + * Check if a given address matches for a socket
8391 + * nxi: the socket's nx_info if any
8392 + * addr: to be verified address
8395 +int v6_sock_addr_match (
8396 + struct nx_info *nxi,
8397 + struct inet_sock *inet,
8398 + struct in6_addr *addr)
8400 + struct sock *sk = &inet->sk;
8401 + const struct in6_addr *saddr = inet6_rcv_saddr(sk);
8403 + if (!ipv6_addr_any(addr) &&
8404 + ipv6_addr_equal(saddr, addr))
8406 + if (ipv6_addr_any(saddr))
8407 + return v6_addr_in_nx_info(nxi, addr, -1);
8412 + * check if address is covered by socket
8414 + * sk: the socket to check against
8415 + * addr: the address in question (must be != 0)
8419 +int __v6_addr_match_socket(const struct sock *sk, struct nx_addr_v6 *nxa)
8421 + struct nx_info *nxi = sk->sk_nx_info;
8422 + const struct in6_addr *saddr = inet6_rcv_saddr(sk);
8424 + vxdprintk(VXD_CBIT(net, 5),
8425 + "__v6_addr_in_socket(%p," NXAV6_FMT ") %p:%pI6 %p;%lx",
8426 + sk, NXAV6(nxa), nxi, saddr, sk->sk_socket,
8427 + (sk->sk_socket?sk->sk_socket->flags:0));
8429 + if (!ipv6_addr_any(saddr)) { /* direct address match */
8430 + return v6_addr_match(nxa, saddr, -1);
8431 + } else if (nxi) { /* match against nx_info */
8432 + return v6_nx_addr_in_nx_info(nxi, nxa, -1);
8433 + } else { /* unrestricted any socket */
8439 +/* inet related checks and helpers */
8447 +#include <linux/netdevice.h>
8448 +#include <linux/inetdevice.h>
8449 +#include <net/inet_timewait_sock.h>
8452 +int dev_in_nx_info(struct net_device *, struct nx_info *);
8453 +int v6_dev_in_nx_info(struct net_device *, struct nx_info *);
8454 +int nx_v6_addr_conflict(struct nx_info *, struct nx_info *);
8459 +int v6_ifa_in_nx_info(struct inet6_ifaddr *ifa, struct nx_info *nxi)
8465 + return v6_addr_in_nx_info(nxi, &ifa->addr, -1);
8469 +int nx_v6_ifa_visible(struct nx_info *nxi, struct inet6_ifaddr *ifa)
8471 + vxdprintk(VXD_CBIT(net, 1), "nx_v6_ifa_visible(%p[#%u],%p) %d",
8472 + nxi, nxi ? nxi->nx_id : 0, ifa,
8473 + nxi ? v6_ifa_in_nx_info(ifa, nxi) : 0);
8475 + if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
8477 + if (v6_ifa_in_nx_info(ifa, nxi))
8483 +struct nx_v6_sock_addr {
8484 + struct in6_addr saddr; /* Address used for validation */
8485 + struct in6_addr baddr; /* Address used for socket bind */
8489 +int v6_map_sock_addr(struct inet_sock *inet, struct sockaddr_in6 *addr,
8490 + struct nx_v6_sock_addr *nsa)
8492 + // struct sock *sk = &inet->sk;
8493 + // struct nx_info *nxi = sk->sk_nx_info;
8494 + struct in6_addr saddr = addr->sin6_addr;
8495 + struct in6_addr baddr = saddr;
8497 + nsa->saddr = saddr;
8498 + nsa->baddr = baddr;
8503 +void v6_set_sock_addr(struct inet_sock *inet, struct nx_v6_sock_addr *nsa)
8505 + // struct sock *sk = &inet->sk;
8506 + // struct in6_addr *saddr = inet6_rcv_saddr(sk);
8508 + // *saddr = nsa->baddr;
8509 + // inet->inet_saddr = nsa->baddr;
8513 +int nx_info_has_v6(struct nx_info *nxi)
8522 +#else /* CONFIG_IPV6 */
8525 +int nx_v6_dev_visible(struct nx_info *n, struct net_device *d)
8532 +int nx_v6_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
8538 +int v6_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
8544 +int nx_info_has_v6(struct nx_info *nxi)
8549 +#endif /* CONFIG_IPV6 */
8551 +#define current_nx_info_has_v6() \
8552 + nx_info_has_v6(current_nx_info()) /* nx_info_has_v6() applied to current_nx_info() */
8555 +#warning duplicate inclusion
8557 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_limit.h linux-4.4/include/linux/vs_limit.h
8558 --- linux-4.4/include/linux/vs_limit.h 1970-01-01 01:00:00.000000000 +0100
8559 +++ linux-4.4/include/linux/vs_limit.h 2021-02-24 16:56:24.589489533 +0100
8561 +#ifndef _VS_LIMIT_H
8562 +#define _VS_LIMIT_H
8564 +#include "vserver/limit.h"
8565 +#include "vserver/base.h"
8566 +#include "vserver/context.h"
8567 +#include "vserver/debug.h"
8568 +#include "vserver/context.h" /* NOTE(review): this header is already included two lines above in the same include list; the repeat looks redundant */
8569 +#include "vserver/limit_int.h"
8572 +#define vx_acc_cres(v, d, p, r) \
8573 + __vx_acc_cres(v, r, d, p, __FILE__, __LINE__) /* forwards to __vx_acc_cres with the resource r moved to second position and the call site's file/line appended */
8575 +#define vx_acc_cres_cond(x, d, p, r) \
8576 + __vx_acc_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
8577 + r, d, p, __FILE__, __LINE__) /* first argument is current_vx_info() only when x equals vx_current_xid(), otherwise 0 */
8580 +#define vx_add_cres(v, a, p, r) \
8581 + __vx_add_cres(v, r, a, p, __FILE__, __LINE__) /* same argument reordering as vx_acc_cres, targeting __vx_add_cres with amount a */
8582 +#define vx_sub_cres(v, a, p, r) vx_add_cres(v, -(a), p, r) /* vx_add_cres with the amount negated */
8584 +#define vx_add_cres_cond(x, a, p, r) \
8585 + __vx_add_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
8586 + r, a, p, __FILE__, __LINE__) /* first argument is current_vx_info() only when x equals vx_current_xid(), otherwise 0 */
8587 +#define vx_sub_cres_cond(x, a, p, r) vx_add_cres_cond(x, -(a), p, r) /* vx_add_cres_cond with the amount negated */
8590 +/* process and file limits */
8592 +#define vx_nproc_inc(p) \
8593 + vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC) /* vx_acc_cres on p's vx_info with d = 1 for RLIMIT_NPROC */
8595 +#define vx_nproc_dec(p) \
8596 + vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC) /* d = -1 counterpart of vx_nproc_inc */
8598 +#define vx_files_inc(f) \
8599 + vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE) /* conditional variant keyed on the f_xid field of f, d = 1 */
8601 +#define vx_files_dec(f) \
8602 + vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE) /* d = -1 counterpart of vx_files_inc */
8604 +#define vx_locks_inc(l) \
8605 + vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS) /* conditional variant keyed on the fl_xid field of l, d = 1 */
8607 +#define vx_locks_dec(l) \
8608 + vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS) /* d = -1 counterpart of vx_locks_inc */
8610 +#define vx_openfd_inc(f) \
8611 + vx_acc_cres(current_vx_info(), 1, (void *)(long)(f), VLIMIT_OPENFD) /* uses current_vx_info(); f is cast through long to a void pointer for the p argument */
8613 +#define vx_openfd_dec(f) \
8614 + vx_acc_cres(current_vx_info(),-1, (void *)(long)(f), VLIMIT_OPENFD) /* d = -1 counterpart of vx_openfd_inc */
8617 +#define vx_cres_avail(v, n, r) \
8618 + __vx_cres_avail(v, r, n, __FILE__, __LINE__) /* forwards to __vx_cres_avail with r and n swapped and the call site's file/line appended */
8621 +#define vx_nproc_avail(n) \
8622 + vx_cres_avail(current_vx_info(), n, RLIMIT_NPROC) /* vx_cres_avail for current_vx_info() and RLIMIT_NPROC */
8624 +#define vx_files_avail(n) \
8625 + vx_cres_avail(current_vx_info(), n, RLIMIT_NOFILE) /* vx_cres_avail for current_vx_info() and RLIMIT_NOFILE */
8627 +#define vx_locks_avail(n) \
8628 + vx_cres_avail(current_vx_info(), n, RLIMIT_LOCKS) /* vx_cres_avail for current_vx_info() and RLIMIT_LOCKS */
8630 +#define vx_openfd_avail(n) \
8631 + vx_cres_avail(current_vx_info(), n, VLIMIT_OPENFD) /* vx_cres_avail for current_vx_info() and VLIMIT_OPENFD */
8634 +/* dentry limits */
8636 +#define vx_dentry_inc(d) do { \
8637 + if (d_count(d) == 1) \
8638 + vx_acc_cres(current_vx_info(), 1, d, VLIMIT_DENTRY); \
8641 +#define vx_dentry_dec(d) do { \
8642 + if (d_count(d) == 0) \
8643 + vx_acc_cres(current_vx_info(),-1, d, VLIMIT_DENTRY); \
8646 +#define vx_dentry_avail(n) \
8647 + vx_cres_avail(current_vx_info(), n, VLIMIT_DENTRY) /* vx_cres_avail for current_vx_info() and VLIMIT_DENTRY */
8650 +/* socket limits */
8652 +#define vx_sock_inc(s) \
8653 + vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK) /* uses the sk_vx_info stored on s rather than current_vx_info(), d = 1 */
8655 +#define vx_sock_dec(s) \
8656 + vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK) /* d = -1 counterpart of vx_sock_inc */
8658 +#define vx_sock_avail(n) \
8659 + vx_cres_avail(current_vx_info(), n, VLIMIT_NSOCK) /* unlike inc/dec above, this one queries current_vx_info() */
8662 +/* ipc resource limits */
8664 +#define vx_ipcmsg_add(v, u, a) \
8665 + vx_add_cres(v, a, u, RLIMIT_MSGQUEUE) /* note the argument swap: amount a goes second, u is passed as the p argument */
8667 +#define vx_ipcmsg_sub(v, u, a) \
8668 + vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE) /* subtracting counterpart of vx_ipcmsg_add */
8670 +#define vx_ipcmsg_avail(v, a) \
8671 + vx_cres_avail(v, a, RLIMIT_MSGQUEUE) /* vx_cres_avail on the caller-supplied v for RLIMIT_MSGQUEUE */
8674 +#define vx_ipcshm_add(v, k, a) \
8675 + vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) /* k is cast through long to a void pointer for the p argument */
8677 +#define vx_ipcshm_sub(v, k, a) \
8678 + vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM) /* subtracting counterpart of vx_ipcshm_add */
8680 +#define vx_ipcshm_avail(v, a) \
8681 + vx_cres_avail(v, a, VLIMIT_SHMEM) /* vx_cres_avail on the caller-supplied v for VLIMIT_SHMEM */
8684 +#define vx_semary_inc(a) \
8685 + vx_acc_cres(current_vx_info(), 1, a, VLIMIT_SEMARY) /* vx_acc_cres for current_vx_info() with d = 1 on VLIMIT_SEMARY */
8687 +#define vx_semary_dec(a) \
8688 + vx_acc_cres(current_vx_info(), -1, a, VLIMIT_SEMARY) /* d = -1 counterpart of vx_semary_inc */
8691 +#define vx_nsems_add(a,n) \
8692 + vx_add_cres(current_vx_info(), n, a, VLIMIT_NSEMS) /* adds amount n on VLIMIT_NSEMS for current_vx_info(); a is passed as the p argument */
8694 +#define vx_nsems_sub(a,n) \
8695 + vx_sub_cres(current_vx_info(), n, a, VLIMIT_NSEMS) /* subtracting counterpart of vx_nsems_add */
8699 +#warning duplicate inclusion
8701 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_network.h linux-4.4/include/linux/vs_network.h
8702 --- linux-4.4/include/linux/vs_network.h 1970-01-01 01:00:00.000000000 +0100
8703 +++ linux-4.4/include/linux/vs_network.h 2021-02-24 16:56:24.589489533 +0100
8705 +#ifndef _NX_VS_NETWORK_H
8706 +#define _NX_VS_NETWORK_H
8708 +#include "vserver/context.h"
8709 +#include "vserver/network.h"
8710 +#include "vserver/base.h"
8711 +#include "vserver/check.h"
8712 +#include "vserver/debug.h"
8714 +#include <linux/sched.h>
8717 +#define get_nx_info(i) __get_nx_info(i, __FILE__, __LINE__) /* wrapper that hands the call site's file and line to __get_nx_info */
8719 +static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
8720 + const char *_file, int _line)
8725 + vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])",
8726 + nxi, nxi ? nxi->nx_id : 0,
8727 + nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8730 + atomic_inc(&nxi->nx_usecnt);
8735 +extern void free_nx_info(struct nx_info *);
8737 +#define put_nx_info(i) __put_nx_info(i, __FILE__, __LINE__) /* wrapper that hands the call site's file and line to __put_nx_info */
8739 +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
8744 + vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])",
8745 + nxi, nxi ? nxi->nx_id : 0,
8746 + nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8749 + if (atomic_dec_and_test(&nxi->nx_usecnt))
8750 + free_nx_info(nxi);
8754 +#define init_nx_info(p, i) __init_nx_info(p, i, __FILE__, __LINE__) /* wrapper that hands the call site's file and line to __init_nx_info */
8756 +static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi,
8757 + const char *_file, int _line)
8760 + vxlprintk(VXD_CBIT(nid, 3),
8761 + "init_nx_info(%p[#%d.%d])",
8762 + nxi, nxi ? nxi->nx_id : 0,
8763 + nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8766 + atomic_inc(&nxi->nx_usecnt);
8772 +#define set_nx_info(p, i) __set_nx_info(p, i, __FILE__, __LINE__) /* wrapper that hands the call site's file and line to __set_nx_info */
8774 +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
8775 + const char *_file, int _line)
8777 + struct nx_info *nxo;
8782 + vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])",
8783 + nxi, nxi ? nxi->nx_id : 0,
8784 + nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8787 + atomic_inc(&nxi->nx_usecnt);
8788 + nxo = xchg(nxp, nxi);
8792 +#define clr_nx_info(p) __clr_nx_info(p, __FILE__, __LINE__) /* wrapper that hands the call site's file and line to __clr_nx_info */
8794 +static inline void __clr_nx_info(struct nx_info **nxp,
8795 + const char *_file, int _line)
8797 + struct nx_info *nxo;
8799 + nxo = xchg(nxp, NULL);
8803 + vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])",
8804 + nxo, nxo ? nxo->nx_id : 0,
8805 + nxo ? atomic_read(&nxo->nx_usecnt) : 0,
8808 + if (atomic_dec_and_test(&nxo->nx_usecnt))
8809 + free_nx_info(nxo);
8813 +#define claim_nx_info(v, p) __claim_nx_info(v, p, __FILE__, __LINE__) /* wrapper that hands the call site's file and line to __claim_nx_info */
8815 +static inline void __claim_nx_info(struct nx_info *nxi,
8816 + struct task_struct *task, const char *_file, int _line)
8818 + vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p",
8819 + nxi, nxi ? nxi->nx_id : 0,
8820 + nxi?atomic_read(&nxi->nx_usecnt):0,
8821 + nxi?atomic_read(&nxi->nx_tasks):0,
8822 + task, _file, _line);
8824 + atomic_inc(&nxi->nx_tasks);
8828 +extern void unhash_nx_info(struct nx_info *);
8830 +#define release_nx_info(v, p) __release_nx_info(v, p, __FILE__, __LINE__) /* wrapper that hands the call site's file and line to __release_nx_info */
8832 +static inline void __release_nx_info(struct nx_info *nxi,
8833 + struct task_struct *task, const char *_file, int _line)
8835 + vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p",
8836 + nxi, nxi ? nxi->nx_id : 0,
8837 + nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8838 + nxi ? atomic_read(&nxi->nx_tasks) : 0,
8839 + task, _file, _line);
8843 + if (atomic_dec_and_test(&nxi->nx_tasks))
8844 + unhash_nx_info(nxi);
8848 +#define task_get_nx_info(i) __task_get_nx_info(i, __FILE__, __LINE__) /* wrapper that hands the call site's file and line to __task_get_nx_info */
8850 +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
8851 + const char *_file, int _line)
8853 + struct nx_info *nxi;
8856 + vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)",
8858 + nxi = __get_nx_info(p->nx_info, _file, _line);
8864 +static inline void exit_nx_info(struct task_struct *p)
8867 + release_nx_info(p->nx_info, p);
8872 +#warning duplicate inclusion
8874 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_pid.h linux-4.4/include/linux/vs_pid.h
8875 --- linux-4.4/include/linux/vs_pid.h 1970-01-01 01:00:00.000000000 +0100
8876 +++ linux-4.4/include/linux/vs_pid.h 2021-02-24 16:56:24.589489533 +0100
8881 +#include "vserver/base.h"
8882 +#include "vserver/check.h"
8883 +#include "vserver/context.h"
8884 +#include "vserver/debug.h"
8885 +#include "vserver/pid.h"
8886 +#include <linux/pid_namespace.h>
8889 +#define VXF_FAKE_INIT (VXF_INFO_INIT | VXF_STATE_INIT) /* combined mask of the VXF_INFO_INIT and VXF_STATE_INIT bits */
8892 +int vx_proc_task_visible(struct task_struct *task)
8894 + if ((task->pid == 1) &&
8895 + !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT))
8896 + /* show a blend through init */
8898 + if (vx_check(vx_task_xid(task), VS_WATCH | VS_IDENT))
8905 +#define find_task_by_real_pid(pid) find_task_by_pid_ns(pid, &init_pid_ns) /* pid lookup pinned to init_pid_ns */
8909 +struct task_struct *vx_get_proc_task(struct inode *inode, struct pid *pid)
8911 + struct task_struct *task = get_pid_task(pid, PIDTYPE_PID);
8913 + if (task && !vx_proc_task_visible(task)) {
8914 + vxdprintk(VXD_CBIT(misc, 6),
8915 + "dropping task (get) %p[#%u,%u] for %p[#%u,%u]",
8916 + task, task->xid, task->pid,
8917 + current, current->xid, current->pid);
8918 + put_task_struct(task);
8926 +#warning duplicate inclusion
8928 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_sched.h linux-4.4/include/linux/vs_sched.h
8929 --- linux-4.4/include/linux/vs_sched.h 1970-01-01 01:00:00.000000000 +0100
8930 +++ linux-4.4/include/linux/vs_sched.h 2021-02-24 16:56:24.589489533 +0100
8932 +#ifndef _VS_SCHED_H
8933 +#define _VS_SCHED_H
8935 +#include "vserver/base.h"
8936 +#include "vserver/context.h"
8937 +#include "vserver/sched.h"
8940 +#define MAX_PRIO_BIAS 20 /* upper bound constant for the priority bias */
8941 +#define MIN_PRIO_BIAS -20 /* NOTE(review): negative value is not parenthesized; an expression such as x MIN_PRIO_BIAS or 0-MIN_PRIO_BIAS would mis-expand - consider (-20) */
8944 +int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
8946 + struct vx_info *vxi = p->vx_info;
8949 + prio += vx_cpu(vxi, sched_pc).prio_bias;
8953 +static inline void vx_account_user(struct vx_info *vxi,
8954 + cputime_t cputime, int nice)
8958 + vx_cpu(vxi, sched_pc).user_ticks += cputime;
8961 +static inline void vx_account_system(struct vx_info *vxi,
8962 + cputime_t cputime, int idle)
8966 + vx_cpu(vxi, sched_pc).sys_ticks += cputime;
8970 +#warning duplicate inclusion
8972 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_socket.h linux-4.4/include/linux/vs_socket.h
8973 --- linux-4.4/include/linux/vs_socket.h 1970-01-01 01:00:00.000000000 +0100
8974 +++ linux-4.4/include/linux/vs_socket.h 2021-02-24 16:56:24.589489533 +0100
8976 +#ifndef _VS_SOCKET_H
8977 +#define _VS_SOCKET_H
8979 +#include "vserver/debug.h"
8980 +#include "vserver/base.h"
8981 +#include "vserver/cacct.h"
8982 +#include "vserver/context.h"
8983 +#include "vserver/tag.h"
8986 +/* socket accounting */
8988 +#include <linux/socket.h>
8990 +static inline int vx_sock_type(int family)
8994 + return VXA_SOCK_UNSPEC;
8996 + return VXA_SOCK_UNIX;
8998 + return VXA_SOCK_INET;
9000 + return VXA_SOCK_INET6;
9002 + return VXA_SOCK_PACKET;
9004 + return VXA_SOCK_OTHER;
9008 +#define vx_acc_sock(v, f, p, s) \
9009 + __vx_acc_sock(v, f, p, s, __FILE__, __LINE__) /* wrapper that appends the call site's file and line for __vx_acc_sock */
9011 +static inline void __vx_acc_sock(struct vx_info *vxi,
9012 + int family, int pos, int size, char *file, int line)
9015 + int type = vx_sock_type(family);
9017 + atomic_long_inc(&vxi->cacct.sock[type][pos].count);
9018 + atomic_long_add(size, &vxi->cacct.sock[type][pos].total);
9022 +#define vx_sock_recv(sk, s) \
9023 + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, s) /* pos 0 of the per-type sock[][pos] counters, using the socket's sk_vx_info and sk_family */
9024 +#define vx_sock_send(sk, s) \
9025 + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, s) /* same as vx_sock_recv but pos 1 */
9026 +#define vx_sock_fail(sk, s) \
9027 + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, s) /* same as vx_sock_recv but pos 2 */
9030 +#define sock_vx_init(s) do { \
9031 + (s)->sk_xid = 0; \
9032 + (s)->sk_vx_info = NULL; \
9035 +#define sock_nx_init(s) do { \
9036 + (s)->sk_nid = 0; \
9037 + (s)->sk_nx_info = NULL; \
9041 +#warning duplicate inclusion
9043 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_tag.h linux-4.4/include/linux/vs_tag.h
9044 --- linux-4.4/include/linux/vs_tag.h 1970-01-01 01:00:00.000000000 +0100
9045 +++ linux-4.4/include/linux/vs_tag.h 2021-02-24 16:56:24.589489533 +0100
9050 +#include <linux/vserver/tag.h>
9052 +/* check conditions */
9054 +#define DX_ADMIN 0x0001 /* in __dx_check: satisfied when the current tag (cid) is 0 */
9055 +#define DX_WATCH 0x0002 /* in __dx_check: satisfied when the current tag (cid) is 1 */
9056 +#define DX_HOSTID 0x0008 /* in __dx_check: satisfied when the supplied id is 0 */
9058 +#define DX_IDENT 0x0010 /* in __dx_check: tested as id == cid */
9060 +#define DX_ARG_MASK 0x0010 /* same value as DX_IDENT, the only argument-comparison bit defined here */
9063 +#define dx_task_tag(t) ((t)->tag) /* reads the tag field of t */
9065 +#define dx_current_tag() dx_task_tag(current) /* tag of the current task */
9067 +#define dx_check(c, m) __dx_check(dx_current_tag(), c, m) /* __dx_check with the current task's tag as cid */
9069 +#define dx_weak_check(c, m) ((m) ? dx_check(c, m) : 1) /* evaluates to 1 when mode m is 0, otherwise defers to dx_check */
9073 + * check current context for ADMIN/WATCH and
9074 + * optionally against supplied argument
9076 +static inline int __dx_check(vtag_t cid, vtag_t id, unsigned int mode)
9078 + if (mode & DX_ARG_MASK) {
9079 + if ((mode & DX_IDENT) && (id == cid))
9082 + return (((mode & DX_ADMIN) && (cid == 0)) ||
9083 + ((mode & DX_WATCH) && (cid == 1)) ||
9084 + ((mode & DX_HOSTID) && (id == 0)));
9088 +int dx_permission(const struct inode *inode, int mask);
9092 +#warning duplicate inclusion
9094 diff -urNp -x '*.orig' linux-4.4/include/linux/vs_time.h linux-4.4/include/linux/vs_time.h
9095 --- linux-4.4/include/linux/vs_time.h 1970-01-01 01:00:00.000000000 +0100
9096 +++ linux-4.4/include/linux/vs_time.h 2021-02-24 16:56:24.589489533 +0100
9102 +/* time faking stuff */
9104 +#ifdef CONFIG_VSERVER_VTIME
9106 +extern void vx_adjust_timespec(struct timespec *ts);
9107 +extern int vx_settimeofday(const struct timespec *ts);
9110 +#define vx_adjust_timespec(t) do { } while (0) /* macro form expands to an empty statement; presumably the branch used when CONFIG_VSERVER_VTIME is off - confirm against the full header */
9111 +#define vx_settimeofday(t) do_settimeofday(t) /* macro form maps straight onto do_settimeofday() */
9115 +#warning duplicate inclusion
9117 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/base.h linux-4.4/include/linux/vserver/base.h
9118 --- linux-4.4/include/linux/vserver/base.h 1970-01-01 01:00:00.000000000 +0100
9119 +++ linux-4.4/include/linux/vserver/base.h 2021-02-24 16:56:24.586156094 +0100
9121 +#ifndef _VSERVER_BASE_H
9122 +#define _VSERVER_BASE_H
9125 +/* context state changes */
9137 +#define vx_task_xid(t) ((t)->xid) /* reads the xid field of t */
9139 +#define vx_current_xid() vx_task_xid(current) /* xid of the current task */
9141 +#define current_vx_info() (current->vx_info) /* vx_info pointer of the current task */
9144 +#define nx_task_nid(t) ((t)->nid) /* reads the nid field of t */
9146 +#define nx_current_nid() nx_task_nid(current) /* nid of the current task */
9148 +#define current_nx_info() (current->nx_info) /* nx_info pointer of the current task */
9151 +/* generic flag merging */
9153 +#define vs_check_flags(v, m, f) (((v) & (m)) ^ (f)) /* XOR of the masked value with f: 0 exactly when (v & m) == f, non-zero otherwise */
9155 +#define vs_mask_flags(v, f, m) (((v) & ~(m)) | ((f) & (m))) /* bits selected by m are taken from f, all other bits from v */
9157 +#define vs_mask_mask(v, f, m) (((v) & ~(m)) | ((v) & (f) & (m))) /* inside m, keeps only bits of v that are also set in f; bits outside m are left as in v */
9159 +#define vs_check_bit(v, n) ((v) & (1LL << (n))) /* tests bit n of v; NOTE(review): 1LL << 63 overflows signed long long (undefined in ISO C) - consider 1ULL if n can reach 63 */
9162 +/* context flags */
9164 +#define __vx_flags(v) ((v) ? (v)->vx_flags : 0) /* NULL-tolerant read of vx_flags; 0 when v is NULL */
9166 +#define vx_current_flags() __vx_flags(current_vx_info()) /* flags of the current task's vx_info */
9168 +#define vx_info_flags(v, m, f) \
9169 + vs_check_flags(__vx_flags(v), m, f) /* (flags & m) ^ f, so the result is 0 when the masked flags equal f */
9171 +#define task_vx_flags(t, m, f) \
9172 + ((t) && vx_info_flags((t)->vx_info, m, f)) /* 0 when t is NULL, otherwise the truth value of vx_info_flags on t's vx_info */
9174 +#define vx_flags(m, f) vx_info_flags(current_vx_info(), m, f) /* vx_info_flags for the current task */
9179 +#define __vx_ccaps(v) ((v) ? (v)->vx_ccaps : 0) /* NULL-tolerant read of vx_ccaps; 0 when v is NULL */
9181 +#define vx_current_ccaps() __vx_ccaps(current_vx_info()) /* ccaps of the current task's vx_info */
9183 +#define vx_info_ccaps(v, c) (__vx_ccaps(v) & (c)) /* ccaps masked with c */
9185 +#define vx_ccaps(c) vx_info_ccaps(current_vx_info(), (c)) /* vx_info_ccaps for the current task */
9189 +/* network flags */
9191 +#define __nx_flags(n) ((n) ? (n)->nx_flags : 0) /* NULL-tolerant read of nx_flags; 0 when n is NULL */
9193 +#define nx_current_flags() __nx_flags(current_nx_info()) /* flags of the current task's nx_info */
9195 +#define nx_info_flags(n, m, f) \
9196 + vs_check_flags(__nx_flags(n), m, f) /* (flags & m) ^ f, so the result is 0 when the masked flags equal f */
9198 +#define task_nx_flags(t, m, f) \
9199 + ((t) && nx_info_flags((t)->nx_info, m, f)) /* 0 when t is NULL, otherwise the truth value of nx_info_flags on t's nx_info */
9201 +#define nx_flags(m, f) nx_info_flags(current_nx_info(), m, f) /* nx_info_flags for the current task */
9206 +#define __nx_ncaps(n) ((n) ? (n)->nx_ncaps : 0) /* NULL-tolerant read of nx_ncaps; 0 when n is NULL */
9208 +#define nx_current_ncaps() __nx_ncaps(current_nx_info()) /* ncaps of the current task's nx_info */
9210 +#define nx_info_ncaps(n, c) (__nx_ncaps(n) & (c)) /* ncaps masked with c */
9212 +#define nx_ncaps(c) nx_info_ncaps(current_nx_info(), c) /* nx_info_ncaps for the current task */
9215 +/* context mask capabilities */
9217 +#define __vx_mcaps(v) ((v) ? (v)->vx_ccaps >> 32UL : ~0 ) /* upper 32 bits of vx_ccaps, or ~0 (all bits set) when v is NULL */
9219 +#define vx_info_mcaps(v, c) (__vx_mcaps(v) & (c)) /* mcaps masked with c */
9221 +#define vx_mcaps(c) vx_info_mcaps(current_vx_info(), c) /* vx_info_mcaps for the current task */
9224 +/* context bcap mask */
9226 +#define __vx_bcaps(v) ((v)->vx_bcaps) /* unlike __vx_flags/__vx_ccaps this dereferences v without a NULL check */
9228 +#define vx_current_bcaps() __vx_bcaps(current_vx_info()) /* NOTE(review): dereferences current_vx_info() unconditionally - confirm callers guarantee it is non-NULL */
9231 +/* mask given bcaps */
9233 +#define vx_info_mbcaps(v, c) ((v) ? cap_intersect(__vx_bcaps(v), c) : c) /* c intersected with v's bcaps, or c unchanged when v is NULL */
9235 +#define vx_mbcaps(c) vx_info_mbcaps(current_vx_info(), c) /* vx_info_mbcaps for the current task */
9238 +/* masked cap_bset */
9240 +#define vx_info_cap_bset(v) vx_info_mbcaps(v, current->cap_bset) /* applies vx_info_mbcaps to current->cap_bset */
9242 +#define vx_current_cap_bset() vx_info_cap_bset(current_vx_info()) /* vx_info_cap_bset for the current task */
9245 +#define vx_info_mbcap(v, b) \
9246 + (!vx_info_flags(v, VXF_STATE_SETUP, 0) ? \
9247 + vx_info_bcaps(v, b) : (b)) /* NOTE(review): vx_info_bcaps is not defined in the visible part of this header - confirm it exists before relying on this macro */
9249 +#define task_vx_mbcap(t, b) \
9250 + vx_info_mbcap((t)->vx_info, (t)->b) /* b names a member of t: it is expanded as (t)->b */
9252 +#define vx_mbcap(b) task_vx_mbcap(current, b) /* task_vx_mbcap for the current task */
9255 +#define vx_cap_raised(v, c, f) cap_raised(vx_info_mbcaps(v, c), f) /* cap_raised evaluated on c after masking through vx_info_mbcaps */
9257 +#define vx_capable(b, c) (capable(b) || \
9258 + (cap_raised(current_cap(), b) && vx_ccaps(c))) /* true if capable(b), or if b is raised in current_cap() and the context holds ccap c */
9260 +#define vx_ns_capable(n, b, c) (ns_capable(n, b) || \
9261 + (cap_raised(current_cap(), b) && vx_ccaps(c))) /* as vx_capable, with ns_capable(n, b) as the first test */
9263 +#define nx_capable(b, c) (capable(b) || \
9264 + (cap_raised(current_cap(), b) && nx_ncaps(c))) /* as vx_capable, but the fallback checks network cap c via nx_ncaps */
9266 +#define nx_ns_capable(n, b, c) (ns_capable(n, b) || \
9267 + (cap_raised(current_cap(), b) && nx_ncaps(c))) /* as nx_capable, with ns_capable(n, b) as the first test */
9269 +#define vx_task_initpid(t, n) \
9270 + ((t)->vx_info && \
9271 + ((t)->vx_info->vx_initpid == (n))) /* true when t has a vx_info whose vx_initpid equals n; t itself is not NULL-checked */
9273 +#define vx_current_initpid(n) vx_task_initpid(current, n) /* vx_task_initpid for the current task */
9276 +/* context unshare mask */
9278 +#define __vx_umask(v) ((v)->vx_umask) /* reads vx_umask; v is dereferenced without a NULL check */
9280 +#define vx_current_umask() __vx_umask(current_vx_info()) /* NOTE(review): dereferences current_vx_info() unconditionally - confirm it cannot be NULL at call sites */
9282 +#define vx_can_unshare(b, f) (capable(b) || \
9283 + (cap_raised(current_cap(), b) && \
9284 + !((f) & ~vx_current_umask()))) /* true if capable(b), or if b is raised and every bit of f lies within the current umask */
9286 +#define vx_ns_can_unshare(n, b, f) (ns_capable(n, b) || \
9287 + (cap_raised(current_cap(), b) && \
9288 + !((f) & ~vx_current_umask()))) /* as vx_can_unshare, with ns_capable(n, b) as the first test */
9290 +#define __vx_wmask(v) ((v)->vx_wmask) /* reads vx_wmask; v is dereferenced without a NULL check */
9292 +#define vx_current_wmask() __vx_wmask(current_vx_info()) /* wmask of the current task's vx_info */
9295 +#define __vx_state(v) ((v) ? ((v)->vx_state) : 0) /* NULL-tolerant read of vx_state; 0 when v is NULL */
9297 +#define vx_info_state(v, m) (__vx_state(v) & (m)) /* vx_state masked with m */
9300 +#define __nx_state(n) ((n) ? ((n)->nx_state) : 0) /* NULL-tolerant read of nx_state; 0 when n is NULL */
9302 +#define nx_info_state(n, m) (__nx_state(n) & (m)) /* nx_state masked with m */
9305 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cacct.h linux-4.4/include/linux/vserver/cacct.h
9306 --- linux-4.4/include/linux/vserver/cacct.h 1970-01-01 01:00:00.000000000 +0100
9307 +++ linux-4.4/include/linux/vserver/cacct.h 2021-02-24 16:56:24.586156094 +0100
9309 +#ifndef _VSERVER_CACCT_H
9310 +#define _VSERVER_CACCT_H
9313 +enum sock_acc_field {
9314 + VXA_SOCK_UNSPEC = 0,
9320 + VXA_SOCK_SIZE /* array size */
9323 +#endif /* _VSERVER_CACCT_H */
9324 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cacct_cmd.h linux-4.4/include/linux/vserver/cacct_cmd.h
9325 --- linux-4.4/include/linux/vserver/cacct_cmd.h 1970-01-01 01:00:00.000000000 +0100
9326 +++ linux-4.4/include/linux/vserver/cacct_cmd.h 2021-02-24 16:56:24.586156094 +0100
9328 +#ifndef _VSERVER_CACCT_CMD_H
9329 +#define _VSERVER_CACCT_CMD_H
9332 +#include <linux/compiler.h>
9333 +#include <uapi/vserver/cacct_cmd.h>
9335 +extern int vc_sock_stat(struct vx_info *, void __user *);
9337 +#endif /* _VSERVER_CACCT_CMD_H */
9338 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cacct_def.h linux-4.4/include/linux/vserver/cacct_def.h
9339 --- linux-4.4/include/linux/vserver/cacct_def.h 1970-01-01 01:00:00.000000000 +0100
9340 +++ linux-4.4/include/linux/vserver/cacct_def.h 2021-02-24 16:56:24.586156094 +0100
9342 +#ifndef _VSERVER_CACCT_DEF_H
9343 +#define _VSERVER_CACCT_DEF_H
9345 +#include <asm/atomic.h>
9346 +#include <linux/vserver/cacct.h>
9349 +struct _vx_sock_acc {
9350 + atomic_long_t count;
9351 + atomic_long_t total;
9354 +/* context sub struct */
9357 + struct _vx_sock_acc sock[VXA_SOCK_SIZE][3];
9359 + atomic_t page[6][8];
9362 +#ifdef CONFIG_VSERVER_DEBUG
9364 +static inline void __dump_vx_cacct(struct _vx_cacct *cacct)
9368 + printk("\t_vx_cacct:");
9369 + for (i = 0; i < 6; i++) {
9370 + struct _vx_sock_acc *ptr = cacct->sock[i];
9372 + printk("\t [%d] =", i);
9373 + for (j = 0; j < 3; j++) {
9374 + printk(" [%d] = %8lu, %8lu", j,
9375 + atomic_long_read(&ptr[j].count),
9376 + atomic_long_read(&ptr[j].total));
9384 +#endif /* _VSERVER_CACCT_DEF_H */
9385 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cacct_int.h linux-4.4/include/linux/vserver/cacct_int.h
9386 --- linux-4.4/include/linux/vserver/cacct_int.h 1970-01-01 01:00:00.000000000 +0100
9387 +++ linux-4.4/include/linux/vserver/cacct_int.h 2021-02-24 16:56:24.586156094 +0100
9389 +#ifndef _VSERVER_CACCT_INT_H
9390 +#define _VSERVER_CACCT_INT_H
9393 +unsigned long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
9395 + return atomic_long_read(&cacct->sock[type][pos].count);
9400 +unsigned long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
9402 + return atomic_long_read(&cacct->sock[type][pos].total);
9405 +#endif /* _VSERVER_CACCT_INT_H */
9406 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/check.h linux-4.4/include/linux/vserver/check.h
9407 --- linux-4.4/include/linux/vserver/check.h 1970-01-01 01:00:00.000000000 +0100
9408 +++ linux-4.4/include/linux/vserver/check.h 2021-02-24 16:56:24.586156094 +0100
9410 +#ifndef _VSERVER_CHECK_H
9411 +#define _VSERVER_CHECK_H
9414 +#define MAX_S_CONTEXT 65535 /* Arbitrary limit */
9416 +#ifdef CONFIG_VSERVER_DYNAMIC_IDS
9417 +#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */
9419 +#define MIN_D_CONTEXT 65536
9422 +/* check conditions */
9424 +#define VS_ADMIN 0x0001 /* in __vs_check: satisfied when the current id (cid) is 0 */
9425 +#define VS_WATCH 0x0002 /* in __vs_check: satisfied when the current id (cid) is 1 */
9426 +#define VS_HIDE 0x0004
9427 +#define VS_HOSTID 0x0008 /* in __vs_check: satisfied when the supplied id is 0 */
9429 +#define VS_IDENT 0x0010 /* in __vs_check: tested as id == cid */
9430 +#define VS_EQUIV 0x0020
9431 +#define VS_PARENT 0x0040
9432 +#define VS_CHILD 0x0080
9434 +#define VS_ARG_MASK 0x00F0 /* covers VS_IDENT through VS_CHILD (0x0010..0x0080) */
9436 +#define VS_DYNAMIC 0x0100 /* in __vs_check: tested against the range starting at MIN_D_CONTEXT */
9437 +#define VS_STATIC 0x0200 /* in __vs_check: tested as 1 < id < MIN_D_CONTEXT */
9439 +#define VS_ATR_MASK 0x0F00 /* nibble holding VS_DYNAMIC and VS_STATIC */
9441 +#ifdef CONFIG_VSERVER_PRIVACY
9442 +#define VS_ADMIN_P (0)
9443 +#define VS_WATCH_P (0)
9445 +#define VS_ADMIN_P VS_ADMIN
9446 +#define VS_WATCH_P VS_WATCH
9449 +#define VS_HARDIRQ 0x1000 /* in __vs_check: paired with in_irq() */
9450 +#define VS_SOFTIRQ 0x2000 /* in __vs_check: paired with in_softirq() */
9451 +#define VS_IRQ 0x4000 /* in __vs_check: paired with in_interrupt() */
9453 +#define VS_IRQ_MASK 0xF000 /* nibble holding the three IRQ mode bits above */
9455 +#include <linux/hardirq.h>
9458 + * check current context for ADMIN/WATCH and
9459 + * optionally against supplied argument
9461 +static inline int __vs_check(int cid, int id, unsigned int mode)
9463 + if (mode & VS_ARG_MASK) {
9464 + if ((mode & VS_IDENT) && (id == cid))
9467 + if (mode & VS_ATR_MASK) {
9468 + if ((mode & VS_DYNAMIC) &&
9469 + (id >= MIN_D_CONTEXT) &&
9470 + (id <= MAX_S_CONTEXT))
9472 + if ((mode & VS_STATIC) &&
9473 + (id > 1) && (id < MIN_D_CONTEXT))
9476 + if (mode & VS_IRQ_MASK) {
9477 + if ((mode & VS_IRQ) && unlikely(in_interrupt()))
9479 + if ((mode & VS_HARDIRQ) && unlikely(in_irq()))
9481 + if ((mode & VS_SOFTIRQ) && unlikely(in_softirq()))
9484 + return (((mode & VS_ADMIN) && (cid == 0)) ||
9485 + ((mode & VS_WATCH) && (cid == 1)) ||
9486 + ((mode & VS_HOSTID) && (id == 0)));
9489 +#define vx_check(c, m) __vs_check(vx_current_xid(), c, (m) | VS_IRQ) /* __vs_check against the current xid; VS_IRQ is always ORed into the mode */
9491 +#define vx_weak_check(c, m) ((m) ? vx_check(c, m) : 1) /* evaluates to 1 when mode m is 0, otherwise defers to vx_check */
9494 +#define nx_check(c, m) __vs_check(nx_current_nid(), c, m) /* __vs_check against the current nid; unlike vx_check, the mode is passed through unchanged */
9496 +#define nx_weak_check(c, m) ((m) ? nx_check(c, m) : 1) /* evaluates to 1 when mode m is 0, otherwise defers to nx_check */
9499 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/context.h linux-4.4/include/linux/vserver/context.h
9500 --- linux-4.4/include/linux/vserver/context.h 1970-01-01 01:00:00.000000000 +0100
9501 +++ linux-4.4/include/linux/vserver/context.h 2021-02-24 16:56:24.586156094 +0100
9503 +#ifndef _VSERVER_CONTEXT_H
9504 +#define _VSERVER_CONTEXT_H
9507 +#include <linux/list.h>
9508 +#include <linux/spinlock.h>
9509 +#include <linux/rcupdate.h>
9510 +#include <uapi/vserver/context.h>
9512 +#include "limit_def.h"
9513 +#include "sched_def.h"
9514 +#include "cvirt_def.h"
9515 +#include "cacct_def.h"
9516 +#include "device_def.h"
9518 +#define VX_SPACES 2
9520 +struct _vx_info_pc {
9521 + struct _vx_sched_pc sched_pc;
9522 + struct _vx_cvirt_pc cvirt_pc;
9526 + unsigned long vx_nsmask; /* assignment mask */
9527 + struct nsproxy *vx_nsproxy; /* private namespaces */
9528 + struct fs_struct *vx_fs; /* private namespace fs */
9529 + const struct cred *vx_cred; /* task credentials */
9533 + struct hlist_node vx_hlist; /* linked list of contexts */
9534 + vxid_t vx_id; /* context id */
9535 + atomic_t vx_usecnt; /* usage count */
9536 + atomic_t vx_tasks; /* tasks count */
9537 + struct vx_info *vx_parent; /* parent context */
9538 + int vx_state; /* context state */
9540 + struct _vx_space space[VX_SPACES]; /* namespace store */
9542 + uint64_t vx_flags; /* context flags */
9543 + uint64_t vx_ccaps; /* context caps (vserver) */
9544 + uint64_t vx_umask; /* unshare mask (guest) */
9545 + uint64_t vx_wmask; /* warn mask (guest) */
9546 + kernel_cap_t vx_bcaps; /* bounding caps (system) */
9548 + struct task_struct *vx_reaper; /* guest reaper process */
9549 + pid_t vx_initpid; /* PID of guest init */
9550 + int64_t vx_badness_bias; /* OOM points bias */
9552 + struct _vx_limit limit; /* vserver limits */
9553 + struct _vx_sched sched; /* vserver scheduler */
9554 + struct _vx_cvirt cvirt; /* virtual/bias stuff */
9555 + struct _vx_cacct cacct; /* context accounting */
9557 + struct _vx_device dmap; /* default device map targets */
9560 + struct _vx_info_pc info_pc; /* per cpu data */
9562 + struct _vx_info_pc *ptr_pc; /* per cpu array */
9565 + wait_queue_head_t vx_wait; /* context exit waitqueue */
9566 + int reboot_cmd; /* last sys_reboot() cmd */
9567 + int exit_code; /* last process exit code */
9569 + char vx_name[65]; /* vserver name */
9573 +#define vx_ptr_pc(vxi) (&(vxi)->info_pc)
9574 +#define vx_per_cpu(vxi, v, id) vx_ptr_pc(vxi)->v
9576 +#define vx_ptr_pc(vxi) ((vxi)->ptr_pc)
9577 +#define vx_per_cpu(vxi, v, id) per_cpu_ptr(vx_ptr_pc(vxi), id)->v
9580 +#define vx_cpu(vxi, v) vx_per_cpu(vxi, v, smp_processor_id()) /* vx_per_cpu member v selected with smp_processor_id() as the cpu id */
9583 +struct vx_info_save {
9584 + struct vx_info *vxi;
9591 +#define VXS_HASHED 0x0001
9592 +#define VXS_PAUSED 0x0010
9593 +#define VXS_SHUTDOWN 0x0100
9594 +#define VXS_HELPER 0x1000
9595 +#define VXS_RELEASED 0x8000
9598 +extern void claim_vx_info(struct vx_info *, struct task_struct *);
9599 +extern void release_vx_info(struct vx_info *, struct task_struct *);
9601 +extern struct vx_info *lookup_vx_info(int);
9602 +extern struct vx_info *lookup_or_create_vx_info(int);
9604 +extern int get_xid_list(int, unsigned int *, int);
9605 +extern int xid_is_hashed(vxid_t);
9607 +extern int vx_migrate_task(struct task_struct *, struct vx_info *, int);
9609 +extern long vs_state_change(struct vx_info *, unsigned int);
9612 +#endif /* _VSERVER_CONTEXT_H */
9613 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/context_cmd.h linux-4.4/include/linux/vserver/context_cmd.h
9614 --- linux-4.4/include/linux/vserver/context_cmd.h 1970-01-01 01:00:00.000000000 +0100
9615 +++ linux-4.4/include/linux/vserver/context_cmd.h 2021-02-24 16:56:24.586156094 +0100
9617 +#ifndef _VSERVER_CONTEXT_CMD_H
9618 +#define _VSERVER_CONTEXT_CMD_H
9620 +#include <uapi/vserver/context_cmd.h>
9622 +extern int vc_task_xid(uint32_t);
9624 +extern int vc_vx_info(struct vx_info *, void __user *);
9626 +extern int vc_ctx_stat(struct vx_info *, void __user *);
9628 +extern int vc_ctx_create(uint32_t, void __user *);
9629 +extern int vc_ctx_migrate(struct vx_info *, void __user *);
9631 +extern int vc_get_cflags(struct vx_info *, void __user *);
9632 +extern int vc_set_cflags(struct vx_info *, void __user *);
9634 +extern int vc_get_ccaps(struct vx_info *, void __user *);
9635 +extern int vc_set_ccaps(struct vx_info *, void __user *);
9637 +extern int vc_get_bcaps(struct vx_info *, void __user *);
9638 +extern int vc_set_bcaps(struct vx_info *, void __user *);
9640 +extern int vc_get_umask(struct vx_info *, void __user *);
9641 +extern int vc_set_umask(struct vx_info *, void __user *);
9643 +extern int vc_get_wmask(struct vx_info *, void __user *);
9644 +extern int vc_set_wmask(struct vx_info *, void __user *);
9646 +extern int vc_get_badness(struct vx_info *, void __user *);
9647 +extern int vc_set_badness(struct vx_info *, void __user *);
9649 +#endif /* _VSERVER_CONTEXT_CMD_H */
9650 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cvirt.h linux-4.4/include/linux/vserver/cvirt.h
9651 --- linux-4.4/include/linux/vserver/cvirt.h 1970-01-01 01:00:00.000000000 +0100
9652 +++ linux-4.4/include/linux/vserver/cvirt.h 2021-02-24 16:56:24.586156094 +0100
9654 +#ifndef _VSERVER_CVIRT_H
9655 +#define _VSERVER_CVIRT_H
9659 +void vx_vsi_boottime(struct timespec *);
9661 +void vx_vsi_uptime(struct timespec *, struct timespec *);
9666 +void vx_update_load(struct vx_info *);
9669 +int vx_do_syslog(int, char __user *, int);
9671 +#endif /* _VSERVER_CVIRT_H */
9672 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cvirt_cmd.h linux-4.4/include/linux/vserver/cvirt_cmd.h
9673 --- linux-4.4/include/linux/vserver/cvirt_cmd.h 1970-01-01 01:00:00.000000000 +0100
9674 +++ linux-4.4/include/linux/vserver/cvirt_cmd.h 2021-02-24 16:56:24.586156094 +0100
9676 +#ifndef _VSERVER_CVIRT_CMD_H
9677 +#define _VSERVER_CVIRT_CMD_H
9680 +#include <linux/compiler.h>
9681 +#include <uapi/vserver/cvirt_cmd.h>
9683 +extern int vc_set_vhi_name(struct vx_info *, void __user *);
9684 +extern int vc_get_vhi_name(struct vx_info *, void __user *);
9686 +extern int vc_virt_stat(struct vx_info *, void __user *);
9688 +#endif /* _VSERVER_CVIRT_CMD_H */
9689 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cvirt_def.h linux-4.4/include/linux/vserver/cvirt_def.h
9690 --- linux-4.4/include/linux/vserver/cvirt_def.h 1970-01-01 01:00:00.000000000 +0100
9691 +++ linux-4.4/include/linux/vserver/cvirt_def.h 2021-02-24 16:56:24.586156094 +0100
9693 +#ifndef _VSERVER_CVIRT_DEF_H
9694 +#define _VSERVER_CVIRT_DEF_H
9696 +#include <linux/jiffies.h>
9697 +#include <linux/spinlock.h>
9698 +#include <linux/wait.h>
9699 +#include <linux/time.h>
9700 +#include <asm/atomic.h>
9703 +struct _vx_usage_stat {
9713 +struct _vx_syslog {
9714 + wait_queue_head_t log_wait;
9715 + spinlock_t logbuf_lock; /* lock for the log buffer */
9717 + unsigned long log_start; /* next char to be read by syslog() */
9718 + unsigned long con_start; /* next char to be sent to consoles */
9719 + unsigned long log_end; /* most-recently-written-char + 1 */
9720 + unsigned long logged_chars; /* #chars since last read+clear operation */
9722 + char log_buf[1024];
9726 +/* context sub struct */
9729 + atomic_t nr_threads; /* number of current threads */
9730 + atomic_t nr_running; /* number of running threads */
9731 + atomic_t nr_uninterruptible; /* number of uninterruptible threads */
9733 + atomic_t nr_onhold; /* processes on hold */
9734 + uint32_t onhold_last; /* jiffies when put on hold */
9736 + struct timespec bias_ts; /* time offset to the host */
9737 + struct timespec bias_idle;
9738 + struct timespec bias_uptime; /* context creation point */
9739 + uint64_t bias_clock; /* offset in clock_t */
9741 + spinlock_t load_lock; /* lock for the load averages */
9742 + atomic_t load_updates; /* nr of load updates done so far */
9743 + uint32_t load_last; /* last time load was calculated */
9744 + uint32_t load[3]; /* load averages 1,5,15 */
9746 + atomic_t total_forks; /* number of forks so far */
9748 + struct _vx_syslog syslog;
9751 +struct _vx_cvirt_pc {
9752 + struct _vx_usage_stat cpustat;
9756 +#ifdef CONFIG_VSERVER_DEBUG
9758 +static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt)
9760 + printk("\t_vx_cvirt:\n");
9761 + printk("\t threads: %4d, %4d, %4d, %4d\n",
9762 + atomic_read(&cvirt->nr_threads),
9763 + atomic_read(&cvirt->nr_running),
9764 + atomic_read(&cvirt->nr_uninterruptible),
9765 + atomic_read(&cvirt->nr_onhold));
9766 + /* add rest here */
9767 + printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks));
9772 +#endif /* _VSERVER_CVIRT_DEF_H */
9773 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/debug.h linux-4.4/include/linux/vserver/debug.h
9774 --- linux-4.4/include/linux/vserver/debug.h 1970-01-01 01:00:00.000000000 +0100
9775 +++ linux-4.4/include/linux/vserver/debug.h 2021-02-24 16:56:24.586156094 +0100
9777 +#ifndef _VSERVER_DEBUG_H
9778 +#define _VSERVER_DEBUG_H
9781 +#define VXD_CBIT(n, m) (vs_debug_ ## n & (1U << (m))) /* non-zero if bit m of vs_debug_<n> is set; 1U keeps m == 31 well-defined */
9782 +#define VXD_CMIN(n, m) (vs_debug_ ## n > (m)) /* true if vs_debug_<n> is greater than m */
9783 +#define VXD_MASK(n, m) (vs_debug_ ## n & (m)) /* bits of vs_debug_<n> selected by mask m */
9785 +#define VXD_DEV(d) (d), (d)->bd_inode->i_ino, \
9786 + imajor((d)->bd_inode), iminor((d)->bd_inode)
9787 +#define VXF_DEV "%p[%lu,%d:%d]"
9789 +#if defined(CONFIG_QUOTES_UTF8)
9790 +#define VS_Q_LQM "\xc2\xbb"
9791 +#define VS_Q_RQM "\xc2\xab"
9792 +#elif defined(CONFIG_QUOTES_ASCII)
9793 +#define VS_Q_LQM "\x27"
9794 +#define VS_Q_RQM "\x27"
9796 +#define VS_Q_LQM "\xbb"
9797 +#define VS_Q_RQM "\xab"
9800 +#define VS_Q(f) VS_Q_LQM f VS_Q_RQM
9803 +#define vxd_path(p) \
9804 + ({ static char _buffer[PATH_MAX]; \
9805 + d_path(p, _buffer, sizeof(_buffer)); }) /* evaluates to d_path()'s return value; _buffer is static, so every call through one expansion reuses the same PATH_MAX buffer */
9807 +#define vxd_cond_path(n) \
9808 + ((n) ? vxd_path(&(n)->path) : "<null>" ) /* vxd_path() of n's path member, or the literal "<null>" when n is NULL */
9811 +#ifdef CONFIG_VSERVER_DEBUG
9813 +extern unsigned int vs_debug_switch; /* one unsigned word per vs_debug_<name>; only declared here, read through VXD_CBIT/VXD_CMIN/VXD_MASK by token pasting */
9814 +extern unsigned int vs_debug_xid;
9815 +extern unsigned int vs_debug_nid;
9816 +extern unsigned int vs_debug_tag;
9817 +extern unsigned int vs_debug_net;
9818 +extern unsigned int vs_debug_limit;
9819 +extern unsigned int vs_debug_cres;
9820 +extern unsigned int vs_debug_dlim;
9821 +extern unsigned int vs_debug_quota;
9822 +extern unsigned int vs_debug_cvirt;
9823 +extern unsigned int vs_debug_space;
9824 +extern unsigned int vs_debug_perm;
9825 +extern unsigned int vs_debug_misc;
9828 +#define VX_LOGLEVEL "vxD: "
9829 +#define VX_PROC_FMT "%p: "
9830 +#define VX_PROCESS current
9832 +#define vxdprintk(c, f, x...) \
9835 + printk(VX_LOGLEVEL VX_PROC_FMT f "\n", \
9836 + VX_PROCESS , ##x); \
9839 +#define vxlprintk(c, f, x...) \
9842 + printk(VX_LOGLEVEL f " @%s:%d\n", x); \
9845 +#define vxfprintk(c, f, x...) \
9848 + printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \
9854 +void dump_vx_info(struct vx_info *, int);
9855 +void dump_vx_info_inactive(int);
9857 +#else /* CONFIG_VSERVER_DEBUG */
9859 +#define vs_debug_switch 0 /* CONFIG_VSERVER_DEBUG off: every vs_debug_<name> is the constant 0, so VXD_* tests are compile-time false */
9860 +#define vs_debug_xid 0
9861 +#define vs_debug_nid 0
9862 +#define vs_debug_tag 0
9863 +#define vs_debug_net 0
9864 +#define vs_debug_limit 0
9865 +#define vs_debug_cres 0
9866 +#define vs_debug_dlim 0
9867 +#define vs_debug_quota 0
9868 +#define vs_debug_cvirt 0
9869 +#define vs_debug_space 0
9870 +#define vs_debug_perm 0
9871 +#define vs_debug_misc 0
9873 +#define vxdprintk(x...) do { } while (0) /* debug printk variants become empty statements; their arguments are discarded unevaluated */
9874 +#define vxlprintk(x...) do { } while (0)
9875 +#define vxfprintk(x...) do { } while (0)
9877 +#endif /* CONFIG_VSERVER_DEBUG */
9880 +#ifdef CONFIG_VSERVER_WARN
9882 +#define VX_WARNLEVEL KERN_WARNING "vxW: "
9883 +#define VX_WARN_TASK "[" VS_Q("%s") ",%u:#%u|%u|%u] "
9884 +#define VX_WARN_XID "[xid #%u] "
9885 +#define VX_WARN_NID "[nid #%u] "
9886 +#define VX_WARN_TAG "[tag #%u] "
9888 +#define vxwprintk(c, f, x...) \
9891 + printk(VX_WARNLEVEL f "\n", ##x); \
9894 +#else /* CONFIG_VSERVER_WARN */
9896 +#define vxwprintk(x...) do { } while (0)
9898 +#endif /* CONFIG_VSERVER_WARN */
9900 +#define vxwprintk_task(c, f, x...) \
9901 + vxwprintk(c, VX_WARN_TASK f, \
9902 + current->comm, current->pid, \
9903 + current->xid, current->nid, \
9904 + current->tag, ##x)
9905 +#define vxwprintk_xid(c, f, x...) \
9906 + vxwprintk(c, VX_WARN_XID f, current->xid, x)
9907 +#define vxwprintk_nid(c, f, x...) \
9908 + vxwprintk(c, VX_WARN_NID f, current->nid, x)
9909 +#define vxwprintk_tag(c, f, x...) \
9910 + vxwprintk(c, VX_WARN_TAG f, current->tag, x)
9912 +#ifdef CONFIG_VSERVER_DEBUG
9913 +#define vxd_assert_lock(l) assert_spin_locked(l)
9914 +#define vxd_assert(c, f, x...) vxlprintk(!(c), \
9915 + "assertion [" f "] failed.", ##x, __FILE__, __LINE__)
9917 +#define vxd_assert_lock(l) do { } while (0)
9918 +#define vxd_assert(c, f, x...) do { } while (0)
9922 +#endif /* _VSERVER_DEBUG_H */
9923 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/debug_cmd.h linux-4.4/include/linux/vserver/debug_cmd.h
9924 --- linux-4.4/include/linux/vserver/debug_cmd.h 1970-01-01 01:00:00.000000000 +0100
9925 +++ linux-4.4/include/linux/vserver/debug_cmd.h 2021-02-24 16:56:24.586156094 +0100
9927 +#ifndef _VSERVER_DEBUG_CMD_H
9928 +#define _VSERVER_DEBUG_CMD_H
9930 +#include <uapi/vserver/debug_cmd.h>
9933 +#ifdef CONFIG_COMPAT
9935 +#include <asm/compat.h>
9937 +struct vcmd_read_history_v0_x32 {
9940 + compat_uptr_t data_ptr;
9943 +struct vcmd_read_monitor_v0_x32 {
9946 + compat_uptr_t data_ptr;
9949 +#endif /* CONFIG_COMPAT */
9951 +extern int vc_dump_history(uint32_t);
9953 +extern int vc_read_history(uint32_t, void __user *);
9954 +extern int vc_read_monitor(uint32_t, void __user *);
9956 +#ifdef CONFIG_COMPAT
9958 +extern int vc_read_history_x32(uint32_t, void __user *);
9959 +extern int vc_read_monitor_x32(uint32_t, void __user *);
9961 +#endif /* CONFIG_COMPAT */
9963 +#endif /* _VSERVER_DEBUG_CMD_H */
9964 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/device.h linux-4.4/include/linux/vserver/device.h
9965 --- linux-4.4/include/linux/vserver/device.h 1970-01-01 01:00:00.000000000 +0100
9966 +++ linux-4.4/include/linux/vserver/device.h 2021-02-24 16:56:24.586156094 +0100
9968 +#ifndef _VSERVER_DEVICE_H
9969 +#define _VSERVER_DEVICE_H
9972 +#include <uapi/vserver/device.h>
9974 +#else /* _VSERVER_DEVICE_H */
9975 +#warning duplicate inclusion
9976 +#endif /* _VSERVER_DEVICE_H */
9977 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/device_cmd.h linux-4.4/include/linux/vserver/device_cmd.h
9978 --- linux-4.4/include/linux/vserver/device_cmd.h 1970-01-01 01:00:00.000000000 +0100
9979 +++ linux-4.4/include/linux/vserver/device_cmd.h 2021-02-24 16:56:24.586156094 +0100
9981 +#ifndef _VSERVER_DEVICE_CMD_H
9982 +#define _VSERVER_DEVICE_CMD_H
9984 +#include <uapi/vserver/device_cmd.h>
9987 +#ifdef CONFIG_COMPAT
9989 +#include <asm/compat.h>
9991 +struct vcmd_set_mapping_v0_x32 {
9992 + compat_uptr_t device_ptr;
9993 + compat_uptr_t target_ptr;
9997 +#endif /* CONFIG_COMPAT */
9999 +#include <linux/compiler.h>
10001 +extern int vc_set_mapping(struct vx_info *, void __user *);
10002 +extern int vc_unset_mapping(struct vx_info *, void __user *);
10004 +#ifdef CONFIG_COMPAT
10006 +extern int vc_set_mapping_x32(struct vx_info *, void __user *);
10007 +extern int vc_unset_mapping_x32(struct vx_info *, void __user *);
10009 +#endif /* CONFIG_COMPAT */
10011 +#endif /* _VSERVER_DEVICE_CMD_H */
10012 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/device_def.h linux-4.4/include/linux/vserver/device_def.h
10013 --- linux-4.4/include/linux/vserver/device_def.h 1970-01-01 01:00:00.000000000 +0100
10014 +++ linux-4.4/include/linux/vserver/device_def.h 2021-02-24 16:56:24.586156094 +0100
10016 +#ifndef _VSERVER_DEVICE_DEF_H
10017 +#define _VSERVER_DEVICE_DEF_H
10019 +#include <linux/types.h>
10021 +struct vx_dmap_target {
10026 +struct _vx_device {
10027 +#ifdef CONFIG_VSERVER_DEVICE
10028 + struct vx_dmap_target targets[2];
10032 +#endif /* _VSERVER_DEVICE_DEF_H */
10033 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/dlimit.h linux-4.4/include/linux/vserver/dlimit.h
10034 --- linux-4.4/include/linux/vserver/dlimit.h 1970-01-01 01:00:00.000000000 +0100
10035 +++ linux-4.4/include/linux/vserver/dlimit.h 2021-02-24 16:56:24.586156094 +0100
10037 +#ifndef _VSERVER_DLIMIT_H
10038 +#define _VSERVER_DLIMIT_H
10040 +#include "switch.h"
10045 +/* keep in sync with CDLIM_INFINITY */
10047 +#define DLIM_INFINITY (~0ULL)
10049 +#include <linux/spinlock.h>
10050 +#include <linux/rcupdate.h>
10052 +struct super_block;
10055 + struct hlist_node dl_hlist; /* linked list of contexts */
10056 + struct rcu_head dl_rcu; /* the rcu head */
10057 + vtag_t dl_tag; /* context tag */
10058 + atomic_t dl_usecnt; /* usage count */
10059 + atomic_t dl_refcnt; /* reference count */
10061 + struct super_block *dl_sb; /* associated superblock */
10063 + spinlock_t dl_lock; /* protect the values */
10065 + unsigned long long dl_space_used; /* used space in bytes */
10066 + unsigned long long dl_space_total; /* maximum space in bytes */
10067 + unsigned long dl_inodes_used; /* used inodes */
10068 + unsigned long dl_inodes_total; /* maximum inodes */
10070 + unsigned int dl_nrlmult; /* non root limit mult */
10075 +extern void rcu_free_dl_info(struct rcu_head *);
10076 +extern void unhash_dl_info(struct dl_info *);
10078 +extern struct dl_info *locate_dl_info(struct super_block *, vtag_t);
10083 +extern void vx_vsi_statfs(struct super_block *, struct kstatfs *);
10085 +typedef uint64_t dlsize_t;
10087 +#endif /* __KERNEL__ */
10088 +#else /* _VSERVER_DLIMIT_H */
10089 +#warning duplicate inclusion
10090 +#endif /* _VSERVER_DLIMIT_H */
10091 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/dlimit_cmd.h linux-4.4/include/linux/vserver/dlimit_cmd.h
10092 --- linux-4.4/include/linux/vserver/dlimit_cmd.h 1970-01-01 01:00:00.000000000 +0100
10093 +++ linux-4.4/include/linux/vserver/dlimit_cmd.h 2021-02-24 16:56:24.586156094 +0100
10095 +#ifndef _VSERVER_DLIMIT_CMD_H
10096 +#define _VSERVER_DLIMIT_CMD_H
10098 +#include <uapi/vserver/dlimit_cmd.h>
10101 +#ifdef CONFIG_COMPAT
10103 +#include <asm/compat.h>
10105 +struct vcmd_ctx_dlimit_base_v0_x32 {
10106 + compat_uptr_t name_ptr;
10110 +struct vcmd_ctx_dlimit_v0_x32 {
10111 + compat_uptr_t name_ptr;
10112 + uint32_t space_used; /* used space in kbytes */
10113 + uint32_t space_total; /* maximum space in kbytes */
10114 + uint32_t inodes_used; /* used inodes */
10115 + uint32_t inodes_total; /* maximum inodes */
10116 + uint32_t reserved; /* reserved for root in % */
10120 +#endif /* CONFIG_COMPAT */
10122 +#include <linux/compiler.h>
10124 +extern int vc_add_dlimit(uint32_t, void __user *);
10125 +extern int vc_rem_dlimit(uint32_t, void __user *);
10127 +extern int vc_set_dlimit(uint32_t, void __user *);
10128 +extern int vc_get_dlimit(uint32_t, void __user *);
10130 +#ifdef CONFIG_COMPAT
10132 +extern int vc_add_dlimit_x32(uint32_t, void __user *);
10133 +extern int vc_rem_dlimit_x32(uint32_t, void __user *);
10135 +extern int vc_set_dlimit_x32(uint32_t, void __user *);
10136 +extern int vc_get_dlimit_x32(uint32_t, void __user *);
10138 +#endif /* CONFIG_COMPAT */
10140 +#endif /* _VSERVER_DLIMIT_CMD_H */
10141 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/global.h linux-4.4/include/linux/vserver/global.h
10142 --- linux-4.4/include/linux/vserver/global.h 1970-01-01 01:00:00.000000000 +0100
10143 +++ linux-4.4/include/linux/vserver/global.h 2021-02-24 16:56:24.586156094 +0100
10145 +#ifndef _VSERVER_GLOBAL_H
10146 +#define _VSERVER_GLOBAL_H
10149 +extern atomic_t vx_global_ctotal;
10150 +extern atomic_t vx_global_cactive;
10152 +extern atomic_t nx_global_ctotal;
10153 +extern atomic_t nx_global_cactive;
10155 +extern atomic_t vs_global_nsproxy;
10156 +extern atomic_t vs_global_fs;
10157 +extern atomic_t vs_global_mnt_ns;
10158 +extern atomic_t vs_global_uts_ns;
10159 +extern atomic_t vs_global_user_ns;
10160 +extern atomic_t vs_global_pid_ns;
10163 +#endif /* _VSERVER_GLOBAL_H */
10164 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/history.h linux-4.4/include/linux/vserver/history.h
10165 --- linux-4.4/include/linux/vserver/history.h 1970-01-01 01:00:00.000000000 +0100
10166 +++ linux-4.4/include/linux/vserver/history.h 2021-02-24 16:56:24.586156094 +0100
10168 +#ifndef _VSERVER_HISTORY_H
10169 +#define _VSERVER_HISTORY_H
10174 + VXH_THROW_OOPS = 1,
10178 + VXH_INIT_VX_INFO,
10181 + VXH_CLAIM_VX_INFO,
10182 + VXH_RELEASE_VX_INFO,
10183 + VXH_ALLOC_VX_INFO,
10184 + VXH_DEALLOC_VX_INFO,
10185 + VXH_HASH_VX_INFO,
10186 + VXH_UNHASH_VX_INFO,
10188 + VXH_LOOKUP_VX_INFO,
10189 + VXH_CREATE_VX_INFO,
10192 +struct _vxhe_vxi {
10193 + struct vx_info *ptr;
10199 +struct _vxhe_set_clr {
10203 +struct _vxhe_loc_lookup {
10207 +struct _vx_hist_entry {
10209 + unsigned short seq;
10210 + unsigned short type;
10211 + struct _vxhe_vxi vxi;
10213 + struct _vxhe_set_clr sc;
10214 + struct _vxhe_loc_lookup ll;
10218 +#ifdef CONFIG_VSERVER_HISTORY
10220 +extern unsigned volatile int vxh_active;
10222 +struct _vx_hist_entry *vxh_advance(void *loc);
10226 +void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi)
10228 + entry->vxi.ptr = vxi;
10230 + entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt);
10231 + entry->vxi.tasks = atomic_read(&vxi->vx_tasks);
10232 + entry->vxi.xid = vxi->vx_id;
10237 +#define __HERE__ current_text_addr()
10239 +#define __VXH_BODY(__type, __data, __here) \
10240 + struct _vx_hist_entry *entry; \
10242 + preempt_disable(); \
10243 + entry = vxh_advance(__here); \
10245 + entry->type = __type; \
10246 + preempt_enable();
10249 + /* pass vxi only */
10251 +#define __VXH_SMPL \
10252 + __vxh_copy_vxi(entry, vxi)
10255 +void __vxh_smpl(struct vx_info *vxi, int __type, void *__here)
10257 + __VXH_BODY(__type, __VXH_SMPL, __here)
10260 + /* pass vxi and data (void *) */
10262 +#define __VXH_DATA \
10263 + __vxh_copy_vxi(entry, vxi); \
10264 + entry->sc.data = data
10267 +void __vxh_data(struct vx_info *vxi, void *data,
10268 + int __type, void *__here)
10270 + __VXH_BODY(__type, __VXH_DATA, __here)
10273 + /* pass vxi and arg (long) */
10275 +#define __VXH_LONG \
10276 + __vxh_copy_vxi(entry, vxi); \
10277 + entry->ll.arg = arg
10280 +void __vxh_long(struct vx_info *vxi, long arg,
10281 + int __type, void *__here)
10283 + __VXH_BODY(__type, __VXH_LONG, __here)
10288 +void __vxh_throw_oops(void *__here)
10290 + __VXH_BODY(VXH_THROW_OOPS, {}, __here);
10291 + /* prevent further acquisition */
10296 +#define vxh_throw_oops() __vxh_throw_oops(__HERE__) /* expands to a plain call expression; the caller writes the ';' exactly as with the do { } while (0) stubs */
10298 +#define __vxh_get_vx_info(v, h) __vxh_smpl(v, VXH_GET_VX_INFO, h) /* __vxh_smpl wrappers pass the vx_info, an event type and a location */
10299 +#define __vxh_put_vx_info(v, h) __vxh_smpl(v, VXH_PUT_VX_INFO, h)
10301 +#define __vxh_init_vx_info(v, d, h) \
10302 + __vxh_data(v, d, VXH_INIT_VX_INFO, h) /* __vxh_data wrappers additionally pass a data pointer d */
10303 +#define __vxh_set_vx_info(v, d, h) \
10304 + __vxh_data(v, d, VXH_SET_VX_INFO, h)
10305 +#define __vxh_clr_vx_info(v, d, h) \
10306 + __vxh_data(v, d, VXH_CLR_VX_INFO, h)
10308 +#define __vxh_claim_vx_info(v, d, h) \
10309 + __vxh_data(v, d, VXH_CLAIM_VX_INFO, h)
10310 +#define __vxh_release_vx_info(v, d, h) \
10311 + __vxh_data(v, d, VXH_RELEASE_VX_INFO, h)
10313 +#define vxh_alloc_vx_info(v) \
10314 + __vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__) /* vxh_* forms use __HERE__ as the location instead of taking one from the caller */
10315 +#define vxh_dealloc_vx_info(v) \
10316 + __vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__)
10318 +#define vxh_hash_vx_info(v) \
10319 + __vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__)
10320 +#define vxh_unhash_vx_info(v) \
10321 + __vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__)
10323 +#define vxh_loc_vx_info(v, l) \
10324 + __vxh_long(v, l, VXH_LOC_VX_INFO, __HERE__) /* __vxh_long wrappers additionally pass a long argument l */
10325 +#define vxh_lookup_vx_info(v, l) \
10326 + __vxh_long(v, l, VXH_LOOKUP_VX_INFO, __HERE__)
10327 +#define vxh_create_vx_info(v, l) \
10328 + __vxh_long(v, l, VXH_CREATE_VX_INFO, __HERE__)
10330 +extern void vxh_dump_history(void);
10333 +#else /* CONFIG_VSERVER_HISTORY */
10335 +#define __HERE__ 0 /* CONFIG_VSERVER_HISTORY off: the location token is the constant 0 */
10337 +#define vxh_throw_oops() do { } while (0) /* every history hook below is an empty statement; arguments are discarded unevaluated and the caller supplies the ';' */
10339 +#define __vxh_get_vx_info(v, h) do { } while (0)
10340 +#define __vxh_put_vx_info(v, h) do { } while (0)
10342 +#define __vxh_init_vx_info(v, d, h) do { } while (0)
10343 +#define __vxh_set_vx_info(v, d, h) do { } while (0)
10344 +#define __vxh_clr_vx_info(v, d, h) do { } while (0)
10346 +#define __vxh_claim_vx_info(v, d, h) do { } while (0)
10347 +#define __vxh_release_vx_info(v, d, h) do { } while (0)
10349 +#define vxh_alloc_vx_info(v) do { } while (0)
10350 +#define vxh_dealloc_vx_info(v) do { } while (0)
10352 +#define vxh_hash_vx_info(v) do { } while (0)
10353 +#define vxh_unhash_vx_info(v) do { } while (0)
10355 +#define vxh_loc_vx_info(v, l) do { } while (0)
10356 +#define vxh_lookup_vx_info(v, l) do { } while (0)
10357 +#define vxh_create_vx_info(v, l) do { } while (0)
10359 +#define vxh_dump_history() do { } while (0) /* replaces the vxh_dump_history() function declared in the history-enabled branch */
10362 +#endif /* CONFIG_VSERVER_HISTORY */
10364 +#endif /* _VSERVER_HISTORY_H */
10365 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/inode.h linux-4.4/include/linux/vserver/inode.h
10366 --- linux-4.4/include/linux/vserver/inode.h 1970-01-01 01:00:00.000000000 +0100
10367 +++ linux-4.4/include/linux/vserver/inode.h 2021-02-24 16:56:24.586156094 +0100
10369 +#ifndef _VSERVER_INODE_H
10370 +#define _VSERVER_INODE_H
10372 +#include <uapi/vserver/inode.h>
10375 +#ifdef CONFIG_VSERVER_PROC_SECURE
10376 +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE )
10377 +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
10379 +#define IATTR_PROC_DEFAULT ( IATTR_ADMIN )
10380 +#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
10383 +#define vx_hide_check(c, m) (((m) & IATTR_HIDE) ? vx_check(c, m) : 1)
10385 +#else /* _VSERVER_INODE_H */
10386 +#warning duplicate inclusion
10387 +#endif /* _VSERVER_INODE_H */
10388 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/inode_cmd.h linux-4.4/include/linux/vserver/inode_cmd.h
10389 --- linux-4.4/include/linux/vserver/inode_cmd.h 1970-01-01 01:00:00.000000000 +0100
10390 +++ linux-4.4/include/linux/vserver/inode_cmd.h 2021-02-24 16:56:24.586156094 +0100
10392 +#ifndef _VSERVER_INODE_CMD_H
10393 +#define _VSERVER_INODE_CMD_H
10395 +#include <uapi/vserver/inode_cmd.h>
10399 +#ifdef CONFIG_COMPAT
10401 +#include <asm/compat.h>
10403 +struct vcmd_ctx_iattr_v1_x32 {
10404 + compat_uptr_t name_ptr;
10410 +#endif /* CONFIG_COMPAT */
10412 +#include <linux/compiler.h>
10414 +extern int vc_get_iattr(void __user *);
10415 +extern int vc_set_iattr(void __user *);
10417 +extern int vc_fget_iattr(uint32_t, void __user *);
10418 +extern int vc_fset_iattr(uint32_t, void __user *);
10420 +#ifdef CONFIG_COMPAT
10422 +extern int vc_get_iattr_x32(void __user *);
10423 +extern int vc_set_iattr_x32(void __user *);
10425 +#endif /* CONFIG_COMPAT */
10427 +#endif /* _VSERVER_INODE_CMD_H */
10428 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/limit.h linux-4.4/include/linux/vserver/limit.h
10429 --- linux-4.4/include/linux/vserver/limit.h 1970-01-01 01:00:00.000000000 +0100
10430 +++ linux-4.4/include/linux/vserver/limit.h 2021-02-24 16:56:24.586156094 +0100
10432 +#ifndef _VSERVER_LIMIT_H
10433 +#define _VSERVER_LIMIT_H
10435 +#include <uapi/vserver/limit.h>
10438 +#define VLIM_NOCHECK ((1L << VLIMIT_DENTRY) | (1L << RLIMIT_RSS))
10440 +/* keep in sync with CRLIM_INFINITY */
10442 +#define VLIM_INFINITY (~0ULL)
10444 +#include <asm/atomic.h>
10445 +#include <asm/resource.h>
10447 +#ifndef RLIM_INFINITY
10448 +#warning RLIM_INFINITY is undefined
10451 +#define __rlim_val(l, r, v) ((l)->res[r].v) /* lvalue for field v of resource slot r in limit set l */
10453 +#define __rlim_soft(l, r) __rlim_val(l, r, soft) /* the wrappers below name one field each */
10454 +#define __rlim_hard(l, r) __rlim_val(l, r, hard)
10456 +#define __rlim_rcur(l, r) __rlim_val(l, r, rcur)
10457 +#define __rlim_rmin(l, r) __rlim_val(l, r, rmin)
10458 +#define __rlim_rmax(l, r) __rlim_val(l, r, rmax)
10460 +#define __rlim_lhit(l, r) __rlim_val(l, r, lhit)
10461 +#define __rlim_hit(l, r) atomic_inc(&__rlim_lhit(l, r)) /* atomically bumps the lhit counter of slot r */
10463 +typedef atomic_long_t rlim_atomic_t; /* type operated on by the atomic_long_* accessors below */
10464 +typedef unsigned long rlim_t; /* plain (non-atomic) limit value */
10466 +#define __rlim_get(l, r) atomic_long_read(&__rlim_rcur(l, r)) /* all accessors below act on the rcur field only */
10467 +#define __rlim_set(l, r, v) atomic_long_set(&__rlim_rcur(l, r), v)
10468 +#define __rlim_inc(l, r) atomic_long_inc(&__rlim_rcur(l, r))
10469 +#define __rlim_dec(l, r) atomic_long_dec(&__rlim_rcur(l, r))
10470 +#define __rlim_add(l, r, v) atomic_long_add(v, &__rlim_rcur(l, r)) /* note the argument order: amount first, then the counter */
10471 +#define __rlim_sub(l, r, v) atomic_long_sub(v, &__rlim_rcur(l, r))
10474 +#if (RLIM_INFINITY == VLIM_INFINITY)
10475 +#define VX_VLIM(r) ((long long)(long)(r))
10476 +#define VX_RLIM(v) ((rlim_t)(v))
10478 +#define VX_VLIM(r) (((r) == RLIM_INFINITY) \
10479 + ? VLIM_INFINITY : (long long)(r))
10480 +#define VX_RLIM(v) (((v) == VLIM_INFINITY) \
10481 + ? RLIM_INFINITY : (rlim_t)(v))
10486 +#ifdef CONFIG_MEMCG /* real functions are only declared when CONFIG_MEMCG is set */
10487 +void vx_vsi_meminfo(struct sysinfo *);
10488 +void vx_vsi_swapinfo(struct sysinfo *);
10489 +long vx_vsi_cached(struct sysinfo *);
10490 +#else /* !CONFIG_MEMCG */
10491 +#define vx_vsi_meminfo(s) do { } while (0) /* no-op: the argument is left untouched */
10492 +#define vx_vsi_swapinfo(s) do { } while (0) /* no-op: the argument is left untouched */
10493 +#define vx_vsi_cached(s) (0L) /* evaluates to the constant 0L, matching the long return type above */
10494 +#endif /* !CONFIG_MEMCG */
10496 +#define NUM_LIMITS 24
10498 +#endif /* _VSERVER_LIMIT_H */
10499 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/limit_cmd.h linux-4.4/include/linux/vserver/limit_cmd.h
10500 --- linux-4.4/include/linux/vserver/limit_cmd.h 1970-01-01 01:00:00.000000000 +0100
10501 +++ linux-4.4/include/linux/vserver/limit_cmd.h 2021-02-24 16:56:24.586156094 +0100
10503 +#ifndef _VSERVER_LIMIT_CMD_H
10504 +#define _VSERVER_LIMIT_CMD_H
10506 +#include <uapi/vserver/limit_cmd.h>
10509 +#ifdef CONFIG_IA32_EMULATION
10511 +struct vcmd_ctx_rlimit_v0_x32 {
10513 + uint64_t minimum;
10514 + uint64_t softlimit;
10515 + uint64_t maximum;
10516 +} __attribute__ ((packed));
10518 +#endif /* CONFIG_IA32_EMULATION */
10520 +#include <linux/compiler.h>
10522 +extern int vc_get_rlimit_mask(uint32_t, void __user *);
10523 +extern int vc_get_rlimit(struct vx_info *, void __user *);
10524 +extern int vc_set_rlimit(struct vx_info *, void __user *);
10525 +extern int vc_reset_hits(struct vx_info *, void __user *);
10526 +extern int vc_reset_minmax(struct vx_info *, void __user *);
10528 +extern int vc_rlimit_stat(struct vx_info *, void __user *);
10530 +#ifdef CONFIG_IA32_EMULATION
10532 +extern int vc_get_rlimit_x32(struct vx_info *, void __user *);
10533 +extern int vc_set_rlimit_x32(struct vx_info *, void __user *);
10535 +#endif /* CONFIG_IA32_EMULATION */
10537 +#endif /* _VSERVER_LIMIT_CMD_H */
10538 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/limit_def.h linux-4.4/include/linux/vserver/limit_def.h
10539 --- linux-4.4/include/linux/vserver/limit_def.h 1970-01-01 01:00:00.000000000 +0100
10540 +++ linux-4.4/include/linux/vserver/limit_def.h 2021-02-24 16:56:24.586156094 +0100
10542 +#ifndef _VSERVER_LIMIT_DEF_H
10543 +#define _VSERVER_LIMIT_DEF_H
10545 +#include <asm/atomic.h>
10546 +#include <asm/resource.h>
10548 +#include "limit.h"
10551 +struct _vx_res_limit {
10552 + rlim_t soft; /* Context soft limit */
10553 + rlim_t hard; /* Context hard limit */
10555 + rlim_atomic_t rcur; /* Current value */
10556 + rlim_t rmin; /* Context minimum */
10557 + rlim_t rmax; /* Context maximum */
10559 + atomic_t lhit; /* Limit hits */
10562 +/* context sub struct */
10564 +struct _vx_limit {
10565 + struct _vx_res_limit res[NUM_LIMITS];
10568 +#ifdef CONFIG_VSERVER_DEBUG
10570 +static inline void __dump_vx_limit(struct _vx_limit *limit)
10574 + printk("\t_vx_limit:");
10575 + for (i = 0; i < NUM_LIMITS; i++) {
10576 + printk("\t [%2d] = %8lu %8lu/%8lu, %8ld/%8ld, %8d\n",
10577 + i, (unsigned long)__rlim_get(limit, i),
10578 + (unsigned long)__rlim_rmin(limit, i),
10579 + (unsigned long)__rlim_rmax(limit, i),
10580 + (long)__rlim_soft(limit, i),
10581 + (long)__rlim_hard(limit, i),
10582 + atomic_read(&__rlim_lhit(limit, i)));
10588 +#endif /* _VSERVER_LIMIT_DEF_H */
10589 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/limit_int.h linux-4.4/include/linux/vserver/limit_int.h
10590 --- linux-4.4/include/linux/vserver/limit_int.h 1970-01-01 01:00:00.000000000 +0100
10591 +++ linux-4.4/include/linux/vserver/limit_int.h 2021-02-24 16:56:24.586156094 +0100
10593 +#ifndef _VSERVER_LIMIT_INT_H
10594 +#define _VSERVER_LIMIT_INT_H
10596 +#define VXD_RCRES_COND(r) VXD_CBIT(cres, r) /* tests bit r of vs_debug_cres */
10597 +#define VXD_RLIMIT_COND(r) VXD_CBIT(limit, r) /* tests bit r of vs_debug_limit */
10599 +extern const char *vlimit_name[NUM_LIMITS]; /* one string per resource id; indexed by res in the debug messages of this header */
10601 +static inline void __vx_acc_cres(struct vx_info *vxi,
10602 + int res, int dir, void *_data, char *_file, int _line)
10604 + if (VXD_RCRES_COND(res))
10605 + vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5ld%s (%p)",
10606 + (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10607 + (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10608 + (dir > 0) ? "++" : "--", _data, _file, _line);
10613 + __rlim_inc(&vxi->limit, res);
10615 + __rlim_dec(&vxi->limit, res);
10618 +static inline void __vx_add_cres(struct vx_info *vxi,
10619 + int res, int amount, void *_data, char *_file, int _line)
10621 + if (VXD_RCRES_COND(res))
10622 + vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5ld += %5d (%p)",
10623 + (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10624 + (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10625 + amount, _data, _file, _line);
10630 + __rlim_add(&vxi->limit, res, amount);
10634 +int __vx_cres_adjust_max(struct _vx_limit *limit, int res, rlim_t value)
10636 + int cond = (value > __rlim_rmax(limit, res));
10639 + __rlim_rmax(limit, res) = value;
10644 +int __vx_cres_adjust_min(struct _vx_limit *limit, int res, rlim_t value)
10646 + int cond = (value < __rlim_rmin(limit, res));
10649 + __rlim_rmin(limit, res) = value;
10654 +void __vx_cres_fixup(struct _vx_limit *limit, int res, rlim_t value)
10656 + if (!__vx_cres_adjust_max(limit, res, value))
10657 + __vx_cres_adjust_min(limit, res, value);
10662 + +1 ... no limit hit
10663 + -1 ... over soft limit
10664 + 0 ... over hard limit */
10666 +static inline int __vx_cres_avail(struct vx_info *vxi,
10667 + int res, int num, char *_file, int _line)
10669 + struct _vx_limit *limit;
10672 + if (VXD_RLIMIT_COND(res))
10673 + vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld/%5ld > %5ld + %5d",
10674 + (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10675 + (vxi ? (long)__rlim_soft(&vxi->limit, res) : -1),
10676 + (vxi ? (long)__rlim_hard(&vxi->limit, res) : -1),
10677 + (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10678 + num, _file, _line);
10682 + limit = &vxi->limit;
10683 + value = __rlim_get(limit, res);
10685 + if (!__vx_cres_adjust_max(limit, res, value))
10686 + __vx_cres_adjust_min(limit, res, value);
10691 + if (__rlim_soft(limit, res) == RLIM_INFINITY)
10693 + if (value + num <= __rlim_soft(limit, res))
10696 + if (__rlim_hard(limit, res) == RLIM_INFINITY)
10698 + if (value + num <= __rlim_hard(limit, res))
10701 + __rlim_hit(limit, res);
10706 +static const int VLA_RSS[] = { RLIMIT_RSS, VLIMIT_ANON, VLIMIT_MAPPED, 0 };
10709 +rlim_t __vx_cres_array_sum(struct _vx_limit *limit, const int *array)
10711 + rlim_t value, sum = 0;
10714 + while ((res = *array++)) {
10715 + value = __rlim_get(limit, res);
10716 + __vx_cres_fixup(limit, res, value);
10723 +rlim_t __vx_cres_array_fixup(struct _vx_limit *limit, const int *array)
10725 + rlim_t value = __vx_cres_array_sum(limit, array + 1);
10726 + int res = *array;
10728 + if (value == __rlim_get(limit, res))
10731 + __rlim_set(limit, res, value);
10732 + /* now adjust min/max */
10733 + if (!__vx_cres_adjust_max(limit, res, value))
10734 + __vx_cres_adjust_min(limit, res, value);
10739 +static inline int __vx_cres_array_avail(struct vx_info *vxi,
10740 + const int *array, int num, char *_file, int _line)
10742 + struct _vx_limit *limit;
10743 + rlim_t value = 0;
10751 + limit = &vxi->limit;
10753 + value = __vx_cres_array_sum(limit, array + 1);
10755 + __rlim_set(limit, res, value);
10756 + __vx_cres_fixup(limit, res, value);
10758 + return __vx_cres_avail(vxi, res, num, _file, _line);
10762 +static inline void vx_limit_fixup(struct _vx_limit *limit, int id)
10767 + /* complex resources first */
10768 + if ((id < 0) || (id == RLIMIT_RSS))
10769 + __vx_cres_array_fixup(limit, VLA_RSS);
10771 + for (res = 0; res < NUM_LIMITS; res++) {
10772 + if ((id > 0) && (res != id))
10775 + value = __rlim_get(limit, res);
10776 + __vx_cres_fixup(limit, res, value);
10778 + /* not supposed to happen, maybe warn? */
10779 + if (__rlim_rmax(limit, res) > __rlim_hard(limit, res))
10780 + __rlim_rmax(limit, res) = __rlim_hard(limit, res);
10785 +#endif /* _VSERVER_LIMIT_INT_H */
10786 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/monitor.h linux-4.4/include/linux/vserver/monitor.h
10787 --- linux-4.4/include/linux/vserver/monitor.h 1970-01-01 01:00:00.000000000 +0100
10788 +++ linux-4.4/include/linux/vserver/monitor.h 2021-02-24 16:56:24.586156094 +0100
10790 +#ifndef _VSERVER_MONITOR_H
10791 +#define _VSERVER_MONITOR_H
10793 +#include <uapi/vserver/monitor.h>
10795 +#endif /* _VSERVER_MONITOR_H */
10796 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/network.h linux-4.4/include/linux/vserver/network.h
10797 --- linux-4.4/include/linux/vserver/network.h 1970-01-01 01:00:00.000000000 +0100
10798 +++ linux-4.4/include/linux/vserver/network.h 2021-02-24 16:56:24.589489533 +0100
10800 +#ifndef _VSERVER_NETWORK_H
10801 +#define _VSERVER_NETWORK_H
10804 +#include <linux/list.h>
10805 +#include <linux/spinlock.h>
10806 +#include <linux/rcupdate.h>
10807 +#include <linux/in.h>
10808 +#include <linux/in6.h>
10809 +#include <asm/atomic.h>
10810 +#include <uapi/vserver/network.h>
10812 +struct nx_addr_v4 {
10813 + struct nx_addr_v4 *next;
10814 + struct in_addr ip[2];
10815 + struct in_addr mask;
10820 +struct nx_addr_v6 {
10821 + struct nx_addr_v6 *next;
10822 + struct in6_addr ip;
10823 + struct in6_addr mask;
10830 + struct hlist_node nx_hlist; /* linked list of nxinfos */
10831 + vnid_t nx_id; /* vnet id */
10832 + atomic_t nx_usecnt; /* usage count */
10833 + atomic_t nx_tasks; /* tasks count */
10834 + int nx_state; /* context state */
10836 + uint64_t nx_flags; /* network flag word */
10837 + uint64_t nx_ncaps; /* network capabilities */
10839 + spinlock_t addr_lock; /* protect address changes */
10840 + struct in_addr v4_lback; /* Loopback address */
10841 + struct in_addr v4_bcast; /* Broadcast address */
10842 + struct nx_addr_v4 v4; /* First/Single ipv4 address */
10843 +#ifdef CONFIG_IPV6
10844 + struct nx_addr_v6 v6; /* First/Single ipv6 address */
10846 + char nx_name[65]; /* network context name */
10850 +/* status flags */
10852 +#define NXS_HASHED 0x0001
10853 +#define NXS_SHUTDOWN 0x0100
10854 +#define NXS_RELEASED 0x8000
10856 +extern struct nx_info *lookup_nx_info(int);
10858 +extern int get_nid_list(int, unsigned int *, int);
10859 +extern int nid_is_hashed(vnid_t);
10861 +extern int nx_migrate_task(struct task_struct *, struct nx_info *);
10863 +extern long vs_net_change(struct nx_info *, unsigned int);
10868 +#define NX_IPV4(n) ((n)->v4.type != NXA_TYPE_NONE)
10869 +#ifdef CONFIG_IPV6
10870 +#define NX_IPV6(n) ((n)->v6.type != NXA_TYPE_NONE)
10872 +#define NX_IPV6(n) (0)
10875 +#endif /* _VSERVER_NETWORK_H */
10876 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/network_cmd.h linux-4.4/include/linux/vserver/network_cmd.h
10877 --- linux-4.4/include/linux/vserver/network_cmd.h 1970-01-01 01:00:00.000000000 +0100
10878 +++ linux-4.4/include/linux/vserver/network_cmd.h 2021-02-24 16:56:24.589489533 +0100
10880 +#ifndef _VSERVER_NETWORK_CMD_H
10881 +#define _VSERVER_NETWORK_CMD_H
10883 +#include <uapi/vserver/network_cmd.h>
10885 +extern int vc_task_nid(uint32_t);
10887 +extern int vc_nx_info(struct nx_info *, void __user *);
10889 +extern int vc_net_create(uint32_t, void __user *);
10890 +extern int vc_net_migrate(struct nx_info *, void __user *);
10892 +extern int vc_net_add(struct nx_info *, void __user *);
10893 +extern int vc_net_remove(struct nx_info *, void __user *);
10895 +extern int vc_net_add_ipv4_v1(struct nx_info *, void __user *);
10896 +extern int vc_net_add_ipv4(struct nx_info *, void __user *);
10898 +extern int vc_net_rem_ipv4_v1(struct nx_info *, void __user *);
10899 +extern int vc_net_rem_ipv4(struct nx_info *, void __user *);
10901 +extern int vc_net_add_ipv6(struct nx_info *, void __user *);
10902 +extern int vc_net_remove_ipv6(struct nx_info *, void __user *);
10904 +extern int vc_add_match_ipv4(struct nx_info *, void __user *);
10905 +extern int vc_get_match_ipv4(struct nx_info *, void __user *);
10907 +extern int vc_add_match_ipv6(struct nx_info *, void __user *);
10908 +extern int vc_get_match_ipv6(struct nx_info *, void __user *);
10910 +extern int vc_get_nflags(struct nx_info *, void __user *);
10911 +extern int vc_set_nflags(struct nx_info *, void __user *);
10913 +extern int vc_get_ncaps(struct nx_info *, void __user *);
10914 +extern int vc_set_ncaps(struct nx_info *, void __user *);
10916 +#endif /* _VSERVER_NETWORK_CMD_H */
10917 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/percpu.h linux-4.4/include/linux/vserver/percpu.h
10918 --- linux-4.4/include/linux/vserver/percpu.h 1970-01-01 01:00:00.000000000 +0100
10919 +++ linux-4.4/include/linux/vserver/percpu.h 2021-02-24 16:56:24.589489533 +0100
10921 +#ifndef _VSERVER_PERCPU_H
10922 +#define _VSERVER_PERCPU_H
10924 +#include "cvirt_def.h"
10925 +#include "sched_def.h"
10927 +struct _vx_percpu {
10928 + struct _vx_cvirt_pc cvirt;
10929 + struct _vx_sched_pc sched;
10932 +#define PERCPU_PERCTX (sizeof(struct _vx_percpu))
10934 +#endif /* _VSERVER_PERCPU_H */
10935 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/pid.h linux-4.4/include/linux/vserver/pid.h
10936 --- linux-4.4/include/linux/vserver/pid.h 1970-01-01 01:00:00.000000000 +0100
10937 +++ linux-4.4/include/linux/vserver/pid.h 2021-02-24 16:56:24.589489533 +0100
10939 +#ifndef _VSERVER_PID_H
10940 +#define _VSERVER_PID_H
10942 +/* pid faking stuff */
10944 +#define vx_info_map_pid(v, p) \
10945 + __vx_info_map_pid((v), (p), __func__, __FILE__, __LINE__)
10946 +#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p)
10947 +#define vx_map_pid(p) vx_info_map_pid(current_vx_info(), p)
10948 +#define vx_map_tgid(p) vx_map_pid(p)
10950 +static inline int __vx_info_map_pid(struct vx_info *vxi, int pid,
10951 + const char *func, const char *file, int line)
10953 + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
10954 + vxfprintk(VXD_CBIT(cvirt, 2),
10955 + "vx_map_tgid: %p/%llx: %d -> %d",
10956 + vxi, (long long)vxi->vx_flags, pid,
10957 + (pid && pid == vxi->vx_initpid) ? 1 : pid,
10958 + func, file, line);
10961 + if (pid == vxi->vx_initpid)
10967 +#define vx_info_rmap_pid(v, p) \
10968 + __vx_info_rmap_pid((v), (p), __func__, __FILE__, __LINE__)
10969 +#define vx_rmap_pid(p) vx_info_rmap_pid(current_vx_info(), p)
10970 +#define vx_rmap_tgid(p) vx_rmap_pid(p)
10972 +static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid,
10973 + const char *func, const char *file, int line)
10975 + if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
10976 + vxfprintk(VXD_CBIT(cvirt, 2),
10977 + "vx_rmap_tgid: %p/%llx: %d -> %d",
10978 + vxi, (long long)vxi->vx_flags, pid,
10979 + (pid == 1) ? vxi->vx_initpid : pid,
10980 + func, file, line);
10981 + if ((pid == 1) && vxi->vx_initpid)
10982 + return vxi->vx_initpid;
10983 + if (pid == vxi->vx_initpid)
10990 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/sched.h linux-4.4/include/linux/vserver/sched.h
10991 --- linux-4.4/include/linux/vserver/sched.h 1970-01-01 01:00:00.000000000 +0100
10992 +++ linux-4.4/include/linux/vserver/sched.h 2021-02-24 16:56:24.589489533 +0100
10994 +#ifndef _VSERVER_SCHED_H
10995 +#define _VSERVER_SCHED_H
11002 +void vx_vsi_uptime(struct timespec *, struct timespec *);
11007 +void vx_update_load(struct vx_info *);
11010 +void vx_update_sched_param(struct _vx_sched *sched,
11011 + struct _vx_sched_pc *sched_pc);
11013 +#endif /* __KERNEL__ */
11014 +#else /* _VSERVER_SCHED_H */
11015 +#warning duplicate inclusion
11016 +#endif /* _VSERVER_SCHED_H */
11017 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/sched_cmd.h linux-4.4/include/linux/vserver/sched_cmd.h
11018 --- linux-4.4/include/linux/vserver/sched_cmd.h 1970-01-01 01:00:00.000000000 +0100
11019 +++ linux-4.4/include/linux/vserver/sched_cmd.h 2021-02-24 16:56:24.589489533 +0100
11021 +#ifndef _VSERVER_SCHED_CMD_H
11022 +#define _VSERVER_SCHED_CMD_H
11025 +#include <linux/compiler.h>
11026 +#include <uapi/vserver/sched_cmd.h>
11028 +extern int vc_set_prio_bias(struct vx_info *, void __user *);
11029 +extern int vc_get_prio_bias(struct vx_info *, void __user *);
11031 +#endif /* _VSERVER_SCHED_CMD_H */
11032 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/sched_def.h linux-4.4/include/linux/vserver/sched_def.h
11033 --- linux-4.4/include/linux/vserver/sched_def.h 1970-01-01 01:00:00.000000000 +0100
11034 +++ linux-4.4/include/linux/vserver/sched_def.h 2021-02-24 16:56:24.589489533 +0100
11036 +#ifndef _VSERVER_SCHED_DEF_H
11037 +#define _VSERVER_SCHED_DEF_H
11039 +#include <linux/spinlock.h>
11040 +#include <linux/jiffies.h>
11041 +#include <linux/cpumask.h>
11042 +#include <asm/atomic.h>
11043 +#include <asm/param.h>
11046 +/* context sub struct */
11048 +struct _vx_sched {
11049 + int prio_bias; /* bias offset for priority */
11051 + cpumask_t update; /* CPUs which should update */
11054 +struct _vx_sched_pc {
11055 + int prio_bias; /* bias offset for priority */
11057 + uint64_t user_ticks; /* token tick events */
11058 + uint64_t sys_ticks; /* token tick events */
11059 + uint64_t hold_ticks; /* token ticks paused */
11063 +#ifdef CONFIG_VSERVER_DEBUG
11065 +static inline void __dump_vx_sched(struct _vx_sched *sched)
11067 + printk("\t_vx_sched:\n");
11068 + printk("\t priority = %4d\n", sched->prio_bias);
11073 +#endif /* _VSERVER_SCHED_DEF_H */
11074 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/signal.h linux-4.4/include/linux/vserver/signal.h
11075 --- linux-4.4/include/linux/vserver/signal.h 1970-01-01 01:00:00.000000000 +0100
11076 +++ linux-4.4/include/linux/vserver/signal.h 2021-02-24 16:56:24.589489533 +0100
11078 +#ifndef _VSERVER_SIGNAL_H
11079 +#define _VSERVER_SIGNAL_H
11086 +int vx_info_kill(struct vx_info *, int, int);
11088 +#endif /* __KERNEL__ */
11089 +#else /* _VSERVER_SIGNAL_H */
11090 +#warning duplicate inclusion
11091 +#endif /* _VSERVER_SIGNAL_H */
11092 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/signal_cmd.h linux-4.4/include/linux/vserver/signal_cmd.h
11093 --- linux-4.4/include/linux/vserver/signal_cmd.h 1970-01-01 01:00:00.000000000 +0100
11094 +++ linux-4.4/include/linux/vserver/signal_cmd.h 2021-02-24 16:56:24.589489533 +0100
11096 +#ifndef _VSERVER_SIGNAL_CMD_H
11097 +#define _VSERVER_SIGNAL_CMD_H
11099 +#include <uapi/vserver/signal_cmd.h>
11102 +extern int vc_ctx_kill(struct vx_info *, void __user *);
11103 +extern int vc_wait_exit(struct vx_info *, void __user *);
11106 +extern int vc_get_pflags(uint32_t pid, void __user *);
11107 +extern int vc_set_pflags(uint32_t pid, void __user *);
11109 +#endif /* _VSERVER_SIGNAL_CMD_H */
11110 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/space.h linux-4.4/include/linux/vserver/space.h
11111 --- linux-4.4/include/linux/vserver/space.h 1970-01-01 01:00:00.000000000 +0100
11112 +++ linux-4.4/include/linux/vserver/space.h 2021-02-24 16:56:24.589489533 +0100
11114 +#ifndef _VSERVER_SPACE_H
11115 +#define _VSERVER_SPACE_H
11117 +#include <linux/types.h>
11121 +int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index);
11123 +#else /* _VSERVER_SPACE_H */
11124 +#warning duplicate inclusion
11125 +#endif /* _VSERVER_SPACE_H */
11126 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/space_cmd.h linux-4.4/include/linux/vserver/space_cmd.h
11127 --- linux-4.4/include/linux/vserver/space_cmd.h 1970-01-01 01:00:00.000000000 +0100
11128 +++ linux-4.4/include/linux/vserver/space_cmd.h 2021-02-24 16:56:24.589489533 +0100
11130 +#ifndef _VSERVER_SPACE_CMD_H
11131 +#define _VSERVER_SPACE_CMD_H
11133 +#include <uapi/vserver/space_cmd.h>
11136 +extern int vc_enter_space_v1(struct vx_info *, void __user *);
11137 +extern int vc_set_space_v1(struct vx_info *, void __user *);
11138 +extern int vc_enter_space(struct vx_info *, void __user *);
11139 +extern int vc_set_space(struct vx_info *, void __user *);
11140 +extern int vc_get_space_mask(void __user *, int);
11142 +#endif /* _VSERVER_SPACE_CMD_H */
11143 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/switch.h linux-4.4/include/linux/vserver/switch.h
11144 --- linux-4.4/include/linux/vserver/switch.h 1970-01-01 01:00:00.000000000 +0100
11145 +++ linux-4.4/include/linux/vserver/switch.h 2021-02-24 16:56:24.589489533 +0100
11147 +#ifndef _VSERVER_SWITCH_H
11148 +#define _VSERVER_SWITCH_H
11151 +#include <linux/errno.h>
11152 +#include <uapi/vserver/switch.h>
11154 +#endif /* _VSERVER_SWITCH_H */
11155 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/tag.h linux-4.4/include/linux/vserver/tag.h
11156 --- linux-4.4/include/linux/vserver/tag.h 1970-01-01 01:00:00.000000000 +0100
11157 +++ linux-4.4/include/linux/vserver/tag.h 2021-02-24 16:56:24.589489533 +0100
11162 +#include <linux/types.h>
11163 +#include <linux/uidgid.h>
11166 +#define DX_TAG(in) (IS_TAGGED(in))
11169 +#ifdef CONFIG_TAG_NFSD
11170 +#define DX_TAG_NFSD 1
11172 +#define DX_TAG_NFSD 0
11176 +#ifdef CONFIG_TAGGING_NONE
11178 +#define MAX_UID 0xFFFFFFFF
11179 +#define MAX_GID 0xFFFFFFFF
11181 +#define INOTAG_TAG(cond, uid, gid, tag) (0)
11183 +#define TAGINO_UID(cond, uid, tag) (uid)
11184 +#define TAGINO_GID(cond, gid, tag) (gid)
11189 +#ifdef CONFIG_TAGGING_GID16
11191 +#define MAX_UID 0xFFFFFFFF
11192 +#define MAX_GID 0x0000FFFF
11194 +#define INOTAG_TAG(cond, uid, gid, tag) \
11195 + ((cond) ? (((gid) >> 16) & 0xFFFF) : 0)
11197 +#define TAGINO_UID(cond, uid, tag) (uid)
11198 +#define TAGINO_GID(cond, gid, tag) \
11199 + ((cond) ? (((gid) & 0xFFFF) | ((tag) << 16)) : (gid))
11204 +#ifdef CONFIG_TAGGING_ID24
11206 +#define MAX_UID 0x00FFFFFF
11207 +#define MAX_GID 0x00FFFFFF
11209 +#define INOTAG_TAG(cond, uid, gid, tag) \
11210 + ((cond) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0)
11212 +#define TAGINO_UID(cond, uid, tag) \
11213 + ((cond) ? (((uid) & 0xFFFFFF) | (((tag) & 0xFF00) << 16)) : (uid))
11214 +#define TAGINO_GID(cond, gid, tag) \
11215 + ((cond) ? (((gid) & 0xFFFFFF) | (((tag) & 0x00FF) << 24)) : (gid))
11220 +#ifdef CONFIG_TAGGING_UID16
11222 +#define MAX_UID 0x0000FFFF
11223 +#define MAX_GID 0xFFFFFFFF
11225 +#define INOTAG_TAG(cond, uid, gid, tag) \
11226 + ((cond) ? (((uid) >> 16) & 0xFFFF) : 0)
11228 +#define TAGINO_UID(cond, uid, tag) \
11229 + ((cond) ? (((uid) & 0xFFFF) | ((tag) << 16)) : (uid))
11230 +#define TAGINO_GID(cond, gid, tag) (gid)
11235 +#ifdef CONFIG_TAGGING_INTERN
11237 +#define MAX_UID 0xFFFFFFFF
11238 +#define MAX_GID 0xFFFFFFFF
11240 +#define INOTAG_TAG(cond, uid, gid, tag) \
11241 + ((cond) ? (tag) : 0)
11243 +#define TAGINO_UID(cond, uid, tag) (uid)
11244 +#define TAGINO_GID(cond, gid, tag) (gid)
11249 +#ifndef CONFIG_TAGGING_NONE
11250 +#define dx_current_fstag(sb) \
11251 + ((sb)->s_flags & MS_TAGGED ? dx_current_tag() : 0)
11253 +#define dx_current_fstag(sb) (0)
11256 +#ifndef CONFIG_TAGGING_INTERN
11257 +#define TAGINO_TAG(cond, tag) (0)
11259 +#define TAGINO_TAG(cond, tag) ((cond) ? (tag) : 0)
11262 +#define TAGINO_KUID(cond, kuid, ktag) \
11263 + KUIDT_INIT(TAGINO_UID(cond, __kuid_val(kuid), __ktag_val(ktag)))
11264 +#define TAGINO_KGID(cond, kgid, ktag) \
11265 + KGIDT_INIT(TAGINO_GID(cond, __kgid_val(kgid), __ktag_val(ktag)))
11266 +#define TAGINO_KTAG(cond, ktag) \
11267 + KTAGT_INIT(TAGINO_TAG(cond, __ktag_val(ktag)))
11270 +#define INOTAG_UID(cond, uid, gid) \
11271 + ((cond) ? ((uid) & MAX_UID) : (uid))
11272 +#define INOTAG_GID(cond, uid, gid) \
11273 + ((cond) ? ((gid) & MAX_GID) : (gid))
11275 +#define INOTAG_KUID(cond, kuid, kgid) \
11276 + KUIDT_INIT(INOTAG_UID(cond, __kuid_val(kuid), __kgid_val(kgid)))
11277 +#define INOTAG_KGID(cond, kuid, kgid) \
11278 + KGIDT_INIT(INOTAG_GID(cond, __kuid_val(kuid), __kgid_val(kgid)))
11279 +#define INOTAG_KTAG(cond, kuid, kgid, ktag) \
11280 + KTAGT_INIT(INOTAG_TAG(cond, \
11281 + __kuid_val(kuid), __kgid_val(kgid), __ktag_val(ktag)))
11284 +static inline uid_t dx_map_uid(uid_t uid)
11286 + if ((uid > MAX_UID) && (uid != -1))
11288 + return (uid & MAX_UID);
11291 +static inline gid_t dx_map_gid(gid_t gid)
11293 + if ((gid > MAX_GID) && (gid != -1))
11295 + return (gid & MAX_GID);
11303 +#define dx_notagcheck(sb) ((sb) && ((sb)->s_flags & MS_NOTAGCHECK))
11305 +int dx_parse_tag(char *string, vtag_t *tag, int remove, int *mnt_flags,
11306 + unsigned long *flags);
11308 +#ifdef CONFIG_PROPAGATE
11310 +void __dx_propagate_tag(struct nameidata *nd, struct inode *inode);
11312 +#define dx_propagate_tag(n, i) __dx_propagate_tag(n, i)
11315 +#define dx_propagate_tag(n, i) do { } while (0)
11318 +#endif /* _DX_TAG_H */
11319 diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/tag_cmd.h linux-4.4/include/linux/vserver/tag_cmd.h
11320 --- linux-4.4/include/linux/vserver/tag_cmd.h 1970-01-01 01:00:00.000000000 +0100
11321 +++ linux-4.4/include/linux/vserver/tag_cmd.h 2021-02-24 16:56:24.589489533 +0100
11323 +#ifndef _VSERVER_TAG_CMD_H
11324 +#define _VSERVER_TAG_CMD_H
11326 +#include <uapi/vserver/tag_cmd.h>
11328 +extern int vc_task_tag(uint32_t);
11330 +extern int vc_tag_migrate(uint32_t);
11332 +#endif /* _VSERVER_TAG_CMD_H */
11333 diff -urNp -x '*.orig' linux-4.4/include/net/addrconf.h linux-4.4/include/net/addrconf.h
11334 --- linux-4.4/include/net/addrconf.h 2021-02-24 16:56:11.899089877 +0100
11335 +++ linux-4.4/include/net/addrconf.h 2021-02-24 16:56:24.589489533 +0100
11336 @@ -84,7 +84,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(str
11338 int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
11339 const struct in6_addr *daddr, unsigned int srcprefs,
11340 - struct in6_addr *saddr);
11341 + struct in6_addr *saddr, struct nx_info *nxi);
11342 int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
11344 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
11345 diff -urNp -x '*.orig' linux-4.4/include/net/af_unix.h linux-4.4/include/net/af_unix.h
11346 --- linux-4.4/include/net/af_unix.h 2021-02-24 16:56:11.899089877 +0100
11347 +++ linux-4.4/include/net/af_unix.h 2021-02-24 16:56:24.589489533 +0100
11349 #include <linux/socket.h>
11350 #include <linux/un.h>
11351 #include <linux/mutex.h>
11352 +// #include <linux/vs_base.h>
11353 #include <net/sock.h>
11355 void unix_inflight(struct user_struct *user, struct file *fp);
11356 diff -urNp -x '*.orig' linux-4.4/include/net/inet_timewait_sock.h linux-4.4/include/net/inet_timewait_sock.h
11357 --- linux-4.4/include/net/inet_timewait_sock.h 2021-02-24 16:56:11.905756754 +0100
11358 +++ linux-4.4/include/net/inet_timewait_sock.h 2021-02-24 16:56:24.589489533 +0100
11359 @@ -72,6 +72,10 @@ struct inet_timewait_sock {
11360 #define tw_num __tw_common.skc_num
11361 #define tw_cookie __tw_common.skc_cookie
11362 #define tw_dr __tw_common.skc_tw_dr
11363 +#define tw_xid __tw_common.skc_xid
11364 +#define tw_vx_info __tw_common.skc_vx_info
11365 +#define tw_nid __tw_common.skc_nid
11366 +#define tw_nx_info __tw_common.skc_nx_info
11369 volatile unsigned char tw_substate;
11370 diff -urNp -x '*.orig' linux-4.4/include/net/ip6_route.h linux-4.4/include/net/ip6_route.h
11371 --- linux-4.4/include/net/ip6_route.h 2021-02-24 16:56:11.905756754 +0100
11372 +++ linux-4.4/include/net/ip6_route.h 2021-02-24 16:56:24.592822971 +0100
11373 @@ -90,7 +90,7 @@ int ip6_del_rt(struct rt6_info *);
11375 int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
11376 const struct in6_addr *daddr, unsigned int prefs,
11377 - struct in6_addr *saddr);
11378 + struct in6_addr *saddr, struct nx_info *nxi);
11380 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
11381 const struct in6_addr *saddr, int oif, int flags);
11382 diff -urNp -x '*.orig' linux-4.4/include/net/route.h linux-4.4/include/net/route.h
11383 --- linux-4.4/include/net/route.h 2021-02-24 16:56:11.909090192 +0100
11384 +++ linux-4.4/include/net/route.h 2021-02-24 16:56:24.592822971 +0100
11385 @@ -227,6 +227,9 @@ static inline void ip_rt_put(struct rtab
11386 dst_release(&rt->dst);
11389 +#include <linux/vs_base.h>
11390 +#include <linux/vs_inet.h>
11392 #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3)
11394 extern const __u8 ip_tos2prio[16];
11395 @@ -274,6 +277,9 @@ static inline void ip_route_connect_init
11396 protocol, flow_flags, dst, src, dport, sport);
11399 +extern struct rtable *ip_v4_find_src(struct net *net, struct nx_info *,
11400 + struct flowi4 *);
11402 static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
11403 __be32 dst, __be32 src, u32 tos,
11404 int oif, u8 protocol,
11405 @@ -282,6 +288,7 @@ static inline struct rtable *ip_route_co
11407 struct net *net = sock_net(sk);
11409 + struct nx_info *nx_info = current_nx_info();
11411 ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
11413 @@ -295,7 +302,21 @@ static inline struct rtable *ip_route_co
11417 - if (!dst || !src) {
11420 + nx_info = sk->sk_nx_info;
11422 + vxdprintk(VXD_CBIT(net, 4),
11423 + "ip_route_connect(%p) %p,%p;%lx",
11424 + sk, nx_info, sk->sk_socket,
11425 + (sk->sk_socket?sk->sk_socket->flags:0));
11427 + rt = ip_v4_find_src(net, nx_info, fl4);
11432 + if (!fl4->daddr || !fl4->saddr) {
11433 rt = __ip_route_output_key(net, fl4);
11436 diff -urNp -x '*.orig' linux-4.4/include/net/sock.h linux-4.4/include/net/sock.h
11437 --- linux-4.4/include/net/sock.h 2021-02-24 16:56:11.912423630 +0100
11438 +++ linux-4.4/include/net/sock.h 2021-02-24 16:56:24.592822971 +0100
11439 @@ -201,6 +201,10 @@ struct sock_common {
11440 struct in6_addr skc_v6_daddr;
11441 struct in6_addr skc_v6_rcv_saddr;
11444 + struct vx_info *skc_vx_info;
11446 + struct nx_info *skc_nx_info;
11448 atomic64_t skc_cookie;
11450 @@ -350,8 +354,12 @@ struct sock {
11451 #define sk_prot __sk_common.skc_prot
11452 #define sk_net __sk_common.skc_net
11453 #define sk_v6_daddr __sk_common.skc_v6_daddr
11454 -#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
11455 +#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
11456 #define sk_cookie __sk_common.skc_cookie
11457 +#define sk_xid __sk_common.skc_xid
11458 +#define sk_vx_info __sk_common.skc_vx_info
11459 +#define sk_nid __sk_common.skc_nid
11460 +#define sk_nx_info __sk_common.skc_nx_info
11461 #define sk_incoming_cpu __sk_common.skc_incoming_cpu
11462 #define sk_flags __sk_common.skc_flags
11463 #define sk_rxhash __sk_common.skc_rxhash
11464 diff -urNp -x '*.orig' linux-4.4/include/uapi/Kbuild linux-4.4/include/uapi/Kbuild
11465 --- linux-4.4/include/uapi/Kbuild 2016-01-11 00:01:32.000000000 +0100
11466 +++ linux-4.4/include/uapi/Kbuild 2021-02-24 16:56:24.592822971 +0100
11467 @@ -13,3 +13,4 @@ header-y += drm/
11471 +header-y += vserver/
11472 diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/capability.h linux-4.4/include/uapi/linux/capability.h
11473 --- linux-4.4/include/uapi/linux/capability.h 2016-01-11 00:01:32.000000000 +0100
11474 +++ linux-4.4/include/uapi/linux/capability.h 2021-02-24 16:56:24.592822971 +0100
11475 @@ -259,6 +259,7 @@ struct vfs_cap_data {
11476 arbitrary SCSI commands */
11477 /* Allow setting encryption key on loopback filesystem */
11478 /* Allow setting zone reclaim policy */
11479 +/* Allow the selection of a security context */
11481 #define CAP_SYS_ADMIN 21
11483 @@ -354,7 +355,12 @@ struct vfs_cap_data {
11485 #define CAP_LAST_CAP CAP_AUDIT_READ
11487 -#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
11488 +/* Allow context manipulations */
11489 +/* Allow changing context info on files */
11491 +#define CAP_CONTEXT 63
11493 +#define cap_valid(x) ((x) >= 0 && ((x) <= CAP_LAST_CAP || (x) == CAP_CONTEXT))
11496 * Bit location of each capability (used by user-space library and kernel)
11497 diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/fs.h linux-4.4/include/uapi/linux/fs.h
11498 --- linux-4.4/include/uapi/linux/fs.h 2016-01-11 00:01:32.000000000 +0100
11499 +++ linux-4.4/include/uapi/linux/fs.h 2021-02-24 16:56:24.592822971 +0100
11500 @@ -91,6 +91,9 @@ struct inodes_stat_t {
11501 #define MS_I_VERSION (1<<23) /* Update inode I_version field */
11502 #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
11503 #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
11504 +#define MS_TAGGED (1<<8) /* use generic inode tagging */
11505 +#define MS_NOTAGCHECK (1<<9) /* don't check tags */
11506 +#define MS_TAGID (1<<26) /* use specific tag for this mount */
11508 /* These sb flags are internal to the kernel */
11509 #define MS_NOSEC (1<<28)
11510 @@ -197,12 +200,15 @@ struct inodes_stat_t {
11511 #define FS_EXTENT_FL 0x00080000 /* Extents */
11512 #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */
11513 #define FS_NOCOW_FL 0x00800000 /* Do not cow file */
11514 +#define FS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
11515 #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
11516 #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
11518 -#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
11519 -#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
11520 +#define FS_BARRIER_FL 0x04000000 /* Barrier for chroot() */
11521 +#define FS_COW_FL 0x20000000 /* Copy on Write marker; NOTE(review): same value as FS_PROJINHERIT_FL */
11523 +#define FS_FL_USER_VISIBLE 0x0103DFFF /* User visible flags */
11524 +#define FS_FL_USER_MODIFIABLE 0x010380FF /* User modifiable flags */
11526 #define SYNC_FILE_RANGE_WAIT_BEFORE 1
11527 #define SYNC_FILE_RANGE_WRITE 2
11528 diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/gfs2_ondisk.h linux-4.4/include/uapi/linux/gfs2_ondisk.h
11529 --- linux-4.4/include/uapi/linux/gfs2_ondisk.h 2016-01-11 00:01:32.000000000 +0100
11530 +++ linux-4.4/include/uapi/linux/gfs2_ondisk.h 2021-02-24 16:56:24.592822971 +0100
11531 @@ -225,6 +225,9 @@ enum {
11534 gfs2fl_TopLevel = 10,
11535 + gfs2fl_IXUnlink = 16,
11536 + gfs2fl_Barrier = 17,
11538 gfs2fl_TruncInProg = 29,
11539 gfs2fl_InheritDirectio = 30,
11540 gfs2fl_InheritJdata = 31,
11541 @@ -242,6 +245,9 @@ enum {
11542 #define GFS2_DIF_SYNC 0x00000100
11543 #define GFS2_DIF_SYSTEM 0x00000200 /* New in gfs2 */
11544 #define GFS2_DIF_TOPDIR 0x00000400 /* New in gfs2 */
11545 +#define GFS2_DIF_IXUNLINK 0x00010000
11546 +#define GFS2_DIF_BARRIER 0x00020000
11547 +#define GFS2_DIF_COW 0x00040000
11548 #define GFS2_DIF_TRUNC_IN_PROG 0x20000000 /* New in gfs2 */
11549 #define GFS2_DIF_INHERIT_DIRECTIO 0x40000000 /* only in gfs1 */
11550 #define GFS2_DIF_INHERIT_JDATA 0x80000000
11551 diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/if_tun.h linux-4.4/include/uapi/linux/if_tun.h
11552 --- linux-4.4/include/uapi/linux/if_tun.h 2016-01-11 00:01:32.000000000 +0100
11553 +++ linux-4.4/include/uapi/linux/if_tun.h 2021-02-24 16:56:24.592822971 +0100
11556 #define TUNSETVNETBE _IOW('T', 222, int)
11557 #define TUNGETVNETBE _IOR('T', 223, int)
11558 +#define TUNSETNID _IOW('T', 224, int)
11560 /* TUNSETIFF ifr flags */
11561 #define IFF_TUN 0x0001
11562 diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/major.h linux-4.4/include/uapi/linux/major.h
11563 --- linux-4.4/include/uapi/linux/major.h 2016-01-11 00:01:32.000000000 +0100
11564 +++ linux-4.4/include/uapi/linux/major.h 2021-02-24 16:56:24.592822971 +0100
11566 #define HD_MAJOR IDE0_MAJOR
11567 #define PTY_SLAVE_MAJOR 3
11568 #define TTY_MAJOR 4
11569 +#define VROOT_MAJOR 4
11570 #define TTYAUX_MAJOR 5
11572 #define VCS_MAJOR 7
11573 diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/nfs_mount.h linux-4.4/include/uapi/linux/nfs_mount.h
11574 --- linux-4.4/include/uapi/linux/nfs_mount.h 2016-01-11 00:01:32.000000000 +0100
11575 +++ linux-4.4/include/uapi/linux/nfs_mount.h 2021-02-24 16:56:24.592822971 +0100
11576 @@ -63,7 +63,8 @@ struct nfs_mount_data {
11577 #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 non-text parsed mount data only */
11578 #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */
11579 #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */
11580 -#define NFS_MOUNT_FLAGMASK 0xFFFF
11581 +#define NFS_MOUNT_TAGGED 0x10000 /* context tagging */
11582 +#define NFS_MOUNT_FLAGMASK 0x1FFFF
11584 /* The following are for internal use only */
11585 #define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
11586 diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/reboot.h linux-4.4/include/uapi/linux/reboot.h
11587 --- linux-4.4/include/uapi/linux/reboot.h 2016-01-11 00:01:32.000000000 +0100
11588 +++ linux-4.4/include/uapi/linux/reboot.h 2021-02-24 16:56:24.592822971 +0100
11590 #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4
11591 #define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2
11592 #define LINUX_REBOOT_CMD_KEXEC 0x45584543
11594 +#define LINUX_REBOOT_CMD_OOM 0xDEADBEEF
11597 #endif /* _UAPI_LINUX_REBOOT_H */
11598 diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/sysctl.h linux-4.4/include/uapi/linux/sysctl.h
11599 --- linux-4.4/include/uapi/linux/sysctl.h 2021-02-24 16:56:11.925757384 +0100
11600 +++ linux-4.4/include/uapi/linux/sysctl.h 2021-02-24 16:56:24.592822971 +0100
11601 @@ -60,6 +60,7 @@ enum
11602 CTL_ABI=9, /* Binary emulation */
11603 CTL_CPU=10, /* CPU stuff (speed scaling, etc) */
11604 CTL_ARLAN=254, /* arlan wireless driver */
11605 + CTL_VSERVER=4242, /* Linux-VServer debug */
11606 CTL_S390DBF=5677, /* s390 debug */
11607 CTL_SUNRPC=7249, /* sunrpc debug */
11608 CTL_PM=9899, /* frv power management */
11609 @@ -94,6 +95,7 @@ enum
11611 KERN_PANIC=15, /* int: panic timeout */
11612 KERN_REALROOTDEV=16, /* real root device to mount after initrd */
11613 + KERN_VSHELPER=17, /* string: path to vshelper policy agent */
11615 KERN_SPARC_REBOOT=21, /* reboot command on Sparc */
11616 KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */
11617 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/Kbuild linux-4.4/include/uapi/vserver/Kbuild
11618 --- linux-4.4/include/uapi/vserver/Kbuild 1970-01-01 01:00:00.000000000 +0100
11619 +++ linux-4.4/include/uapi/vserver/Kbuild 2021-02-24 16:56:24.596156409 +0100
11622 +header-y += context_cmd.h network_cmd.h space_cmd.h \
11623 + cacct_cmd.h cvirt_cmd.h limit_cmd.h dlimit_cmd.h \
11624 + inode_cmd.h tag_cmd.h sched_cmd.h signal_cmd.h \
11625 + debug_cmd.h device_cmd.h
11627 +header-y += switch.h context.h network.h monitor.h \
11628 + limit.h inode.h device.h
11630 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/cacct_cmd.h linux-4.4/include/uapi/vserver/cacct_cmd.h
11631 --- linux-4.4/include/uapi/vserver/cacct_cmd.h 1970-01-01 01:00:00.000000000 +0100
11632 +++ linux-4.4/include/uapi/vserver/cacct_cmd.h 2021-02-24 16:56:24.592822971 +0100
11634 +#ifndef _UAPI_VS_CACCT_CMD_H
11635 +#define _UAPI_VS_CACCT_CMD_H
11638 +/* socket statistics commands */
11640 +#define VCMD_sock_stat VC_CMD(VSTAT, 5, 0)
11642 +struct vcmd_sock_stat_v0 {
11644 + uint32_t count[3];
11645 + uint64_t total[3];
11648 +#endif /* _UAPI_VS_CACCT_CMD_H */
11649 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/context.h linux-4.4/include/uapi/vserver/context.h
11650 --- linux-4.4/include/uapi/vserver/context.h 1970-01-01 01:00:00.000000000 +0100
11651 +++ linux-4.4/include/uapi/vserver/context.h 2021-02-24 16:56:24.592822971 +0100
11653 +#ifndef _UAPI_VS_CONTEXT_H
11654 +#define _UAPI_VS_CONTEXT_H
11656 +#include <linux/types.h>
11657 +#include <linux/capability.h>
11660 +/* context flags */
11662 +#define VXF_INFO_SCHED 0x00000002
11663 +#define VXF_INFO_NPROC 0x00000004
11664 +#define VXF_INFO_PRIVATE 0x00000008
11666 +#define VXF_INFO_INIT 0x00000010
11667 +#define VXF_INFO_HIDE 0x00000020
11668 +#define VXF_INFO_ULIMIT 0x00000040
11669 +#define VXF_INFO_NSPACE 0x00000080
11671 +#define VXF_SCHED_HARD 0x00000100
11672 +#define VXF_SCHED_PRIO 0x00000200
11673 +#define VXF_SCHED_PAUSE 0x00000400
11675 +#define VXF_VIRT_MEM 0x00010000
11676 +#define VXF_VIRT_UPTIME 0x00020000
11677 +#define VXF_VIRT_CPU 0x00040000
11678 +#define VXF_VIRT_LOAD 0x00080000
11679 +#define VXF_VIRT_TIME 0x00100000
11681 +#define VXF_HIDE_MOUNT 0x01000000
11682 +/* was VXF_HIDE_NETIF 0x02000000 */
11683 +#define VXF_HIDE_VINFO 0x04000000
11685 +#define VXF_STATE_SETUP (1ULL << 32)
11686 +#define VXF_STATE_INIT (1ULL << 33)
11687 +#define VXF_STATE_ADMIN (1ULL << 34)
11689 +#define VXF_SC_HELPER (1ULL << 36)
11690 +#define VXF_REBOOT_KILL (1ULL << 37)
11691 +#define VXF_PERSISTENT (1ULL << 38)
11693 +#define VXF_FORK_RSS (1ULL << 48)
11694 +#define VXF_PROLIFIC (1ULL << 49)
11696 +#define VXF_IGNEG_NICE (1ULL << 52)
11698 +#define VXF_ONE_TIME (0x0007ULL << 32)
11700 +#define VXF_INIT_SET (VXF_STATE_SETUP | VXF_STATE_INIT | VXF_STATE_ADMIN)
11703 +/* context migration */
11705 +#define VXM_SET_INIT 0x00000001
11706 +#define VXM_SET_REAPER 0x00000002
11708 +/* context caps */
11710 +#define VXC_SET_UTSNAME 0x00000001
11711 +#define VXC_SET_RLIMIT 0x00000002
11712 +#define VXC_FS_SECURITY 0x00000004
11713 +#define VXC_FS_TRUSTED 0x00000008
11714 +#define VXC_TIOCSTI 0x00000010
11716 +/* was VXC_RAW_ICMP 0x00000100 */
11717 +#define VXC_SYSLOG 0x00001000
11718 +#define VXC_OOM_ADJUST 0x00002000
11719 +#define VXC_AUDIT_CONTROL 0x00004000
11721 +#define VXC_SECURE_MOUNT 0x00010000
11722 +/* #define VXC_SECURE_REMOUNT 0x00020000 */
11723 +#define VXC_BINARY_MOUNT 0x00040000
11724 +#define VXC_DEV_MOUNT 0x00080000
11726 +#define VXC_QUOTA_CTL 0x00100000
11727 +#define VXC_ADMIN_MAPPER 0x00200000
11728 +#define VXC_ADMIN_CLOOP 0x00400000
11730 +#define VXC_KTHREAD 0x01000000
11731 +#define VXC_NAMESPACE 0x02000000
11733 +#endif /* _UAPI_VS_CONTEXT_H */
11734 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/context_cmd.h linux-4.4/include/uapi/vserver/context_cmd.h
11735 --- linux-4.4/include/uapi/vserver/context_cmd.h 1970-01-01 01:00:00.000000000 +0100
11736 +++ linux-4.4/include/uapi/vserver/context_cmd.h 2021-02-24 16:56:24.592822971 +0100
11738 +#ifndef _UAPI_VS_CONTEXT_CMD_H
11739 +#define _UAPI_VS_CONTEXT_CMD_H
11742 +/* vinfo commands */
11744 +#define VCMD_task_xid VC_CMD(VINFO, 1, 0)
11747 +#define VCMD_vx_info VC_CMD(VINFO, 5, 0)
11749 +struct vcmd_vx_info_v0 {
11751 + uint32_t initpid;
11752 + /* more to come */
11756 +#define VCMD_ctx_stat VC_CMD(VSTAT, 0, 0)
11758 +struct vcmd_ctx_stat_v0 {
11761 + /* more to come */
11765 +/* context commands */
11767 +#define VCMD_ctx_create_v0 VC_CMD(VPROC, 1, 0)
11768 +#define VCMD_ctx_create VC_CMD(VPROC, 1, 1)
11770 +struct vcmd_ctx_create {
11771 + uint64_t flagword;
11774 +#define VCMD_ctx_migrate_v0 VC_CMD(PROCMIG, 1, 0)
11775 +#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 1)
11777 +struct vcmd_ctx_migrate {
11778 + uint64_t flagword;
11783 +/* flag commands */
11785 +#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0)
11786 +#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0)
11788 +struct vcmd_ctx_flags_v0 {
11789 + uint64_t flagword;
11795 +/* context caps commands */
11797 +#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 1)
11798 +#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 1)
11800 +struct vcmd_ctx_caps_v1 {
11807 +/* bcaps commands */
11809 +#define VCMD_get_bcaps VC_CMD(FLAGS, 9, 0)
11810 +#define VCMD_set_bcaps VC_CMD(FLAGS, 10, 0)
11812 +struct vcmd_bcaps {
11819 +/* umask commands */
11821 +#define VCMD_get_umask VC_CMD(FLAGS, 13, 0)
11822 +#define VCMD_set_umask VC_CMD(FLAGS, 14, 0)
11824 +struct vcmd_umask {
11831 +/* wmask commands */
11833 +#define VCMD_get_wmask VC_CMD(FLAGS, 15, 0)
11834 +#define VCMD_set_wmask VC_CMD(FLAGS, 16, 0)
11836 +struct vcmd_wmask {
11845 +#define VCMD_get_badness VC_CMD(MEMCTRL, 5, 0)
11846 +#define VCMD_set_badness VC_CMD(MEMCTRL, 6, 0)
11848 +struct vcmd_badness_v0 {
11852 +#endif /* _UAPI_VS_CONTEXT_CMD_H */
11853 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/cvirt_cmd.h linux-4.4/include/uapi/vserver/cvirt_cmd.h
11854 --- linux-4.4/include/uapi/vserver/cvirt_cmd.h 1970-01-01 01:00:00.000000000 +0100
11855 +++ linux-4.4/include/uapi/vserver/cvirt_cmd.h 2021-02-24 16:56:24.592822971 +0100
11857 +#ifndef _UAPI_VS_CVIRT_CMD_H
11858 +#define _UAPI_VS_CVIRT_CMD_H
11861 +/* virtual host info name commands */
11863 +#define VCMD_set_vhi_name VC_CMD(VHOST, 1, 0)
11864 +#define VCMD_get_vhi_name VC_CMD(VHOST, 2, 0)
11866 +struct vcmd_vhi_name_v0 {
11872 +enum vhi_name_field {
11873 + VHIN_CONTEXT = 0,
11884 +#define VCMD_virt_stat VC_CMD(VSTAT, 3, 0)
11886 +struct vcmd_virt_stat_v0 {
11889 + uint32_t nr_threads;
11890 + uint32_t nr_running;
11891 + uint32_t nr_uninterruptible;
11892 + uint32_t nr_onhold;
11893 + uint32_t nr_forks;
11894 + uint32_t load[3];
11897 +#endif /* _UAPI_VS_CVIRT_CMD_H */
11898 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/debug_cmd.h linux-4.4/include/uapi/vserver/debug_cmd.h
11899 --- linux-4.4/include/uapi/vserver/debug_cmd.h 1970-01-01 01:00:00.000000000 +0100
11900 +++ linux-4.4/include/uapi/vserver/debug_cmd.h 2021-02-24 16:56:24.592822971 +0100
11902 +#ifndef _UAPI_VS_DEBUG_CMD_H
11903 +#define _UAPI_VS_DEBUG_CMD_H
11906 +/* debug commands */
11908 +#define VCMD_dump_history VC_CMD(DEBUG, 1, 0)
11910 +#define VCMD_read_history VC_CMD(DEBUG, 5, 0)
11911 +#define VCMD_read_monitor VC_CMD(DEBUG, 6, 0)
11913 +struct vcmd_read_history_v0 {
11916 + char __user *data;
11919 +struct vcmd_read_monitor_v0 {
11922 + char __user *data;
11925 +#endif /* _UAPI_VS_DEBUG_CMD_H */
11926 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/device.h linux-4.4/include/uapi/vserver/device.h
11927 --- linux-4.4/include/uapi/vserver/device.h 1970-01-01 01:00:00.000000000 +0100
11928 +++ linux-4.4/include/uapi/vserver/device.h 2021-02-24 16:56:24.592822971 +0100
11930 +#ifndef _UAPI_VS_DEVICE_H
11931 +#define _UAPI_VS_DEVICE_H
11934 +#define DATTR_CREATE 0x00000001
11935 +#define DATTR_OPEN 0x00000002
11937 +#define DATTR_REMAP 0x00000010
11939 +#define DATTR_MASK 0x00000013 /* DATTR_CREATE | DATTR_OPEN | DATTR_REMAP */
11941 +#endif /* _UAPI_VS_DEVICE_H */
11942 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/device_cmd.h linux-4.4/include/uapi/vserver/device_cmd.h
11943 --- linux-4.4/include/uapi/vserver/device_cmd.h 1970-01-01 01:00:00.000000000 +0100
11944 +++ linux-4.4/include/uapi/vserver/device_cmd.h 2021-02-24 16:56:24.592822971 +0100
11946 +#ifndef _UAPI_VS_DEVICE_CMD_H
11947 +#define _UAPI_VS_DEVICE_CMD_H
11950 +/* device vserver commands */
11952 +#define VCMD_set_mapping VC_CMD(DEVICE, 1, 0)
11953 +#define VCMD_unset_mapping VC_CMD(DEVICE, 2, 0)
11955 +struct vcmd_set_mapping_v0 {
11956 + const char __user *device;
11957 + const char __user *target;
11961 +#endif /* _UAPI_VS_DEVICE_CMD_H */
11962 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/dlimit_cmd.h linux-4.4/include/uapi/vserver/dlimit_cmd.h
11963 --- linux-4.4/include/uapi/vserver/dlimit_cmd.h 1970-01-01 01:00:00.000000000 +0100
11964 +++ linux-4.4/include/uapi/vserver/dlimit_cmd.h 2021-02-24 16:56:24.592822971 +0100
11966 +#ifndef _UAPI_VS_DLIMIT_CMD_H
11967 +#define _UAPI_VS_DLIMIT_CMD_H
11970 +/* dlimit vserver commands */
11972 +#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0)
11973 +#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0)
11975 +#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0)
11976 +#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0)
11978 +struct vcmd_ctx_dlimit_base_v0 {
11979 + const char __user *name;
11983 +struct vcmd_ctx_dlimit_v0 {
11984 + const char __user *name;
11985 + uint32_t space_used; /* used space in kbytes */
11986 + uint32_t space_total; /* maximum space in kbytes */
11987 + uint32_t inodes_used; /* used inodes */
11988 + uint32_t inodes_total; /* maximum inodes */
11989 + uint32_t reserved; /* reserved for root in % */
11993 +#define CDLIM_UNSET ((uint32_t)0UL) /* 0 */
11994 +#define CDLIM_INFINITY ((uint32_t)~0UL) /* 0xFFFFFFFF once truncated to 32 bits */
11995 +#define CDLIM_KEEP ((uint32_t)~1UL) /* 0xFFFFFFFE once truncated to 32 bits */
11997 +#define DLIME_UNIT 0
11998 +#define DLIME_KILO 1
11999 +#define DLIME_MEGA 2
12000 +#define DLIME_GIGA 3
12002 +#define DLIMF_SHIFT 0x10
12004 +#define DLIMS_USED 0
12005 +#define DLIMS_TOTAL 2
12008 +uint64_t dlimit_space_32to64(uint32_t val, uint32_t flags, int shift)
12010 + int exp = (flags & DLIMF_SHIFT) ?
12011 + (flags >> shift) & DLIME_GIGA : DLIME_KILO;
12012 + return ((uint64_t)val) << (10 * exp);
12016 +uint32_t dlimit_space_64to32(uint64_t val, uint32_t *flags, int shift)
12020 + if (*flags & DLIMF_SHIFT) {
12021 + while (val > (1LL << 32) && (exp < 3)) {
12025 + *flags &= ~(DLIME_GIGA << shift);
12026 + *flags |= exp << shift;
12032 +#endif /* _UAPI_VS_DLIMIT_CMD_H */
12033 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/inode.h linux-4.4/include/uapi/vserver/inode.h
12034 --- linux-4.4/include/uapi/vserver/inode.h 1970-01-01 01:00:00.000000000 +0100
12035 +++ linux-4.4/include/uapi/vserver/inode.h 2021-02-24 16:56:24.596156409 +0100
12037 +#ifndef _UAPI_VS_INODE_H
12038 +#define _UAPI_VS_INODE_H
12041 +#define IATTR_TAG 0x01000000
12043 +#define IATTR_ADMIN 0x00000001
12044 +#define IATTR_WATCH 0x00000002
12045 +#define IATTR_HIDE 0x00000004
12046 +#define IATTR_FLAGS 0x00000007 /* IATTR_ADMIN | IATTR_WATCH | IATTR_HIDE */
12048 +#define IATTR_BARRIER 0x00010000
12049 +#define IATTR_IXUNLINK 0x00020000
12050 +#define IATTR_IMMUTABLE 0x00040000
12051 +#define IATTR_COW 0x00080000
12054 +/* inode ioctls */
12056 +#define FIOC_GETXFLG _IOR('x', 5, long)
12057 +#define FIOC_SETXFLG _IOW('x', 6, long)
12059 +#endif /* _UAPI_VS_INODE_H */
12060 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/inode_cmd.h linux-4.4/include/uapi/vserver/inode_cmd.h
12061 --- linux-4.4/include/uapi/vserver/inode_cmd.h 1970-01-01 01:00:00.000000000 +0100
12062 +++ linux-4.4/include/uapi/vserver/inode_cmd.h 2021-02-24 16:56:24.596156409 +0100
12064 +#ifndef _UAPI_VS_INODE_CMD_H
12065 +#define _UAPI_VS_INODE_CMD_H
12068 +/* inode vserver commands */
12070 +#define VCMD_get_iattr VC_CMD(INODE, 1, 1)
12071 +#define VCMD_set_iattr VC_CMD(INODE, 2, 1)
12073 +#define VCMD_fget_iattr VC_CMD(INODE, 3, 0)
12074 +#define VCMD_fset_iattr VC_CMD(INODE, 4, 0)
12076 +struct vcmd_ctx_iattr_v1 {
12077 + const char __user *name;
12083 +struct vcmd_ctx_fiattr_v0 {
12089 +#endif /* _UAPI_VS_INODE_CMD_H */
12090 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/limit.h linux-4.4/include/uapi/vserver/limit.h
12091 --- linux-4.4/include/uapi/vserver/limit.h 1970-01-01 01:00:00.000000000 +0100
12092 +++ linux-4.4/include/uapi/vserver/limit.h 2021-02-24 16:56:24.596156409 +0100
12094 +#ifndef _UAPI_VS_LIMIT_H
12095 +#define _UAPI_VS_LIMIT_H
12098 +#define VLIMIT_NSOCK 16
12099 +#define VLIMIT_OPENFD 17
12100 +#define VLIMIT_ANON 18
12101 +#define VLIMIT_SHMEM 19
12102 +#define VLIMIT_SEMARY 20
12103 +#define VLIMIT_NSEMS 21
12104 +#define VLIMIT_DENTRY 22
12105 +#define VLIMIT_MAPPED 23
12107 +#endif /* _UAPI_VS_LIMIT_H */
12108 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/limit_cmd.h linux-4.4/include/uapi/vserver/limit_cmd.h
12109 --- linux-4.4/include/uapi/vserver/limit_cmd.h 1970-01-01 01:00:00.000000000 +0100
12110 +++ linux-4.4/include/uapi/vserver/limit_cmd.h 2021-02-24 16:56:24.596156409 +0100
12112 +#ifndef _UAPI_VS_LIMIT_CMD_H
12113 +#define _UAPI_VS_LIMIT_CMD_H
12116 +/* rlimit vserver commands */
12118 +#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0)
12119 +#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0)
12120 +#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0)
12121 +#define VCMD_reset_hits VC_CMD(RLIMIT, 7, 0)
12122 +#define VCMD_reset_minmax VC_CMD(RLIMIT, 9, 0)
12124 +struct vcmd_ctx_rlimit_v0 {
12126 + uint64_t minimum;
12127 + uint64_t softlimit;
12128 + uint64_t maximum;
12131 +struct vcmd_ctx_rlimit_mask_v0 {
12132 + uint32_t minimum;
12133 + uint32_t softlimit;
12134 + uint32_t maximum;
12137 +#define VCMD_rlimit_stat VC_CMD(VSTAT, 1, 0)
12139 +struct vcmd_rlimit_stat_v0 {
12143 + uint64_t minimum;
12144 + uint64_t maximum;
12147 +#define CRLIM_UNSET (0ULL) /* 0 */
12148 +#define CRLIM_INFINITY (~0ULL) /* all bits set in an unsigned long long */
12149 +#define CRLIM_KEEP (~1ULL) /* all bits set except bit 0 */
12151 +#endif /* _UAPI_VS_LIMIT_CMD_H */
12152 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/monitor.h linux-4.4/include/uapi/vserver/monitor.h
12153 --- linux-4.4/include/uapi/vserver/monitor.h 1970-01-01 01:00:00.000000000 +0100
12154 +++ linux-4.4/include/uapi/vserver/monitor.h 2021-02-24 16:56:24.596156409 +0100
12156 +#ifndef _UAPI_VS_MONITOR_H
12157 +#define _UAPI_VS_MONITOR_H
12159 +#include <linux/types.h>
12167 + VXM_UPDATE = 0x20,
12171 + VXM_RQINFO_1 = 0x24,
12174 + VXM_ACTIVATE = 0x40,
12181 + VXM_MIGRATE = 0x48,
12184 + /* all other bits are flags */
12185 + VXM_SCHED = 0x80,
12188 +struct _vxm_update_1 {
12189 + uint32_t tokens_max;
12190 + uint32_t fill_rate;
12191 + uint32_t interval;
12194 +struct _vxm_update_2 {
12195 + uint32_t tokens_min;
12196 + uint32_t fill_rate;
12197 + uint32_t interval;
12200 +struct _vxm_rqinfo_1 {
12201 + uint16_t running;
12205 + uint32_t idle_tokens;
12208 +struct _vxm_rqinfo_2 {
12209 + uint32_t norm_time;
12210 + uint32_t idle_time;
12211 + uint32_t idle_skip;
12214 +struct _vxm_sched {
12216 + uint32_t norm_time;
12217 + uint32_t idle_time;
12220 +struct _vxm_task {
12225 +struct _vxm_event {
12234 + struct _vxm_task tsk;
12238 +struct _vx_mon_entry {
12242 + struct _vxm_event ev;
12243 + struct _vxm_sched sd;
12244 + struct _vxm_update_1 u1;
12245 + struct _vxm_update_2 u2;
12246 + struct _vxm_rqinfo_1 q1;
12247 + struct _vxm_rqinfo_2 q2;
12251 +#endif /* _UAPI_VS_MONITOR_H */
12252 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/network.h linux-4.4/include/uapi/vserver/network.h
12253 --- linux-4.4/include/uapi/vserver/network.h 1970-01-01 01:00:00.000000000 +0100
12254 +++ linux-4.4/include/uapi/vserver/network.h 2021-02-24 16:56:24.596156409 +0100
12256 +#ifndef _UAPI_VS_NETWORK_H
12257 +#define _UAPI_VS_NETWORK_H
12259 +#include <linux/types.h>
12262 +#define MAX_N_CONTEXT 65535 /* Arbitrary limit */
12265 +/* network flags */
12267 +#define NXF_INFO_PRIVATE 0x00000008
12269 +#define NXF_SINGLE_IP 0x00000100
12270 +#define NXF_LBACK_REMAP 0x00000200
12271 +#define NXF_LBACK_ALLOW 0x00000400
12273 +#define NXF_HIDE_NETIF 0x02000000
12274 +#define NXF_HIDE_LBACK 0x04000000
12276 +#define NXF_STATE_SETUP (1ULL << 32)
12277 +#define NXF_STATE_ADMIN (1ULL << 34)
12279 +#define NXF_SC_HELPER (1ULL << 36)
12280 +#define NXF_PERSISTENT (1ULL << 38)
12282 +#define NXF_ONE_TIME (0x0005ULL << 32) /* bits 32 and 34: same bits as NXF_STATE_SETUP | NXF_STATE_ADMIN */
12285 +#define NXF_INIT_SET (__nxf_init_set())
12287 +static inline uint64_t __nxf_init_set(void) {
12288 + return NXF_STATE_ADMIN
12289 +#ifdef CONFIG_VSERVER_AUTO_LBACK
12290 + | NXF_LBACK_REMAP
12293 +#ifdef CONFIG_VSERVER_AUTO_SINGLE
12296 + | NXF_HIDE_NETIF;
12300 +/* network caps */
12302 +#define NXC_TUN_CREATE 0x00000001
12304 +#define NXC_RAW_ICMP 0x00000100
12306 +#define NXC_MULTICAST 0x00001000
12309 +/* address types */
12311 +#define NXA_TYPE_IPV4 0x0001
12312 +#define NXA_TYPE_IPV6 0x0002
12314 +#define NXA_TYPE_NONE 0x0000
12315 +#define NXA_TYPE_ANY 0x00FF
12317 +#define NXA_TYPE_ADDR 0x0010
12318 +#define NXA_TYPE_MASK 0x0020
12319 +#define NXA_TYPE_RANGE 0x0040
12321 +#define NXA_MASK_ALL (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE)
12323 +#define NXA_MOD_BCAST 0x0100
12324 +#define NXA_MOD_LBACK 0x0200
12326 +#define NXA_LOOPBACK 0x1000
12328 +#define NXA_MASK_BIND (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK)
12329 +#define NXA_MASK_SHOW (NXA_MASK_ALL | NXA_LOOPBACK)
12331 +#endif /* _UAPI_VS_NETWORK_H */
12332 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/network_cmd.h linux-4.4/include/uapi/vserver/network_cmd.h
12333 --- linux-4.4/include/uapi/vserver/network_cmd.h 1970-01-01 01:00:00.000000000 +0100
12334 +++ linux-4.4/include/uapi/vserver/network_cmd.h 2021-02-24 16:56:24.596156409 +0100
12336 +#ifndef _UAPI_VS_NETWORK_CMD_H
12337 +#define _UAPI_VS_NETWORK_CMD_H
12340 +/* vinfo commands */
12342 +#define VCMD_task_nid VC_CMD(VINFO, 2, 0)
12345 +#define VCMD_nx_info VC_CMD(VINFO, 6, 0)
12347 +struct vcmd_nx_info_v0 {
12349 + /* more to come */
12353 +#include <linux/in.h>
12354 +#include <linux/in6.h>
12356 +#define VCMD_net_create_v0 VC_CMD(VNET, 1, 0)
12357 +#define VCMD_net_create VC_CMD(VNET, 1, 1)
12359 +struct vcmd_net_create {
12360 + uint64_t flagword;
12363 +#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0)
12365 +#define VCMD_net_add VC_CMD(NETALT, 1, 0)
12366 +#define VCMD_net_remove VC_CMD(NETALT, 2, 0)
12368 +struct vcmd_net_addr_v0 {
12371 + struct in_addr ip[4];
12372 + struct in_addr mask[4];
12375 +#define VCMD_net_add_ipv4_v1 VC_CMD(NETALT, 1, 1)
12376 +#define VCMD_net_rem_ipv4_v1 VC_CMD(NETALT, 2, 1)
12378 +struct vcmd_net_addr_ipv4_v1 {
12381 + struct in_addr ip;
12382 + struct in_addr mask;
12385 +#define VCMD_net_add_ipv4 VC_CMD(NETALT, 1, 2)
12386 +#define VCMD_net_rem_ipv4 VC_CMD(NETALT, 2, 2)
12388 +struct vcmd_net_addr_ipv4_v2 {
12391 + struct in_addr ip;
12392 + struct in_addr ip2;
12393 + struct in_addr mask;
12396 +#define VCMD_net_add_ipv6 VC_CMD(NETALT, 3, 1)
12397 +#define VCMD_net_remove_ipv6 VC_CMD(NETALT, 4, 1)
12399 +struct vcmd_net_addr_ipv6_v1 {
12403 + struct in6_addr ip;
12404 + struct in6_addr mask;
12407 +#define VCMD_add_match_ipv4 VC_CMD(NETALT, 5, 0)
12408 +#define VCMD_get_match_ipv4 VC_CMD(NETALT, 6, 0)
12410 +struct vcmd_match_ipv4_v0 {
12415 + struct in_addr ip;
12416 + struct in_addr ip2;
12417 + struct in_addr mask;
12420 +#define VCMD_add_match_ipv6 VC_CMD(NETALT, 7, 0)
12421 +#define VCMD_get_match_ipv6 VC_CMD(NETALT, 8, 0)
12423 +struct vcmd_match_ipv6_v0 {
12428 + struct in6_addr ip;
12429 + struct in6_addr ip2;
12430 + struct in6_addr mask;
12436 +/* flag commands */
12438 +#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0)
12439 +#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0)
12441 +struct vcmd_net_flags_v0 {
12442 + uint64_t flagword;
12448 +/* network caps commands */
12450 +#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0)
12451 +#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0)
12453 +struct vcmd_net_caps_v0 {
12458 +#endif /* _UAPI_VS_NETWORK_CMD_H */
12459 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/sched_cmd.h linux-4.4/include/uapi/vserver/sched_cmd.h
12460 --- linux-4.4/include/uapi/vserver/sched_cmd.h 1970-01-01 01:00:00.000000000 +0100
12461 +++ linux-4.4/include/uapi/vserver/sched_cmd.h 2021-02-24 16:56:24.596156409 +0100
12463 +#ifndef _UAPI_VS_SCHED_CMD_H
12464 +#define _UAPI_VS_SCHED_CMD_H
12467 +struct vcmd_prio_bias {
12469 + int32_t prio_bias;
12472 +#define VCMD_set_prio_bias VC_CMD(SCHED, 4, 0)
12473 +#define VCMD_get_prio_bias VC_CMD(SCHED, 5, 0)
12475 +#endif /* _UAPI_VS_SCHED_CMD_H */
12476 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/signal_cmd.h linux-4.4/include/uapi/vserver/signal_cmd.h
12477 --- linux-4.4/include/uapi/vserver/signal_cmd.h 1970-01-01 01:00:00.000000000 +0100
12478 +++ linux-4.4/include/uapi/vserver/signal_cmd.h 2021-02-24 16:56:24.596156409 +0100
12480 +#ifndef _UAPI_VS_SIGNAL_CMD_H
12481 +#define _UAPI_VS_SIGNAL_CMD_H
12484 +/* signalling vserver commands */
12486 +#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0)
12487 +#define VCMD_wait_exit VC_CMD(EVENT, 99, 0)
12489 +struct vcmd_ctx_kill_v0 {
12494 +struct vcmd_wait_exit_v0 {
12495 + int32_t reboot_cmd;
12496 + int32_t exit_code;
12500 +/* process alteration commands */
12502 +#define VCMD_get_pflags VC_CMD(PROCALT, 5, 0)
12503 +#define VCMD_set_pflags VC_CMD(PROCALT, 6, 0)
12505 +struct vcmd_pflags_v0 {
12506 + uint32_t flagword;
12510 +#endif /* _UAPI_VS_SIGNAL_CMD_H */
12511 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/space_cmd.h linux-4.4/include/uapi/vserver/space_cmd.h
12512 --- linux-4.4/include/uapi/vserver/space_cmd.h 1970-01-01 01:00:00.000000000 +0100
12513 +++ linux-4.4/include/uapi/vserver/space_cmd.h 2021-02-24 16:56:24.596156409 +0100
12515 +#ifndef _UAPI_VS_SPACE_CMD_H
12516 +#define _UAPI_VS_SPACE_CMD_H
12519 +#define VCMD_enter_space_v0 VC_CMD(PROCALT, 1, 0)
12520 +#define VCMD_enter_space_v1 VC_CMD(PROCALT, 1, 1)
12521 +#define VCMD_enter_space VC_CMD(PROCALT, 1, 2)
12523 +#define VCMD_set_space_v0 VC_CMD(PROCALT, 3, 0)
12524 +#define VCMD_set_space_v1 VC_CMD(PROCALT, 3, 1)
12525 +#define VCMD_set_space VC_CMD(PROCALT, 3, 2)
12527 +#define VCMD_get_space_mask_v0 VC_CMD(PROCALT, 4, 0)
12529 +#define VCMD_get_space_mask VC_CMD(VSPACE, 0, 1)
12530 +#define VCMD_get_space_default VC_CMD(VSPACE, 1, 0)
12533 +struct vcmd_space_mask_v1 {
12537 +struct vcmd_space_mask_v2 {
12542 +#endif /* _UAPI_VS_SPACE_CMD_H */
12543 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/switch.h linux-4.4/include/uapi/vserver/switch.h
12544 --- linux-4.4/include/uapi/vserver/switch.h 1970-01-01 01:00:00.000000000 +0100
12545 +++ linux-4.4/include/uapi/vserver/switch.h 2021-02-24 16:56:24.596156409 +0100
12547 +#ifndef _UAPI_VS_SWITCH_H
12548 +#define _UAPI_VS_SWITCH_H
12550 +#include <linux/types.h>
12553 +#define VC_CATEGORY(c) (((c) >> 24) & 0x3F) /* category field: 6 bits starting at bit 24 */
12554 +#define VC_COMMAND(c) (((c) >> 16) & 0xFF) /* command index field: 8 bits starting at bit 16 */
12555 +#define VC_VERSION(c) ((c) & 0xFFF) /* version field: low 12 bits */
12557 +#define VC_CMD(c, i, v) ((((VC_CAT_ ## c) & 0x3F) << 24) \
12558 + | (((i) & 0xFF) << 16) | ((v) & 0xFFF)) /* c is pasted onto VC_CAT_ to pick the category constant; packs category, index i and version v into the fields the three macros above extract */
12562 + Syscall Matrix V2.8
12564 + |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
12565 + |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | |
12566 + |INFO |SETUP | |MOVE | | | | | |
12567 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12568 + SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICE | |
12569 + HOST | 00| 01| 02| 03| 04| 05| | 06| 07|
12570 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12571 + CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| | |SCHED. | |
12572 + PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15|
12573 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12574 + MEMORY | | | | |MEMCTRL| | |SWAP | |
12575 + | 16| 17| 18| 19| 20| 21| | 22| 23|
12576 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12577 + NETWORK| |VNET |NETALT |NETMIG |NETCTRL| | |SERIAL | |
12578 + | 24| 25| 26| 27| 28| 29| | 30| 31|
12579 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12580 + DISK | | | |TAGMIG |DLIMIT | | |INODE | |
12581 + VFS | 32| 33| 34| 35| 36| 37| | 38| 39|
12582 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12583 + OTHER |VSTAT | | | | | | |VINFO | |
12584 + | 40| 41| 42| 43| 44| 45| | 46| 47|
12585 + =======+=======+=======+=======+=======+=======+=======+ +=======+=======+
12586 + SPECIAL|EVENT | | | |FLAGS | | |VSPACE | |
12587 + | 48| 49| 50| 51| 52| 53| | 54| 55|
12588 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12589 + SPECIAL|DEBUG | | | |RLIMIT |SYSCALL| | |COMPAT |
12590 + | 56| 57| 58| 59| 60|TEST 61| | 62| 63|
12591 + -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12595 +#define VC_CAT_VERSION 0
12597 +#define VC_CAT_VSETUP 1
12598 +#define VC_CAT_VHOST 2
12600 +#define VC_CAT_DEVICE 6
12602 +#define VC_CAT_VPROC 9
12603 +#define VC_CAT_PROCALT 10
12604 +#define VC_CAT_PROCMIG 11
12605 +#define VC_CAT_PROCTRL 12
12607 +#define VC_CAT_SCHED 14
12608 +#define VC_CAT_MEMCTRL 20
12610 +#define VC_CAT_VNET 25
12611 +#define VC_CAT_NETALT 26
12612 +#define VC_CAT_NETMIG 27
12613 +#define VC_CAT_NETCTRL 28
12615 +#define VC_CAT_TAGMIG 35
12616 +#define VC_CAT_DLIMIT 36
12617 +#define VC_CAT_INODE 38
12619 +#define VC_CAT_VSTAT 40
12620 +#define VC_CAT_VINFO 46
12621 +#define VC_CAT_EVENT 48
12623 +#define VC_CAT_FLAGS 52
12624 +#define VC_CAT_VSPACE 54
12625 +#define VC_CAT_DEBUG 56
12626 +#define VC_CAT_RLIMIT 60
12628 +#define VC_CAT_SYSTEST 61
12629 +#define VC_CAT_COMPAT 63
12631 +/* query version */
12633 +#define VCMD_get_version VC_CMD(VERSION, 0, 0)
12634 +#define VCMD_get_vci VC_CMD(VERSION, 1, 0)
12636 +#endif /* _UAPI_VS_SWITCH_H */
12637 diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/tag_cmd.h linux-4.4/include/uapi/vserver/tag_cmd.h
12638 --- linux-4.4/include/uapi/vserver/tag_cmd.h 1970-01-01 01:00:00.000000000 +0100
12639 +++ linux-4.4/include/uapi/vserver/tag_cmd.h 2021-02-24 16:56:24.596156409 +0100
12641 +#ifndef _UAPI_VS_TAG_CMD_H
12642 +#define _UAPI_VS_TAG_CMD_H
12645 +/* vinfo commands */
12647 +#define VCMD_task_tag VC_CMD(VINFO, 3, 0)
12650 +/* context commands */
12652 +#define VCMD_tag_migrate VC_CMD(TAGMIG, 1, 0)
12654 +#endif /* _UAPI_VS_TAG_CMD_H */
12655 diff -urNp -x '*.orig' linux-4.4/init/Kconfig linux-4.4/init/Kconfig
12656 --- linux-4.4/init/Kconfig 2021-02-24 16:56:11.929090822 +0100
12657 +++ linux-4.4/init/Kconfig 2021-02-24 16:56:24.596156409 +0100
12658 @@ -938,6 +938,7 @@ config NUMA_BALANCING_DEFAULT_ENABLED
12660 bool "Control Group support"
12664 This option adds support for grouping sets of processes together, for
12665 use with process control subsystems such as Cpusets, CFS, memory
12666 diff -urNp -x '*.orig' linux-4.4/init/main.c linux-4.4/init/main.c
12667 --- linux-4.4/init/main.c 2021-02-24 16:56:11.929090822 +0100
12668 +++ linux-4.4/init/main.c 2021-02-24 16:56:24.596156409 +0100
12670 #include <linux/proc_ns.h>
12671 #include <linux/io.h>
12672 #include <linux/kaiser.h>
12673 +#include <linux/vserver/percpu.h>
12675 #include <asm/io.h>
12676 #include <asm/bugs.h>
12677 diff -urNp -x '*.orig' linux-4.4/ipc/mqueue.c linux-4.4/ipc/mqueue.c
12678 --- linux-4.4/ipc/mqueue.c 2021-02-24 16:56:11.932424260 +0100
12679 +++ linux-4.4/ipc/mqueue.c 2021-02-24 16:56:24.596156409 +0100
12681 #include <linux/ipc_namespace.h>
12682 #include <linux/user_namespace.h>
12683 #include <linux/slab.h>
12684 +#include <linux/vs_context.h>
12685 +#include <linux/vs_limit.h>
12687 #include <net/sock.h>
12689 @@ -75,6 +77,7 @@ struct mqueue_inode_info {
12690 struct pid *notify_owner;
12691 struct user_namespace *notify_user_ns;
12692 struct user_struct *user; /* user who created, for accounting */
12693 + struct vx_info *vxi;
12694 struct sock *notify_sock;
12695 struct sk_buff *notify_cookie;
12697 @@ -230,6 +233,7 @@ static struct inode *mqueue_get_inode(st
12698 if (S_ISREG(mode)) {
12699 struct mqueue_inode_info *info;
12700 unsigned long mq_bytes, mq_treesize;
12701 + struct vx_info *vxi = current_vx_info();
12703 inode->i_fop = &mqueue_file_operations;
12704 inode->i_size = FILENT_SIZE;
12705 @@ -243,6 +247,7 @@ static struct inode *mqueue_get_inode(st
12706 info->notify_user_ns = NULL;
12708 info->user = NULL; /* set when all is ok */
12709 + info->vxi = NULL;
12710 info->msg_tree = RB_ROOT;
12711 info->node_cache = NULL;
12712 memset(&info->attr, 0, sizeof(info->attr));
12713 @@ -276,17 +281,20 @@ static struct inode *mqueue_get_inode(st
12715 spin_lock(&mq_lock);
12716 if (u->mq_bytes + mq_bytes < u->mq_bytes ||
12717 - u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
12718 + u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE) ||
12719 + !vx_ipcmsg_avail(vxi, mq_bytes)) {
12720 spin_unlock(&mq_lock);
12721 /* mqueue_evict_inode() releases info->messages */
12725 u->mq_bytes += mq_bytes;
12726 + vx_ipcmsg_add(vxi, u, mq_bytes);
12727 spin_unlock(&mq_lock);
12730 info->user = get_uid(u);
12731 + info->vxi = get_vx_info(vxi);
12732 } else if (S_ISDIR(mode)) {
12734 /* Some things misbehave if size == 0 on a directory */
12735 @@ -395,6 +403,7 @@ static void mqueue_evict_inode(struct in
12739 + struct vx_info *vxi = info->vxi;
12740 unsigned long mq_bytes, mq_treesize;
12742 /* Total amount of bytes accounted for the mqueue */
12743 @@ -407,6 +416,7 @@ static void mqueue_evict_inode(struct in
12745 spin_lock(&mq_lock);
12746 user->mq_bytes -= mq_bytes;
12747 + vx_ipcmsg_sub(vxi, user, mq_bytes);
12749 * get_ns_from_inode() ensures that the
12750 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
12751 @@ -416,6 +426,7 @@ static void mqueue_evict_inode(struct in
12753 ipc_ns->mq_queues_count--;
12754 spin_unlock(&mq_lock);
12755 + put_vx_info(vxi);
12759 diff -urNp -x '*.orig' linux-4.4/ipc/msg.c linux-4.4/ipc/msg.c
12760 --- linux-4.4/ipc/msg.c 2021-02-24 16:56:11.932424260 +0100
12761 +++ linux-4.4/ipc/msg.c 2021-02-24 16:56:24.596156409 +0100
12763 #include <linux/rwsem.h>
12764 #include <linux/nsproxy.h>
12765 #include <linux/ipc_namespace.h>
12766 +#include <linux/vs_base.h>
12768 #include <asm/current.h>
12769 #include <linux/uaccess.h>
12770 @@ -129,6 +130,7 @@ static int newque(struct ipc_namespace *
12772 msq->q_perm.mode = msgflg & S_IRWXUGO;
12773 msq->q_perm.key = key;
12774 + msq->q_perm.xid = vx_current_xid();
12776 msq->q_perm.security = NULL;
12777 retval = security_msg_queue_alloc(msq);
12778 diff -urNp -x '*.orig' linux-4.4/ipc/sem.c linux-4.4/ipc/sem.c
12779 --- linux-4.4/ipc/sem.c 2021-02-24 16:56:11.932424260 +0100
12780 +++ linux-4.4/ipc/sem.c 2021-02-24 16:56:24.596156409 +0100
12782 #include <linux/rwsem.h>
12783 #include <linux/nsproxy.h>
12784 #include <linux/ipc_namespace.h>
12785 +#include <linux/vs_base.h>
12786 +#include <linux/vs_limit.h>
12788 #include <linux/uaccess.h>
12790 @@ -533,6 +535,7 @@ static int newary(struct ipc_namespace *
12792 sma->sem_perm.mode = (semflg & S_IRWXUGO);
12793 sma->sem_perm.key = key;
12794 + sma->sem_perm.xid = vx_current_xid();
12796 sma->sem_perm.security = NULL;
12797 retval = security_sem_alloc(sma);
12798 @@ -563,6 +566,9 @@ static int newary(struct ipc_namespace *
12801 ns->used_sems += nsems;
12802 + /* FIXME: obsoleted? */
12803 + vx_semary_inc(sma);
12804 + vx_nsems_add(sma, nsems);
12806 sem_unlock(sma, -1);
12808 @@ -1151,6 +1157,9 @@ static void freeary(struct ipc_namespace
12810 wake_up_sem_queue_do(&tasks);
12811 ns->used_sems -= sma->sem_nsems;
12812 + /* FIXME: obsoleted? */
12813 + vx_nsems_sub(sma, sma->sem_nsems);
12814 + vx_semary_dec(sma);
12815 ipc_rcu_putref(sma, sem_rcu_free);
12818 diff -urNp -x '*.orig' linux-4.4/ipc/shm.c linux-4.4/ipc/shm.c
12819 --- linux-4.4/ipc/shm.c 2021-02-24 16:56:11.932424260 +0100
12820 +++ linux-4.4/ipc/shm.c 2021-02-24 16:56:24.599489848 +0100
12822 #include <linux/nsproxy.h>
12823 #include <linux/mount.h>
12824 #include <linux/ipc_namespace.h>
12825 +#include <linux/vs_context.h>
12826 +#include <linux/vs_limit.h>
12828 #include <linux/uaccess.h>
12830 @@ -234,10 +236,14 @@ static void shm_open(struct vm_area_stru
12831 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
12833 struct file *shm_file;
12834 + struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid);
12835 + int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
12837 shm_file = shp->shm_file;
12838 shp->shm_file = NULL;
12839 - ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
12840 + vx_ipcshm_sub(vxi, shp, numpages);
12841 + ns->shm_tot -= numpages;
12845 if (!is_file_hugepages(shm_file))
12846 @@ -246,6 +252,7 @@ static void shm_destroy(struct ipc_names
12847 user_shm_unlock(i_size_read(file_inode(shm_file)),
12850 + put_vx_info(vxi);
12851 ipc_rcu_putref(shp, shm_rcu_free);
12854 @@ -545,11 +552,15 @@ static int newseg(struct ipc_namespace *
12855 ns->shm_tot + numpages > ns->shm_ctlall)
12858 + if (!vx_ipcshm_avail(current_vx_info(), numpages))
12861 shp = ipc_rcu_alloc(sizeof(*shp));
12865 shp->shm_perm.key = key;
12866 + shp->shm_perm.xid = vx_current_xid();
12867 shp->shm_perm.mode = (shmflg & S_IRWXUGO);
12868 shp->mlock_user = NULL;
12870 @@ -620,6 +631,7 @@ static int newseg(struct ipc_namespace *
12872 ipc_unlock_object(&shp->shm_perm);
12874 + vx_ipcshm_add(current_vx_info(), key, numpages);
12878 diff -urNp -x '*.orig' linux-4.4/kernel/Makefile linux-4.4/kernel/Makefile
12879 --- linux-4.4/kernel/Makefile 2021-02-24 16:56:11.932424260 +0100
12880 +++ linux-4.4/kernel/Makefile 2021-02-24 16:56:24.599489848 +0100
12881 @@ -29,6 +29,7 @@ obj-y += printk/
12884 obj-y += livepatch/
12887 obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
12888 obj-$(CONFIG_FREEZER) += freezer.o
12889 diff -urNp -x '*.orig' linux-4.4/kernel/auditsc.c linux-4.4/kernel/auditsc.c
12890 --- linux-4.4/kernel/auditsc.c 2021-02-24 16:56:11.935757699 +0100
12891 +++ linux-4.4/kernel/auditsc.c 2021-02-24 16:56:24.599489848 +0100
12892 @@ -1964,7 +1964,7 @@ static int audit_set_loginuid_perm(kuid_
12893 if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
12895 /* it is set, you need permission */
12896 - if (!capable(CAP_AUDIT_CONTROL))
12897 + if (!vx_capable(CAP_AUDIT_CONTROL, VXC_AUDIT_CONTROL))
12899 /* reject if this is not an unset and we don't allow that */
12900 if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid))
12901 diff -urNp -x '*.orig' linux-4.4/kernel/capability.c linux-4.4/kernel/capability.c
12902 --- linux-4.4/kernel/capability.c 2021-02-24 16:56:11.935757699 +0100
12903 +++ linux-4.4/kernel/capability.c 2021-02-24 16:56:24.599489848 +0100
12905 #include <linux/syscalls.h>
12906 #include <linux/pid_namespace.h>
12907 #include <linux/user_namespace.h>
12908 +#include <linux/vs_context.h>
12909 #include <asm/uaccess.h>
12912 @@ -107,6 +108,7 @@ static int cap_validate_magic(cap_user_h
12918 * The only thing that can change the capabilities of the current
12919 * process is the current process. As such, we can't be in this code
12920 @@ -344,6 +346,8 @@ bool has_ns_capability_noaudit(struct ta
12924 +#include <linux/vserver/base.h>
12927 * has_capability_noaudit - Does a task have a capability (unaudited) in the
12929 diff -urNp -x '*.orig' linux-4.4/kernel/compat.c linux-4.4/kernel/compat.c
12930 --- linux-4.4/kernel/compat.c 2016-01-11 00:01:32.000000000 +0100
12931 +++ linux-4.4/kernel/compat.c 2021-02-24 16:56:24.599489848 +0100
12933 #include <linux/times.h>
12934 #include <linux/ptrace.h>
12935 #include <linux/gfp.h>
12936 +#include <linux/vs_time.h>
12938 #include <asm/uaccess.h>
12940 @@ -1059,7 +1060,7 @@ COMPAT_SYSCALL_DEFINE1(stime, compat_tim
12944 - do_settimeofday(&tv);
12945 + vx_settimeofday(&tv);
12949 diff -urNp -x '*.orig' linux-4.4/kernel/cred.c linux-4.4/kernel/cred.c
12950 --- linux-4.4/kernel/cred.c 2021-02-24 16:56:11.939091137 +0100
12951 +++ linux-4.4/kernel/cred.c 2021-02-24 16:56:24.599489848 +0100
12952 @@ -64,31 +64,6 @@ struct cred init_cred = {
12953 .group_info = &init_groups,
12956 -static inline void set_cred_subscribers(struct cred *cred, int n)
12958 -#ifdef CONFIG_DEBUG_CREDENTIALS
12959 - atomic_set(&cred->subscribers, n);
12963 -static inline int read_cred_subscribers(const struct cred *cred)
12965 -#ifdef CONFIG_DEBUG_CREDENTIALS
12966 - return atomic_read(&cred->subscribers);
12972 -static inline void alter_cred_subscribers(const struct cred *_cred, int n)
12974 -#ifdef CONFIG_DEBUG_CREDENTIALS
12975 - struct cred *cred = (struct cred *) _cred;
12977 - atomic_add(n, &cred->subscribers);
12982 * The RCU callback to actually dispose of a set of credentials
12984 @@ -243,21 +218,16 @@ error:
12986 * Call commit_creds() or abort_creds() to clean up.
12988 -struct cred *prepare_creds(void)
12989 +struct cred *__prepare_creds(const struct cred *old)
12991 - struct task_struct *task = current;
12992 - const struct cred *old;
12995 - validate_process_creds();
12997 new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
13001 kdebug("prepare_creds() alloc %p", new);
13003 - old = task->cred;
13004 memcpy(new, old, sizeof(struct cred));
13007 @@ -287,6 +257,13 @@ error:
13012 +struct cred *prepare_creds(void)
13014 + validate_process_creds();
13016 + return __prepare_creds(current->cred);
13018 EXPORT_SYMBOL(prepare_creds);
13021 diff -urNp -x '*.orig' linux-4.4/kernel/exit.c linux-4.4/kernel/exit.c
13022 --- linux-4.4/kernel/exit.c 2021-02-24 16:56:11.945758014 +0100
13023 +++ linux-4.4/kernel/exit.c 2021-02-24 16:56:24.599489848 +0100
13025 #include <linux/fs_struct.h>
13026 #include <linux/init_task.h>
13027 #include <linux/perf_event.h>
13028 +#include <linux/vs_limit.h>
13029 +#include <linux/vs_context.h>
13030 +#include <linux/vs_network.h>
13031 +#include <linux/vs_pid.h>
13032 #include <trace/events/sched.h>
13033 #include <linux/hw_breakpoint.h>
13034 #include <linux/oom.h>
13035 @@ -460,15 +464,25 @@ static struct task_struct *find_child_re
13037 struct pid_namespace *pid_ns = task_active_pid_ns(father);
13038 struct task_struct *reaper = pid_ns->child_reaper;
13039 + struct vx_info *vxi = task_get_vx_info(father);
13040 struct task_struct *p, *n;
13043 + BUG_ON(!vxi->vx_reaper);
13044 + if (vxi->vx_reaper != init_pid_ns.child_reaper &&
13045 + vxi->vx_reaper != father) {
13046 + reaper = vxi->vx_reaper;
13051 if (likely(reaper != father))
13055 reaper = find_alive_thread(father);
13057 pid_ns->child_reaper = reaper;
13062 write_unlock_irq(&tasklist_lock);
13063 @@ -485,7 +499,10 @@ static struct task_struct *find_child_re
13064 zap_pid_ns_processes(pid_ns);
13065 write_lock_irq(&tasklist_lock);
13070 + put_vx_info(vxi);
13075 @@ -573,9 +590,13 @@ static void forget_original_parent(struc
13078 reaper = find_new_reaper(father, reaper);
13079 - list_for_each_entry(p, &father->children, sibling) {
13080 + for (p = list_first_entry(&father->children, struct task_struct, sibling);
13081 + &p->sibling != &father->children; ) {
13082 + struct task_struct *next, *this_reaper = reaper;
13084 + this_reaper = task_active_pid_ns(reaper)->child_reaper;
13085 for_each_thread(p, t) {
13086 - t->real_parent = reaper;
13087 + t->real_parent = this_reaper;
13088 BUG_ON((!t->ptrace) != (t->parent == father));
13089 if (likely(!t->ptrace))
13090 t->parent = t->real_parent;
13091 @@ -587,10 +608,13 @@ static void forget_original_parent(struc
13092 * If this is a threaded reparent there is no need to
13093 * notify anyone anything has happened.
13095 - if (!same_thread_group(reaper, father))
13096 + if (!same_thread_group(this_reaper, father))
13097 reparent_leader(father, p, dead);
13098 + next = list_next_entry(p, sibling);
13099 + list_add(&p->sibling, &this_reaper->children);
13102 - list_splice_tail_init(&father->children, &reaper->children);
13103 + INIT_LIST_HEAD(&father->children);
13107 @@ -759,6 +783,9 @@ void do_exit(long code)
13109 flush_ptrace_hw_breakpoint(tsk);
13111 + /* needs to stay before exit_notify() */
13112 + exit_vx_info_early(tsk, code);
13114 TASKS_RCU(preempt_disable());
13115 TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
13116 TASKS_RCU(preempt_enable());
13117 @@ -790,6 +817,10 @@ void do_exit(long code)
13119 validate_creds_for_do_exit(tsk);
13121 + /* needs to stay after exit_notify() and before preempt_disable() */
13122 + exit_vx_info(tsk, code);
13123 + exit_nx_info(tsk);
13125 check_stack_usage();
13127 if (tsk->nr_dirtied)
13128 @@ -816,6 +847,7 @@ void do_exit(long code)
13129 tsk->state = TASK_DEAD;
13130 tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
13132 + printk("bad task: %p [%lx]\n", current, current->state);
13134 /* Avoid "noreturn function does return". */
13136 diff -urNp -x '*.orig' linux-4.4/kernel/fork.c linux-4.4/kernel/fork.c
13137 --- linux-4.4/kernel/fork.c 2021-02-24 16:56:11.945758014 +0100
13138 +++ linux-4.4/kernel/fork.c 2021-02-24 16:56:24.599489848 +0100
13140 #include <linux/aio.h>
13141 #include <linux/compiler.h>
13142 #include <linux/sysctl.h>
13143 +#include <linux/vs_context.h>
13144 +#include <linux/vs_network.h>
13145 +#include <linux/vs_limit.h>
13147 #include <asm/pgtable.h>
13148 #include <asm/pgalloc.h>
13149 @@ -227,6 +230,8 @@ void free_task(struct task_struct *tsk)
13150 arch_release_thread_info(tsk->stack);
13151 free_thread_info(tsk->stack);
13152 rt_mutex_debug_task_free(tsk);
13153 + clr_vx_info(&tsk->vx_info);
13154 + clr_nx_info(&tsk->nx_info);
13155 ftrace_graph_exit_task(tsk);
13156 put_seccomp_filter(tsk);
13157 arch_release_task_struct(tsk);
13158 @@ -1280,6 +1285,8 @@ static struct task_struct *copy_process(
13161 struct task_struct *p;
13162 + struct vx_info *vxi;
13163 + struct nx_info *nxi;
13164 void *cgrp_ss_priv[CGROUP_CANFORK_COUNT] = {};
13166 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
13167 @@ -1353,7 +1360,12 @@ static struct task_struct *copy_process(
13168 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
13169 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
13171 + init_vx_info(&p->vx_info, current_vx_info());
13172 + init_nx_info(&p->nx_info, current_nx_info());
13175 + if (!vx_nproc_avail(1))
13176 + goto bad_fork_free;
13177 if (atomic_read(&p->real_cred->user->processes) >=
13178 task_rlimit(p, RLIMIT_NPROC)) {
13179 if (p->real_cred->user != INIT_USER &&
13180 @@ -1648,6 +1660,18 @@ static struct task_struct *copy_process(
13182 spin_unlock(&current->sighand->siglock);
13183 syscall_tracepoint_update(p);
13185 + /* p is a copy of current */
13186 + vxi = p->vx_info;
13188 + claim_vx_info(vxi, p);
13189 + atomic_inc(&vxi->cvirt.nr_threads);
13190 + atomic_inc(&vxi->cvirt.total_forks);
13193 + nxi = p->nx_info;
13195 + claim_nx_info(nxi, p);
13196 write_unlock_irq(&tasklist_lock);
13198 proc_fork_connector(p);
13199 diff -urNp -x '*.orig' linux-4.4/kernel/kthread.c linux-4.4/kernel/kthread.c
13200 --- linux-4.4/kernel/kthread.c 2021-02-24 16:56:11.949091452 +0100
13201 +++ linux-4.4/kernel/kthread.c 2021-02-24 16:56:24.599489848 +0100
13203 #include <linux/ptrace.h>
13204 #include <linux/uaccess.h>
13205 #include <linux/cgroup.h>
13206 +#include <linux/vs_pid.h>
13207 #include <trace/events/sched.h>
13209 static DEFINE_SPINLOCK(kthread_create_lock);
13210 diff -urNp -x '*.orig' linux-4.4/kernel/nsproxy.c linux-4.4/kernel/nsproxy.c
13211 --- linux-4.4/kernel/nsproxy.c 2016-01-11 00:01:32.000000000 +0100
13212 +++ linux-4.4/kernel/nsproxy.c 2021-02-24 16:56:24.599489848 +0100
13213 @@ -20,11 +20,14 @@
13214 #include <linux/mnt_namespace.h>
13215 #include <linux/utsname.h>
13216 #include <linux/pid_namespace.h>
13217 +#include <linux/vserver/global.h>
13218 +#include <linux/vserver/debug.h>
13219 #include <net/net_namespace.h>
13220 #include <linux/ipc_namespace.h>
13221 #include <linux/proc_ns.h>
13222 #include <linux/file.h>
13223 #include <linux/syscalls.h>
13224 +#include "../fs/mount.h"
13226 static struct kmem_cache *nsproxy_cachep;
13228 @@ -46,8 +49,11 @@ static inline struct nsproxy *create_nsp
13229 struct nsproxy *nsproxy;
13231 nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
13234 atomic_set(&nsproxy->count, 1);
13235 + atomic_inc(&vs_global_nsproxy);
13237 + vxdprintk(VXD_CBIT(space, 2), "create_nsproxy = %p[1]", nsproxy);
13241 @@ -56,9 +62,12 @@ static inline struct nsproxy *create_nsp
13242 * Return the newly created nsproxy. Do not attach this to the task,
13243 * leave it to the caller to do proper locking and attach it to task.
13245 -static struct nsproxy *create_new_namespaces(unsigned long flags,
13246 - struct task_struct *tsk, struct user_namespace *user_ns,
13247 - struct fs_struct *new_fs)
13248 +static struct nsproxy *unshare_namespaces(
13249 + unsigned long flags,
13250 + struct nsproxy *orig,
13251 + struct fs_struct *new_fs,
13252 + struct user_namespace *new_user,
13253 + struct pid_namespace *new_pid)
13255 struct nsproxy *new_nsp;
13257 @@ -67,32 +76,31 @@ static struct nsproxy *create_new_namesp
13259 return ERR_PTR(-ENOMEM);
13261 - new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
13262 + new_nsp->mnt_ns = copy_mnt_ns(flags, orig->mnt_ns, new_user, new_fs);
13263 if (IS_ERR(new_nsp->mnt_ns)) {
13264 err = PTR_ERR(new_nsp->mnt_ns);
13268 - new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
13269 + new_nsp->uts_ns = copy_utsname(flags, new_user, orig->uts_ns);
13270 if (IS_ERR(new_nsp->uts_ns)) {
13271 err = PTR_ERR(new_nsp->uts_ns);
13275 - new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
13276 + new_nsp->ipc_ns = copy_ipcs(flags, new_user, orig->ipc_ns);
13277 if (IS_ERR(new_nsp->ipc_ns)) {
13278 err = PTR_ERR(new_nsp->ipc_ns);
13282 - new_nsp->pid_ns_for_children =
13283 - copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);
13284 + new_nsp->pid_ns_for_children = copy_pid_ns(flags, new_user, new_pid);
13285 if (IS_ERR(new_nsp->pid_ns_for_children)) {
13286 err = PTR_ERR(new_nsp->pid_ns_for_children);
13290 - new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
13291 + new_nsp->net_ns = copy_net_ns(flags, new_user, orig->net_ns);
13292 if (IS_ERR(new_nsp->net_ns)) {
13293 err = PTR_ERR(new_nsp->net_ns);
13295 @@ -117,6 +125,41 @@ out_ns:
13296 return ERR_PTR(err);
13299 +static struct nsproxy *create_new_namespaces(unsigned long flags,
13300 + struct task_struct *tsk, struct user_namespace *user_ns,
13301 + struct fs_struct *new_fs)
13304 + return unshare_namespaces(flags, tsk->nsproxy,
13305 + new_fs, user_ns, task_active_pid_ns(tsk));
13309 + * copies the nsproxy, setting refcount to 1, and grabbing a
13310 + * reference to all contained namespaces.
13312 +struct nsproxy *copy_nsproxy(struct nsproxy *orig)
13314 + struct nsproxy *ns = create_nsproxy();
13317 + memcpy(ns, orig, sizeof(struct nsproxy));
13318 + atomic_set(&ns->count, 1);
13321 + get_mnt_ns(ns->mnt_ns);
13323 + get_uts_ns(ns->uts_ns);
13325 + get_ipc_ns(ns->ipc_ns);
13326 + if (ns->pid_ns_for_children)
13327 + get_pid_ns(ns->pid_ns_for_children);
13329 + get_net(ns->net_ns);
13335 * called from clone. This now handles copy for nsproxy and all
13336 * namespaces therein.
13337 @@ -125,7 +168,10 @@ int copy_namespaces(unsigned long flags,
13339 struct nsproxy *old_ns = tsk->nsproxy;
13340 struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
13341 - struct nsproxy *new_ns;
13342 + struct nsproxy *new_ns = NULL;
13344 + vxdprintk(VXD_CBIT(space, 7), "copy_namespaces(0x%08lx,%p[%p])",
13345 + flags, tsk, old_ns);
13347 if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
13348 CLONE_NEWPID | CLONE_NEWNET)))) {
13349 @@ -133,7 +179,7 @@ int copy_namespaces(unsigned long flags,
13353 - if (!ns_capable(user_ns, CAP_SYS_ADMIN))
13354 + if (!vx_ns_can_unshare(user_ns, CAP_SYS_ADMIN, flags))
13358 @@ -152,6 +198,9 @@ int copy_namespaces(unsigned long flags,
13359 return PTR_ERR(new_ns);
13361 tsk->nsproxy = new_ns;
13362 + vxdprintk(VXD_CBIT(space, 3),
13363 + "copy_namespaces(0x%08lx,%p[%p]) = [%p]",
13364 + flags, tsk, old_ns, new_ns);
13368 @@ -165,7 +214,9 @@ void free_nsproxy(struct nsproxy *ns)
13369 put_ipc_ns(ns->ipc_ns);
13370 if (ns->pid_ns_for_children)
13371 put_pid_ns(ns->pid_ns_for_children);
13372 - put_net(ns->net_ns);
13374 + put_net(ns->net_ns);
13375 + atomic_dec(&vs_global_nsproxy);
13376 kmem_cache_free(nsproxy_cachep, ns);
13379 @@ -179,12 +230,16 @@ int unshare_nsproxy_namespaces(unsigned
13380 struct user_namespace *user_ns;
13383 + vxdprintk(VXD_CBIT(space, 4),
13384 + "unshare_nsproxy_namespaces(0x%08lx,[%p])",
13385 + unshare_flags, current->nsproxy);
13387 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
13388 CLONE_NEWNET | CLONE_NEWPID)))
13391 user_ns = new_cred ? new_cred->user_ns : current_user_ns();
13392 - if (!ns_capable(user_ns, CAP_SYS_ADMIN))
13393 + if (!vx_ns_can_unshare(user_ns, CAP_SYS_ADMIN, unshare_flags))
13396 *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
13397 diff -urNp -x '*.orig' linux-4.4/kernel/pid.c linux-4.4/kernel/pid.c
13398 --- linux-4.4/kernel/pid.c 2021-02-24 16:56:11.955758328 +0100
13399 +++ linux-4.4/kernel/pid.c 2021-02-24 16:56:24.599489848 +0100
13401 #include <linux/syscalls.h>
13402 #include <linux/proc_ns.h>
13403 #include <linux/proc_fs.h>
13404 +#include <linux/vs_pid.h>
13406 #define pid_hashfn(nr, ns) \
13407 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
13408 @@ -381,7 +382,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns);
13410 struct pid *find_vpid(int nr)
13412 - return find_pid_ns(nr, task_active_pid_ns(current));
13413 + return find_pid_ns(vx_rmap_pid(nr), task_active_pid_ns(current));
13415 EXPORT_SYMBOL_GPL(find_vpid);
13417 @@ -437,6 +438,9 @@ void transfer_pid(struct task_struct *ol
13418 struct task_struct *pid_task(struct pid *pid, enum pid_type type)
13420 struct task_struct *result = NULL;
13422 + if (type == __PIDTYPE_REALPID)
13423 + type = PIDTYPE_PID;
13425 struct hlist_node *first;
13426 first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
13427 @@ -455,7 +459,7 @@ struct task_struct *find_task_by_pid_ns(
13429 RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
13430 "find_task_by_pid_ns() needs rcu_read_lock() protection");
13431 - return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
13432 + return pid_task(find_pid_ns(vx_rmap_pid(nr), ns), PIDTYPE_PID);
13435 struct task_struct *find_task_by_vpid(pid_t vnr)
13436 @@ -499,7 +503,7 @@ struct pid *find_get_pid(pid_t nr)
13438 EXPORT_SYMBOL_GPL(find_get_pid);
13440 -pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
13441 +pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns)
13445 @@ -513,6 +517,11 @@ pid_t pid_nr_ns(struct pid *pid, struct
13447 EXPORT_SYMBOL_GPL(pid_nr_ns);
13449 +pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
13451 + return vx_map_pid(pid_unmapped_nr_ns(pid, ns));
13454 pid_t pid_vnr(struct pid *pid)
13456 return pid_nr_ns(pid, task_active_pid_ns(current));
13457 diff -urNp -x '*.orig' linux-4.4/kernel/pid_namespace.c linux-4.4/kernel/pid_namespace.c
13458 --- linux-4.4/kernel/pid_namespace.c 2021-02-24 16:56:11.955758328 +0100
13459 +++ linux-4.4/kernel/pid_namespace.c 2021-02-24 16:56:24.599489848 +0100
13461 #include <linux/proc_ns.h>
13462 #include <linux/reboot.h>
13463 #include <linux/export.h>
13464 +#include <linux/vserver/global.h>
13468 @@ -111,6 +112,7 @@ static struct pid_namespace *create_pid_
13469 ns->ns.ops = &pidns_operations;
13471 kref_init(&ns->kref);
13472 + atomic_inc(&vs_global_pid_ns);
13474 ns->parent = get_pid_ns(parent_pid_ns);
13475 ns->user_ns = get_user_ns(user_ns);
13476 @@ -128,6 +130,7 @@ static struct pid_namespace *create_pid_
13478 kfree(ns->pidmap[0].page);
13480 + atomic_dec(&vs_global_pid_ns);
13481 kmem_cache_free(pid_ns_cachep, ns);
13483 return ERR_PTR(err);
13484 diff -urNp -x '*.orig' linux-4.4/kernel/printk/printk.c linux-4.4/kernel/printk/printk.c
13485 --- linux-4.4/kernel/printk/printk.c 2021-02-24 16:56:11.959091767 +0100
13486 +++ linux-4.4/kernel/printk/printk.c 2021-02-24 16:56:24.602823286 +0100
13488 #include <linux/utsname.h>
13489 #include <linux/ctype.h>
13490 #include <linux/uio.h>
13491 +#include <linux/vs_cvirt.h>
13493 #include <asm/uaccess.h>
13495 @@ -503,7 +504,7 @@ int check_syslog_permissions(int type, i
13498 if (syslog_action_restricted(type)) {
13499 - if (capable(CAP_SYSLOG))
13500 + if (vx_capable(CAP_SYSLOG, VXC_SYSLOG))
13503 * For historical reasons, accept CAP_SYS_ADMIN too, with
13504 @@ -1315,12 +1316,9 @@ int do_syslog(int type, char __user *buf
13509 - case SYSLOG_ACTION_CLOSE: /* Close log */
13511 - case SYSLOG_ACTION_OPEN: /* Open log */
13513 - case SYSLOG_ACTION_READ: /* Read from log */
13514 + if ((type == SYSLOG_ACTION_READ) ||
13515 + (type == SYSLOG_ACTION_READ_ALL) ||
13516 + (type == SYSLOG_ACTION_READ_CLEAR)) {
13518 if (!buf || len < 0)
13520 @@ -1331,6 +1329,16 @@ int do_syslog(int type, char __user *buf
13525 + if (!vx_check(0, VS_ADMIN|VS_WATCH))
13526 + return vx_do_syslog(type, buf, len);
13529 + case SYSLOG_ACTION_CLOSE: /* Close log */
13531 + case SYSLOG_ACTION_OPEN: /* Open log */
13533 + case SYSLOG_ACTION_READ: /* Read from log */
13534 error = wait_event_interruptible(log_wait,
13535 syslog_seq != log_next_seq);
13537 @@ -1343,16 +1351,6 @@ int do_syslog(int type, char __user *buf
13539 /* Read last kernel messages */
13540 case SYSLOG_ACTION_READ_ALL:
13542 - if (!buf || len < 0)
13547 - if (!access_ok(VERIFY_WRITE, buf, len)) {
13551 error = syslog_print_all(buf, len, clear);
13553 /* Clear ring buffer */
13554 diff -urNp -x '*.orig' linux-4.4/kernel/ptrace.c linux-4.4/kernel/ptrace.c
13555 --- linux-4.4/kernel/ptrace.c 2021-02-24 16:56:11.959091767 +0100
13556 +++ linux-4.4/kernel/ptrace.c 2021-02-24 16:56:24.602823286 +0100
13558 #include <linux/syscalls.h>
13559 #include <linux/uaccess.h>
13560 #include <linux/regset.h>
13561 +#include <linux/vs_context.h>
13562 #include <linux/hw_breakpoint.h>
13563 #include <linux/cn_proc.h>
13564 #include <linux/compat.h>
13565 @@ -306,6 +307,11 @@ ok:
13566 !ptrace_has_cap(mm->user_ns, mode)))
13569 + if (!vx_check(task->xid, VS_ADMIN_P|VS_WATCH_P|VS_IDENT))
13571 + if (!vx_check(task->xid, VS_IDENT) &&
13572 + !task_vx_flags(task, VXF_STATE_ADMIN, 0))
13574 if (mode & PTRACE_MODE_SCHED)
13576 return security_ptrace_access_check(task, mode);
13577 diff -urNp -x '*.orig' linux-4.4/kernel/reboot.c linux-4.4/kernel/reboot.c
13578 --- linux-4.4/kernel/reboot.c 2021-02-24 16:56:11.959091767 +0100
13579 +++ linux-4.4/kernel/reboot.c 2021-02-24 16:56:24.602823286 +0100
13581 #include <linux/syscalls.h>
13582 #include <linux/syscore_ops.h>
13583 #include <linux/uaccess.h>
13584 +#include <linux/vs_pid.h>
13587 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
13588 @@ -269,6 +270,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off);
13590 static DEFINE_MUTEX(reboot_mutex);
13592 +long vs_reboot(unsigned int, void __user *);
13595 * Reboot system call: for obvious reasons only root may call it,
13596 * and even root needs to set up some magic numbers in the registers
13597 @@ -311,6 +314,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int
13598 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
13599 cmd = LINUX_REBOOT_CMD_HALT;
13601 + if (!vx_check(0, VS_ADMIN|VS_WATCH))
13602 + return vs_reboot(cmd, arg);
13604 mutex_lock(&reboot_mutex);
13606 case LINUX_REBOOT_CMD_RESTART:
13607 diff -urNp -x '*.orig' linux-4.4/kernel/sched/core.c linux-4.4/kernel/sched/core.c
13608 --- linux-4.4/kernel/sched/core.c 2021-02-24 16:56:11.962425205 +0100
13609 +++ linux-4.4/kernel/sched/core.c 2021-02-24 16:56:24.602823286 +0100
13611 #include <linux/binfmts.h>
13612 #include <linux/context_tracking.h>
13613 #include <linux/compiler.h>
13614 +#include <linux/vs_sched.h>
13615 +#include <linux/vs_cvirt.h>
13617 #include <asm/switch_to.h>
13618 #include <asm/tlb.h>
13619 @@ -3561,7 +3563,7 @@ SYSCALL_DEFINE1(nice, int, increment)
13621 nice = clamp_val(nice, MIN_NICE, MAX_NICE);
13622 if (increment < 0 && !can_nice(current, nice))
13624 + return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM;
13626 retval = security_task_setnice(current, nice);
13628 diff -urNp -x '*.orig' linux-4.4/kernel/sched/cputime.c linux-4.4/kernel/sched/cputime.c
13629 --- linux-4.4/kernel/sched/cputime.c 2021-02-24 16:56:11.962425205 +0100
13630 +++ linux-4.4/kernel/sched/cputime.c 2021-02-24 16:56:24.602823286 +0100
13632 #include <linux/kernel_stat.h>
13633 #include <linux/static_key.h>
13634 #include <linux/context_tracking.h>
13635 +#include <linux/vs_sched.h>
13639 @@ -135,14 +136,17 @@ static inline void task_group_account_fi
13640 void account_user_time(struct task_struct *p, cputime_t cputime,
13641 cputime_t cputime_scaled)
13643 + struct vx_info *vxi = p->vx_info; /* p is _always_ current */
13644 + int nice = (task_nice(p) > 0);
13647 /* Add user time to process. */
13648 p->utime += cputime;
13649 p->utimescaled += cputime_scaled;
13650 + vx_account_user(vxi, cputime, nice);
13651 account_group_user_time(p, cputime);
13653 - index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
13654 + index = (nice) ? CPUTIME_NICE : CPUTIME_USER;
13656 /* Add user time to cpustat. */
13657 task_group_account_field(p, index, (__force u64) cputime);
13658 @@ -189,9 +193,12 @@ static inline
13659 void __account_system_time(struct task_struct *p, cputime_t cputime,
13660 cputime_t cputime_scaled, int index)
13662 + struct vx_info *vxi = p->vx_info; /* p is _always_ current */
13664 /* Add system time to process. */
13665 p->stime += cputime;
13666 p->stimescaled += cputime_scaled;
13667 + vx_account_system(vxi, cputime, 0 /* do we have idle time? */);
13668 account_group_system_time(p, cputime);
13670 /* Add system time to cpustat. */
13671 diff -urNp -x '*.orig' linux-4.4/kernel/sched/fair.c linux-4.4/kernel/sched/fair.c
13672 --- linux-4.4/kernel/sched/fair.c 2021-02-24 16:56:11.965758643 +0100
13673 +++ linux-4.4/kernel/sched/fair.c 2021-02-24 16:56:24.602823286 +0100
13675 #include <linux/mempolicy.h>
13676 #include <linux/migrate.h>
13677 #include <linux/task_work.h>
13678 +#include <linux/vs_cvirt.h>
13680 #include <trace/events/sched.h>
13682 @@ -3089,6 +3090,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
13683 __enqueue_entity(cfs_rq, se);
13686 + if (entity_is_task(se))
13687 + vx_activate_task(task_of(se));
13688 if (cfs_rq->nr_running == 1) {
13689 list_add_leaf_cfs_rq(cfs_rq);
13690 check_enqueue_throttle(cfs_rq);
13691 @@ -3170,6 +3173,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
13692 if (se != cfs_rq->curr)
13693 __dequeue_entity(cfs_rq, se);
13695 + if (entity_is_task(se))
13696 + vx_deactivate_task(task_of(se));
13697 account_entity_dequeue(cfs_rq, se);
13700 diff -urNp -x '*.orig' linux-4.4/kernel/sched/loadavg.c linux-4.4/kernel/sched/loadavg.c
13701 --- linux-4.4/kernel/sched/loadavg.c 2021-02-24 16:56:11.965758643 +0100
13702 +++ linux-4.4/kernel/sched/loadavg.c 2021-02-24 16:56:24.606156724 +0100
13703 @@ -73,9 +73,16 @@ EXPORT_SYMBOL(avenrun); /* should be rem
13705 void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
13707 - loads[0] = (avenrun[0] + offset) << shift;
13708 - loads[1] = (avenrun[1] + offset) << shift;
13709 - loads[2] = (avenrun[2] + offset) << shift;
13710 + if (vx_flags(VXF_VIRT_LOAD, 0)) {
13711 + struct vx_info *vxi = current_vx_info();
13712 + loads[0] = (vxi->cvirt.load[0] + offset) << shift;
13713 + loads[1] = (vxi->cvirt.load[1] + offset) << shift;
13714 + loads[2] = (vxi->cvirt.load[2] + offset) << shift;
13716 + loads[0] = (avenrun[0] + offset) << shift;
13717 + loads[1] = (avenrun[1] + offset) << shift;
13718 + loads[2] = (avenrun[2] + offset) << shift;
13722 long calc_load_fold_active(struct rq *this_rq)
13723 diff -urNp -x '*.orig' linux-4.4/kernel/signal.c linux-4.4/kernel/signal.c
13724 --- linux-4.4/kernel/signal.c 2021-02-24 16:56:11.965758643 +0100
13725 +++ linux-4.4/kernel/signal.c 2021-02-24 16:56:24.606156724 +0100
13727 #include <linux/compat.h>
13728 #include <linux/cn_proc.h>
13729 #include <linux/compiler.h>
13730 +#include <linux/vs_context.h>
13731 +#include <linux/vs_pid.h>
13733 #define CREATE_TRACE_POINTS
13734 #include <trace/events/signal.h>
13735 @@ -782,9 +784,18 @@ static int check_kill_permission(int sig
13739 + vxdprintk(VXD_CBIT(misc, 7),
13740 + "check_kill_permission(%d,%p,%p[#%u,%u])",
13741 + sig, info, t, vx_task_xid(t), t->pid);
13743 if (!valid_signal(sig))
13746 +/* FIXME: needed? if so, why?
13747 + if ((info != SEND_SIG_NOINFO) &&
13748 + (is_si_special(info) || !si_fromuser(info)))
13751 if (!si_fromuser(info))
13754 @@ -808,6 +819,20 @@ static int check_kill_permission(int sig
13759 + if (t->pid == 1 && current->xid)
13763 + /* FIXME: we shouldn't return ESRCH ever, to avoid
13764 + loops, maybe ENOENT or EACCES? */
13765 + if (!vx_check(vx_task_xid(t), VS_WATCH_P | VS_IDENT)) {
13766 + vxdprintk(current->xid || VXD_CBIT(misc, 7),
13767 + "signal %d[%p] xid mismatch %p[#%u,%u] xid=#%u",
13768 + sig, info, t, vx_task_xid(t), t->pid, current->xid);
13772 return security_task_kill(t, info, sig, 0);
13775 @@ -1359,8 +1384,14 @@ int kill_pid_info(int sig, struct siginf
13778 p = pid_task(pid, PIDTYPE_PID);
13780 - error = group_send_sig_info(sig, info, p);
13782 + if (vx_check(vx_task_xid(p), VS_IDENT))
13783 + error = group_send_sig_info(sig, info, p);
13785 + rcu_read_unlock();
13790 if (likely(!p || error != -ESRCH))
13792 @@ -1405,7 +1436,7 @@ int kill_pid_info_as_cred(int sig, struc
13795 p = pid_task(pid, PIDTYPE_PID);
13797 + if (!p || !vx_check(vx_task_xid(p), VS_IDENT)) {
13801 @@ -1461,8 +1492,10 @@ static int kill_something_info(int sig,
13802 struct task_struct * p;
13804 for_each_process(p) {
13805 - if (task_pid_vnr(p) > 1 &&
13806 - !same_thread_group(p, current)) {
13807 + if (vx_check(vx_task_xid(p), VS_ADMIN|VS_IDENT) &&
13808 + task_pid_vnr(p) > 1 &&
13809 + !same_thread_group(p, current) &&
13810 + !vx_current_initpid(p->pid)) {
13811 int err = group_send_sig_info(sig, info, p);
13814 @@ -2333,6 +2366,11 @@ relock:
13815 !sig_kernel_only(signr))
13818 + /* virtual init is protected against user signals */
13819 + if ((ksig->info.si_code == SI_USER) &&
13820 + vx_current_initpid(current->pid))
13823 if (sig_kernel_stop(signr)) {
13825 * The default action is to stop all threads in
13826 diff -urNp -x '*.orig' linux-4.4/kernel/softirq.c linux-4.4/kernel/softirq.c
13827 --- linux-4.4/kernel/softirq.c 2016-01-11 00:01:32.000000000 +0100
13828 +++ linux-4.4/kernel/softirq.c 2021-02-24 16:56:24.606156724 +0100
13830 #include <linux/smpboot.h>
13831 #include <linux/tick.h>
13832 #include <linux/irq.h>
13833 +#include <linux/vs_context.h>
13835 #define CREATE_TRACE_POINTS
13836 #include <trace/events/irq.h>
13837 diff -urNp -x '*.orig' linux-4.4/kernel/sys.c linux-4.4/kernel/sys.c
13838 --- linux-4.4/kernel/sys.c 2021-02-24 16:56:11.965758643 +0100
13839 +++ linux-4.4/kernel/sys.c 2021-02-24 16:56:24.606156724 +0100
13841 #include <linux/nospec.h>
13843 #include <linux/kmsg_dump.h>
13844 +#include <linux/vs_pid.h>
13845 /* Move somewhere else to avoid recompiling? */
13846 #include <generated/utsrelease.h>
13848 @@ -159,7 +160,10 @@ static int set_one_prio(struct task_stru
13851 if (niceval < task_nice(p) && !can_nice(p, niceval)) {
13853 + if (vx_flags(VXF_IGNEG_NICE, 0))
13859 no_nice = security_task_setnice(p, niceval);
13860 @@ -210,6 +214,8 @@ SYSCALL_DEFINE3(setpriority, int, which,
13862 pgrp = task_pgrp(current);
13863 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
13864 + if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13866 error = set_one_prio(p, niceval, error);
13867 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
13869 @@ -276,6 +282,8 @@ SYSCALL_DEFINE2(getpriority, int, which,
13871 pgrp = task_pgrp(current);
13872 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
13873 + if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13875 niceval = nice_to_rlimit(task_nice(p));
13876 if (niceval > retval)
13878 @@ -292,6 +300,8 @@ SYSCALL_DEFINE2(getpriority, int, which,
13879 goto out_unlock; /* No processes for this user */
13881 do_each_thread(g, p) {
13882 + if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13884 if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
13885 niceval = nice_to_rlimit(task_nice(p));
13886 if (niceval > retval)
13887 @@ -1213,7 +1223,8 @@ SYSCALL_DEFINE2(sethostname, char __user
13889 char tmp[__NEW_UTS_LEN];
13891 - if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
13892 + if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns,
13893 + CAP_SYS_ADMIN, VXC_SET_UTSNAME))
13896 if (len < 0 || len > __NEW_UTS_LEN)
13897 @@ -1266,7 +1277,8 @@ SYSCALL_DEFINE2(setdomainname, char __us
13899 char tmp[__NEW_UTS_LEN];
13901 - if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
13902 + if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns,
13903 + CAP_SYS_ADMIN, VXC_SET_UTSNAME))
13905 if (len < 0 || len > __NEW_UTS_LEN)
13907 @@ -1386,7 +1398,7 @@ int do_prlimit(struct task_struct *tsk,
13908 /* Keep the capable check against init_user_ns until
13909 cgroups can contain all limits */
13910 if (new_rlim->rlim_max > rlim->rlim_max &&
13911 - !capable(CAP_SYS_RESOURCE))
13912 + !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
13915 retval = security_task_setrlimit(tsk->group_leader,
13916 @@ -1439,7 +1451,8 @@ static int check_prlimit_permission(stru
13917 gid_eq(cred->gid, tcred->sgid) &&
13918 gid_eq(cred->gid, tcred->gid))
13920 - if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
13921 + if (vx_ns_capable(tcred->user_ns,
13922 + CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
13926 diff -urNp -x '*.orig' linux-4.4/kernel/sysctl.c linux-4.4/kernel/sysctl.c
13927 --- linux-4.4/kernel/sysctl.c 2021-02-24 16:56:11.969092082 +0100
13928 +++ linux-4.4/kernel/sysctl.c 2021-02-24 16:56:24.606156724 +0100
13930 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
13931 #include <linux/lockdep.h>
13933 +extern char vshelper_path[];
13934 #ifdef CONFIG_CHR_DEV_SG
13935 #include <scsi/sg.h>
13937 @@ -281,6 +282,13 @@ static int max_extfrag_threshold = 1000;
13939 static struct ctl_table kern_table[] = {
13941 + .procname = "vshelper",
13942 + .data = &vshelper_path,
13945 + .proc_handler = proc_dostring,
13948 .procname = "sched_child_runs_first",
13949 .data = &sysctl_sched_child_runs_first,
13950 .maxlen = sizeof(unsigned int),
13951 @@ -1388,7 +1396,6 @@ static struct ctl_table vm_table[] = {
13956 #endif /* CONFIG_COMPACTION */
13958 .procname = "min_free_kbytes",
13959 diff -urNp -x '*.orig' linux-4.4/kernel/sysctl_binary.c linux-4.4/kernel/sysctl_binary.c
13960 --- linux-4.4/kernel/sysctl_binary.c 2021-02-24 16:56:11.969092082 +0100
13961 +++ linux-4.4/kernel/sysctl_binary.c 2021-02-24 16:56:24.606156724 +0100
13962 @@ -73,6 +73,7 @@ static const struct bin_table bin_kern_t
13964 { CTL_INT, KERN_PANIC, "panic" },
13965 { CTL_INT, KERN_REALROOTDEV, "real-root-dev" },
13966 + { CTL_STR, KERN_VSHELPER, "vshelper" },
13968 { CTL_STR, KERN_SPARC_REBOOT, "reboot-cmd" },
13969 { CTL_INT, KERN_CTLALTDEL, "ctrl-alt-del" },
13970 diff -urNp -x '*.orig' linux-4.4/kernel/time/posix-timers.c linux-4.4/kernel/time/posix-timers.c
13971 --- linux-4.4/kernel/time/posix-timers.c 2021-02-24 16:56:11.969092082 +0100
13972 +++ linux-4.4/kernel/time/posix-timers.c 2021-02-24 16:56:24.606156724 +0100
13974 #include <linux/workqueue.h>
13975 #include <linux/export.h>
13976 #include <linux/hashtable.h>
13977 +#include <linux/vs_context.h>
13979 #include "timekeeping.h"
13981 @@ -417,6 +418,7 @@ int posix_timer_event(struct k_itimer *t
13983 struct task_struct *task;
13984 int shared, ret = -1;
13987 * FIXME: if ->sigq is queued we can race with
13988 * dequeue_signal()->do_schedule_next_timer().
13989 @@ -433,10 +435,18 @@ int posix_timer_event(struct k_itimer *t
13991 task = pid_task(timr->it_pid, PIDTYPE_PID);
13993 + struct vx_info_save vxis;
13994 + struct vx_info *vxi;
13996 + vxi = get_vx_info(task->vx_info);
13997 + enter_vx_info(vxi, &vxis);
13998 shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
13999 ret = send_sigqueue(timr->sigq, task, shared);
14000 + leave_vx_info(&vxis);
14001 + put_vx_info(vxi);
14005 /* If we failed to send the signal the timer stops. */
14008 diff -urNp -x '*.orig' linux-4.4/kernel/time/time.c linux-4.4/kernel/time/time.c
14009 --- linux-4.4/kernel/time/time.c 2021-02-24 16:56:11.972425520 +0100
14010 +++ linux-4.4/kernel/time/time.c 2021-02-24 16:56:24.606156724 +0100
14012 #include <linux/fs.h>
14013 #include <linux/math64.h>
14014 #include <linux/ptrace.h>
14015 +#include <linux/vs_time.h>
14017 #include <asm/uaccess.h>
14018 #include <asm/unistd.h>
14019 @@ -94,7 +95,7 @@ SYSCALL_DEFINE1(stime, time_t __user *,
14023 - do_settimeofday(&tv);
14024 + vx_settimeofday(&tv);
14028 @@ -187,7 +188,7 @@ int do_sys_settimeofday(const struct tim
14032 - return do_settimeofday(tv);
14033 + return vx_settimeofday(tv);
14037 diff -urNp -x '*.orig' linux-4.4/kernel/time/timekeeping.c linux-4.4/kernel/time/timekeeping.c
14038 --- linux-4.4/kernel/time/timekeeping.c 2021-02-24 16:56:11.972425520 +0100
14039 +++ linux-4.4/kernel/time/timekeeping.c 2021-02-24 16:56:24.606156724 +0100
14041 #include <linux/stop_machine.h>
14042 #include <linux/pvclock_gtod.h>
14043 #include <linux/compiler.h>
14044 +#include <linux/vs_time.h>
14046 #include "tick-internal.h"
14047 #include "ntp_internal.h"
14048 @@ -922,7 +923,9 @@ void ktime_get_raw_and_real_ts64(struct
14049 } while (read_seqcount_retry(&tk_core.seq, seq));
14051 timespec64_add_ns(ts_raw, nsecs_raw);
14052 + vx_adjust_timespec(ts_raw);
14053 timespec64_add_ns(ts_real, nsecs_real);
14054 + vx_adjust_timespec(ts_real);
14056 EXPORT_SYMBOL(ktime_get_raw_and_real_ts64);
14058 diff -urNp -x '*.orig' linux-4.4/kernel/time/timer.c linux-4.4/kernel/time/timer.c
14059 --- linux-4.4/kernel/time/timer.c 2021-02-24 16:56:11.972425520 +0100
14060 +++ linux-4.4/kernel/time/timer.c 2021-02-24 16:56:24.609490163 +0100
14062 #include <linux/slab.h>
14063 #include <linux/compat.h>
14064 #include <linux/random.h>
14065 +#include <linux/vs_base.h>
14066 +#include <linux/vs_cvirt.h>
14067 +#include <linux/vs_pid.h>
14068 +#include <linux/vserver/sched.h>
14070 #include <asm/uaccess.h>
14071 #include <asm/unistd.h>
14072 diff -urNp -x '*.orig' linux-4.4/kernel/user_namespace.c linux-4.4/kernel/user_namespace.c
14073 --- linux-4.4/kernel/user_namespace.c 2021-02-24 16:56:11.982425835 +0100
14074 +++ linux-4.4/kernel/user_namespace.c 2021-02-24 16:56:24.609490163 +0100
14076 #include <linux/ctype.h>
14077 #include <linux/projid.h>
14078 #include <linux/fs_struct.h>
14079 +#include <linux/vserver/global.h>
14081 static struct kmem_cache *user_ns_cachep __read_mostly;
14082 static DEFINE_MUTEX(userns_state_mutex);
14083 @@ -97,6 +98,7 @@ int create_user_ns(struct cred *new)
14085 atomic_set(&ns->count, 1);
14086 /* Leave the new->user_ns reference with the new user namespace. */
14087 + atomic_inc(&vs_global_user_ns);
14088 ns->parent = parent_ns;
14089 ns->level = parent_ns->level + 1;
14091 @@ -145,6 +147,7 @@ void free_user_ns(struct user_namespace
14092 key_put(ns->persistent_keyring_register);
14094 ns_free_inum(&ns->ns);
14095 + atomic_dec(&vs_global_user_ns);
14096 kmem_cache_free(user_ns_cachep, ns);
14098 } while (atomic_dec_and_test(&parent->count));
14099 @@ -358,6 +361,18 @@ gid_t from_kgid_munged(struct user_names
14101 EXPORT_SYMBOL(from_kgid_munged);
14103 +ktag_t make_ktag(struct user_namespace *from, vtag_t tag)
14105 + return KTAGT_INIT(tag);
14107 +EXPORT_SYMBOL(make_ktag);
14109 +vtag_t from_ktag(struct user_namespace *to, ktag_t tag)
14111 + return __ktag_val(tag);
14113 +EXPORT_SYMBOL(from_ktag);
14116 * make_kprojid - Map a user-namespace projid pair into a kprojid.
14117 * @ns: User namespace that the projid is in
14118 diff -urNp -x '*.orig' linux-4.4/kernel/utsname.c linux-4.4/kernel/utsname.c
14119 --- linux-4.4/kernel/utsname.c 2016-01-11 00:01:32.000000000 +0100
14120 +++ linux-4.4/kernel/utsname.c 2021-02-24 16:56:24.609490163 +0100
14121 @@ -16,14 +16,17 @@
14122 #include <linux/slab.h>
14123 #include <linux/user_namespace.h>
14124 #include <linux/proc_ns.h>
14125 +#include <linux/vserver/global.h>
14127 static struct uts_namespace *create_uts_ns(void)
14129 struct uts_namespace *uts_ns;
14131 uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
14134 kref_init(&uts_ns->kref);
14135 + atomic_inc(&vs_global_uts_ns);
14140 @@ -87,6 +90,7 @@ void free_uts_ns(struct kref *kref)
14141 ns = container_of(kref, struct uts_namespace, kref);
14142 put_user_ns(ns->user_ns);
14143 ns_free_inum(&ns->ns);
14144 + atomic_dec(&vs_global_uts_ns);
14148 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/Kconfig linux-4.4/kernel/vserver/Kconfig
14149 --- linux-4.4/kernel/vserver/Kconfig 1970-01-01 01:00:00.000000000 +0100
14150 +++ linux-4.4/kernel/vserver/Kconfig 2021-02-24 16:56:24.612823601 +0100
14153 +# Linux VServer configuration
14156 +menu "Linux VServer"
14158 +config VSERVER_AUTO_LBACK
14159 + bool "Automatically Assign Loopback IP"
14162 + Automatically assign a guest specific loopback
14163 + IP and add it to the kernel network stack on
14166 +config VSERVER_AUTO_SINGLE
14167 + bool "Automatic Single IP Special Casing"
14170 + This allows network contexts with a single IP to
14171 + automatically remap 0.0.0.0 bindings to that IP,
14172 + avoiding further network checks and improving
14175 + (note: such guests do not allow changing the IP
14176 + on the fly and do not show loopback addresses)
14178 +config VSERVER_COWBL
14179 + bool "Enable COW Immutable Link Breaking"
14182 + This enables the COW (Copy-On-Write) link break code.
14183 + It allows you to treat unified files like normal files
14184 + when writing to them (which will implicitly break the
14185 + link and create a copy of the unified file)
14187 +config VSERVER_VTIME
14188 + bool "Enable Virtualized Guest Time (EXPERIMENTAL)"
14191 + This enables per guest time offsets to allow for
14192 + adjusting the system clock individually per guest.
14193 + This adds some overhead to the time functions and
14194 + therefore should not be enabled without good reason.
14196 +config VSERVER_DEVICE
14197 + bool "Enable Guest Device Mapping (EXPERIMENTAL)"
14200 + This enables generic device remapping.
14202 +config VSERVER_PROC_SECURE
14203 + bool "Enable Proc Security"
14204 + depends on PROC_FS
14207 + This configures ProcFS security to initially hide
14208 + non-process entries for all contexts except the main and
14209 + spectator context (i.e. for all guests), which is a secure
14212 + (note: on 1.2x the entries were visible by default)
14215 + prompt "Persistent Inode Tagging"
14216 + default TAGGING_ID24
14218 + This adds persistent context information to filesystems
14219 + mounted with the tagxid option. Tagging is a requirement
14220 + for per-context disk limits and per-context quota.
14223 +config TAGGING_NONE
14226 + do not store per-context information in inodes.
14228 +config TAGGING_UID16
14229 + bool "UID16/GID32"
14231 + reduces UID to 16 bit, but leaves GID at 32 bit.
14233 +config TAGGING_GID16
14234 + bool "UID32/GID16"
14236 + reduces GID to 16 bit, but leaves UID at 32 bit.
14238 +config TAGGING_ID24
14239 + bool "UID24/GID24"
14241 + uses the upper 8bit from UID and GID for XID tagging
14242 + which leaves 24bit for UID/GID each, which should be
14243 + more than sufficient for normal use.
14245 +config TAGGING_INTERN
14246 + bool "UID32/GID32"
14248 + this uses otherwise reserved inode fields in the on
14249 + disk representation, which limits the use to a few
14250 + filesystems (currently ext2 and ext3)
14255 + bool "Tag NFSD User Auth and Files"
14258 + Enable this if you do want the in-kernel NFS
14259 + Server to use the tagging specified above.
14260 + (will require patched clients too)
14262 +config VSERVER_PRIVACY
14263 + bool "Honor Privacy Aspects of Guests"
14266 + When enabled, most context checks will disallow
14267 + access to structures assigned to a specific context,
14268 + like ptys or loop devices.
14270 +config VSERVER_CONTEXTS
14271 + int "Maximum number of Contexts (1-65533)" if EMBEDDED
14273 + default "768" if 64BIT
14276 + This setting will optimize certain data structures
14277 + and memory allocations according to the expected
14280 + note: this is not a strict upper limit.
14282 +config VSERVER_WARN
14283 + bool "VServer Warnings"
14286 + This enables various runtime warnings, which will
14287 + notify about potential manipulation attempts or
14288 + resource shortage. It is generally considered to
14289 + be a good idea to have that enabled.
14291 +config VSERVER_WARN_DEVPTS
14292 + bool "VServer DevPTS Warnings"
14293 + depends on VSERVER_WARN
14296 + This enables DevPTS related warnings, issued when a
14297 + process inside a context tries to lookup or access
14298 + a dynamic pts from the host or a different context.
14300 +config VSERVER_DEBUG
14301 + bool "VServer Debugging Code"
14304 + Set this to yes if you want to be able to activate
14305 + debugging output at runtime. It adds a very small
14306 + overhead to all vserver related functions and
14307 + increases the kernel size by about 20k.
14309 +config VSERVER_HISTORY
14310 + bool "VServer History Tracing"
14311 + depends on VSERVER_DEBUG
14314 + Set this to yes if you want to record the history of
14315 + linux-vserver activities, so they can be replayed in
14316 + the event of a kernel panic or oops.
14318 +config VSERVER_HISTORY_SIZE
14319 + int "Per-CPU History Size (32-65536)"
14320 + depends on VSERVER_HISTORY
14324 + This allows you to specify the number of entries in
14325 + the per-CPU history buffer.
14327 +config VSERVER_EXTRA_MNT_CHECK
14328 + bool "Extra Checks for Reachability"
14331 + Set this to yes if you want to do extra checks for
14332 + vfsmount reachability in the proc filesystem code.
14333 + This shouldn't be required on any setup utilizing
14337 + prompt "Quotes used in debug and warn messages"
14338 + default QUOTES_ISO8859
14340 +config QUOTES_ISO8859
14341 + bool "Extended ASCII (ISO 8859) angle quotes"
14343 + This uses the extended ASCII characters \xbb
14344 + and \xab for quoting file and process names.
14346 +config QUOTES_UTF8
14347 + bool "UTF-8 angle quotes"
14349 + This uses the UTF-8 sequences for angle
14350 + quotes to quote file and process names.
14352 +config QUOTES_ASCII
14353 + bool "ASCII single quotes"
14355 + This uses the ASCII single quote character
14356 + (\x27) to quote file and process names.
14366 + select NAMESPACES
14372 +config VSERVER_SECURITY
14374 + depends on SECURITY
14376 + select SECURITY_CAPABILITIES
14378 +config VSERVER_DISABLED
14382 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/Makefile linux-4.4/kernel/vserver/Makefile
14383 --- linux-4.4/kernel/vserver/Makefile 1970-01-01 01:00:00.000000000 +0100
14384 +++ linux-4.4/kernel/vserver/Makefile 2021-02-24 16:56:24.612823601 +0100
14387 +# Makefile for the Linux vserver routines.
14391 +obj-y += vserver.o
14393 +vserver-y := switch.o context.o space.o sched.o network.o inode.o \
14394 + limit.o cvirt.o cacct.o signal.o helper.o init.o \
14397 +vserver-$(CONFIG_INET) += inet.o
14398 +vserver-$(CONFIG_PROC_FS) += proc.o
14399 +vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o
14400 +vserver-$(CONFIG_VSERVER_HISTORY) += history.o
14401 +vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o
14402 +vserver-$(CONFIG_VSERVER_DEVICE) += device.o
14404 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/cacct.c linux-4.4/kernel/vserver/cacct.c
14405 --- linux-4.4/kernel/vserver/cacct.c 1970-01-01 01:00:00.000000000 +0100
14406 +++ linux-4.4/kernel/vserver/cacct.c 2021-02-24 16:56:24.609490163 +0100
14409 + * linux/kernel/vserver/cacct.c
14411 + * Virtual Server: Context Accounting
14413 + * Copyright (C) 2006-2007 Herbert Pötzl
14415 + * V0.01 added accounting stats
14419 +#include <linux/types.h>
14420 +#include <linux/vs_context.h>
14421 +#include <linux/vserver/cacct_cmd.h>
14422 +#include <linux/vserver/cacct_int.h>
14424 +#include <asm/errno.h>
14425 +#include <asm/uaccess.h>
14428 +int vc_sock_stat(struct vx_info *vxi, void __user *data)
14430 + struct vcmd_sock_stat_v0 vc_data;
14433 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
14436 + field = vc_data.field;
14437 + if ((field < 0) || (field >= VXA_SOCK_SIZE))
14440 + for (j = 0; j < 3; j++) {
14441 + vc_data.count[j] = vx_sock_count(&vxi->cacct, field, j);
14442 + vc_data.total[j] = vx_sock_total(&vxi->cacct, field, j);
14445 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
14450 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/cacct_init.h linux-4.4/kernel/vserver/cacct_init.h
14451 --- linux-4.4/kernel/vserver/cacct_init.h 1970-01-01 01:00:00.000000000 +0100
14452 +++ linux-4.4/kernel/vserver/cacct_init.h 2021-02-24 16:56:24.609490163 +0100
14456 +static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
14461 + for (i = 0; i < VXA_SOCK_SIZE; i++) {
14462 + for (j = 0; j < 3; j++) {
14463 + atomic_long_set(&cacct->sock[i][j].count, 0);
14464 + atomic_long_set(&cacct->sock[i][j].total, 0);
14467 + for (i = 0; i < 8; i++)
14468 + atomic_set(&cacct->slab[i], 0);
14469 + for (i = 0; i < 5; i++)
14470 + for (j = 0; j < 4; j++)
14471 + atomic_set(&cacct->page[i][j], 0);
14474 +static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
14479 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/cacct_proc.h linux-4.4/kernel/vserver/cacct_proc.h
14480 --- linux-4.4/kernel/vserver/cacct_proc.h 1970-01-01 01:00:00.000000000 +0100
14481 +++ linux-4.4/kernel/vserver/cacct_proc.h 2021-02-24 16:56:24.609490163 +0100
14483 +#ifndef _VX_CACCT_PROC_H
14484 +#define _VX_CACCT_PROC_H
14486 +#include <linux/vserver/cacct_int.h>
14489 +#define VX_SOCKA_TOP \
14490 + "Type\t recv #/bytes\t\t send #/bytes\t\t fail #/bytes\n"
14492 +static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
14494 + int i, j, length = 0;
14495 + static char *type[VXA_SOCK_SIZE] = {
14496 + "UNSPEC", "UNIX", "INET", "INET6", "PACKET", "OTHER"
14499 + length += sprintf(buffer + length, VX_SOCKA_TOP);
14500 + for (i = 0; i < VXA_SOCK_SIZE; i++) {
14501 + length += sprintf(buffer + length, "%s:", type[i]);
14502 + for (j = 0; j < 3; j++) {
14503 + length += sprintf(buffer + length,
14504 + "\t%10lu/%-10lu",
14505 + vx_sock_count(cacct, i, j),
14506 + vx_sock_total(cacct, i, j));
14508 + buffer[length++] = '\n';
14511 + length += sprintf(buffer + length, "\n");
14512 + length += sprintf(buffer + length,
14513 + "slab:\t %8u %8u %8u %8u\n",
14514 + atomic_read(&cacct->slab[1]),
14515 + atomic_read(&cacct->slab[4]),
14516 + atomic_read(&cacct->slab[0]),
14517 + atomic_read(&cacct->slab[2]));
14519 + length += sprintf(buffer + length, "\n");
14520 + for (i = 0; i < 5; i++) {
14521 + length += sprintf(buffer + length,
14522 + "page[%d]: %8u %8u %8u %8u\t %8u %8u %8u %8u\n", i,
14523 + atomic_read(&cacct->page[i][0]),
14524 + atomic_read(&cacct->page[i][1]),
14525 + atomic_read(&cacct->page[i][2]),
14526 + atomic_read(&cacct->page[i][3]),
14527 + atomic_read(&cacct->page[i][4]),
14528 + atomic_read(&cacct->page[i][5]),
14529 + atomic_read(&cacct->page[i][6]),
14530 + atomic_read(&cacct->page[i][7]));
14535 +#endif /* _VX_CACCT_PROC_H */
14536 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/context.c linux-4.4/kernel/vserver/context.c
14537 --- linux-4.4/kernel/vserver/context.c 1970-01-01 01:00:00.000000000 +0100
14538 +++ linux-4.4/kernel/vserver/context.c 2021-02-24 16:56:24.609490163 +0100
14541 + * linux/kernel/vserver/context.c
14543 + * Virtual Server: Context Support
14545 + * Copyright (C) 2003-2011 Herbert Pötzl
14547 + * V0.01 context helper
14548 + * V0.02 vx_ctx_kill syscall command
14549 + * V0.03 replaced context_info calls
14550 + * V0.04 redesign of struct (de)alloc
14551 + * V0.05 rlimit basic implementation
14552 + * V0.06 task_xid and info commands
14553 + * V0.07 context flags and caps
14554 + * V0.08 switch to RCU based hash
14555 + * V0.09 revert to non RCU for now
14556 + * V0.10 and back to working RCU hash
14557 + * V0.11 and back to locking again
14558 + * V0.12 referenced context store
14559 + * V0.13 separate per cpu data
14560 + * V0.14 changed vcmds to vxi arg
14561 + * V0.15 added context stat
14562 + * V0.16 have __create claim() the vxi
14563 + * V0.17 removed older and legacy stuff
14564 + * V0.18 added user credentials
14565 + * V0.19 added warn mask
14569 +#include <linux/slab.h>
14570 +#include <linux/types.h>
14571 +#include <linux/security.h>
14572 +#include <linux/pid_namespace.h>
14573 +#include <linux/capability.h>
14575 +#include <linux/vserver/context.h>
14576 +#include <linux/vserver/network.h>
14577 +#include <linux/vserver/debug.h>
14578 +#include <linux/vserver/limit.h>
14579 +#include <linux/vserver/limit_int.h>
14580 +#include <linux/vserver/space.h>
14581 +#include <linux/init_task.h>
14582 +#include <linux/fs_struct.h>
14583 +#include <linux/cred.h>
14585 +#include <linux/vs_context.h>
14586 +#include <linux/vs_limit.h>
14587 +#include <linux/vs_pid.h>
14588 +#include <linux/vserver/context_cmd.h>
14590 +#include "cvirt_init.h"
14591 +#include "cacct_init.h"
14592 +#include "limit_init.h"
14593 +#include "sched_init.h"
14596 +atomic_t vx_global_ctotal = ATOMIC_INIT(0);
14597 +atomic_t vx_global_cactive = ATOMIC_INIT(0);
14600 +/* now inactive context structures */
14602 +static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT;
14604 +static DEFINE_SPINLOCK(vx_info_inactive_lock);
14607 +/* __alloc_vx_info()
14609 + * allocate an initialized vx_info struct
14610 + * doesn't make it visible (hash) */
14612 +static struct vx_info *__alloc_vx_info(vxid_t xid)
14614 + struct vx_info *new = NULL;
14617 + vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
14619 + /* would this benefit from a slab cache? */
14620 + new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
14624 + memset(new, 0, sizeof(struct vx_info));
14626 + new->ptr_pc = alloc_percpu(struct _vx_info_pc);
14627 + if (!new->ptr_pc)
14630 + new->vx_id = xid;
14631 + INIT_HLIST_NODE(&new->vx_hlist);
14632 + atomic_set(&new->vx_usecnt, 0);
14633 + atomic_set(&new->vx_tasks, 0);
14634 + new->vx_parent = NULL;
14635 + new->vx_state = 0;
14636 + init_waitqueue_head(&new->vx_wait);
14638 + /* prepare reaper */
14639 + get_task_struct(init_pid_ns.child_reaper);
14640 + new->vx_reaper = init_pid_ns.child_reaper;
14641 + new->vx_badness_bias = 0;
14643 + /* rest of init goes here */
14644 + vx_info_init_limit(&new->limit);
14645 + vx_info_init_sched(&new->sched);
14646 + vx_info_init_cvirt(&new->cvirt);
14647 + vx_info_init_cacct(&new->cacct);
14649 + /* per cpu data structures */
14650 + for_each_possible_cpu(cpu) {
14651 + vx_info_init_sched_pc(
14652 + &vx_per_cpu(new, sched_pc, cpu), cpu);
14653 + vx_info_init_cvirt_pc(
14654 + &vx_per_cpu(new, cvirt_pc, cpu), cpu);
14657 + new->vx_flags = VXF_INIT_SET;
14658 + new->vx_bcaps = CAP_FULL_SET; // maybe ~CAP_SETPCAP
14659 + new->vx_ccaps = 0;
14660 + new->vx_umask = 0;
14661 + new->vx_wmask = 0;
14663 + new->reboot_cmd = 0;
14664 + new->exit_code = 0;
14666 + // preconfig spaces
14667 + for (index = 0; index < VX_SPACES; index++) {
14668 + struct _vx_space *space = &new->space[index];
14671 + spin_lock(&init_fs.lock);
14673 + spin_unlock(&init_fs.lock);
14674 + space->vx_fs = &init_fs;
14676 + /* FIXME: do we want defaults? */
14677 + // space->vx_real_cred = 0;
14678 + // space->vx_cred = 0;
14682 + vxdprintk(VXD_CBIT(xid, 0),
14683 + "alloc_vx_info(%d) = %p", xid, new);
14684 + vxh_alloc_vx_info(new);
14685 + atomic_inc(&vx_global_ctotal);
14694 +/* __dealloc_vx_info()
14696 + * final disposal of vx_info */
14698 +static void __dealloc_vx_info(struct vx_info *vxi)
14700 +#ifdef CONFIG_VSERVER_WARN
14701 + struct vx_info_save vxis;
14704 + vxdprintk(VXD_CBIT(xid, 0),
14705 + "dealloc_vx_info(%p)", vxi);
14706 + vxh_dealloc_vx_info(vxi);
14708 +#ifdef CONFIG_VSERVER_WARN
14709 + enter_vx_info(vxi, &vxis);
14710 + vx_info_exit_limit(&vxi->limit);
14711 + vx_info_exit_sched(&vxi->sched);
14712 + vx_info_exit_cvirt(&vxi->cvirt);
14713 + vx_info_exit_cacct(&vxi->cacct);
14715 + for_each_possible_cpu(cpu) {
14716 + vx_info_exit_sched_pc(
14717 + &vx_per_cpu(vxi, sched_pc, cpu), cpu);
14718 + vx_info_exit_cvirt_pc(
14719 + &vx_per_cpu(vxi, cvirt_pc, cpu), cpu);
14721 + leave_vx_info(&vxis);
14725 + vxi->vx_state |= VXS_RELEASED;
14728 + free_percpu(vxi->ptr_pc);
14731 + atomic_dec(&vx_global_ctotal);
14734 +static void __shutdown_vx_info(struct vx_info *vxi)
14736 + struct nsproxy *nsproxy;
14737 + struct fs_struct *fs;
14738 + struct cred *cred;
14743 + vxi->vx_state |= VXS_SHUTDOWN;
14744 + vs_state_change(vxi, VSC_SHUTDOWN);
14746 + for (index = 0; index < VX_SPACES; index++) {
14747 + struct _vx_space *space = &vxi->space[index];
14749 + nsproxy = xchg(&space->vx_nsproxy, NULL);
14751 + put_nsproxy(nsproxy);
14753 + fs = xchg(&space->vx_fs, NULL);
14754 + spin_lock(&fs->lock);
14755 + kill = !--fs->users;
14756 + spin_unlock(&fs->lock);
14758 + free_fs_struct(fs);
14760 + cred = (struct cred *)xchg(&space->vx_cred, NULL);
14762 + abort_creds(cred);
14766 +/* exported stuff */
14768 +void free_vx_info(struct vx_info *vxi)
14770 + unsigned long flags;
14773 + /* check for reference counts first */
14774 + BUG_ON(atomic_read(&vxi->vx_usecnt));
14775 + BUG_ON(atomic_read(&vxi->vx_tasks));
14777 + /* context must not be hashed */
14778 + BUG_ON(vx_info_state(vxi, VXS_HASHED));
14780 + /* context shutdown is mandatory */
14781 + BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN));
14783 + /* spaces check */
14784 + for (index = 0; index < VX_SPACES; index++) {
14785 + struct _vx_space *space = &vxi->space[index];
14787 + BUG_ON(space->vx_nsproxy);
14788 + BUG_ON(space->vx_fs);
14789 + // BUG_ON(space->vx_real_cred);
14790 + // BUG_ON(space->vx_cred);
14793 + spin_lock_irqsave(&vx_info_inactive_lock, flags);
14794 + hlist_del(&vxi->vx_hlist);
14795 + spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
14797 + __dealloc_vx_info(vxi);
14801 +/* hash table for vx_info hash */
14803 +#define VX_HASH_SIZE 13
14805 +static struct hlist_head vx_info_hash[VX_HASH_SIZE] =
14806 + { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT };
14808 +static DEFINE_SPINLOCK(vx_info_hash_lock);
14811 +static inline unsigned int __hashval(vxid_t xid)
14813 + return (xid % VX_HASH_SIZE);
14818 +/* __hash_vx_info()
14820 + * add the vxi to the global hash table
14821 + * requires the hash_lock to be held */
14823 +static inline void __hash_vx_info(struct vx_info *vxi)
14825 + struct hlist_head *head;
14827 + vxd_assert_lock(&vx_info_hash_lock);
14828 + vxdprintk(VXD_CBIT(xid, 4),
14829 + "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
14830 + vxh_hash_vx_info(vxi);
14832 + /* context must not be hashed */
14833 + BUG_ON(vx_info_state(vxi, VXS_HASHED));
14835 + vxi->vx_state |= VXS_HASHED;
14836 + head = &vx_info_hash[__hashval(vxi->vx_id)];
14837 + hlist_add_head(&vxi->vx_hlist, head);
14838 + atomic_inc(&vx_global_cactive);
14841 +/* __unhash_vx_info()
14843 + * remove the vxi from the global hash table
14844 + * requires the hash_lock to be held */
14846 +static inline void __unhash_vx_info(struct vx_info *vxi)
14848 + unsigned long flags;
14850 + vxd_assert_lock(&vx_info_hash_lock);
14851 + vxdprintk(VXD_CBIT(xid, 4),
14852 + "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id,
14853 + atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks));
14854 + vxh_unhash_vx_info(vxi);
14856 + /* context must be hashed */
14857 + BUG_ON(!vx_info_state(vxi, VXS_HASHED));
14858 + /* but without tasks */
14859 + BUG_ON(atomic_read(&vxi->vx_tasks));
14861 + vxi->vx_state &= ~VXS_HASHED;
14862 + hlist_del_init(&vxi->vx_hlist);
14863 + spin_lock_irqsave(&vx_info_inactive_lock, flags);
14864 + hlist_add_head(&vxi->vx_hlist, &vx_info_inactive);
14865 + spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
14866 + atomic_dec(&vx_global_cactive);
14870 +/* __lookup_vx_info()
14872 + * requires the hash_lock to be held
14873 + * doesn't increment the vx_usecnt */
14875 +static inline struct vx_info *__lookup_vx_info(vxid_t xid)
14877 + struct hlist_head *head = &vx_info_hash[__hashval(xid)];
14878 + struct hlist_node *pos;
14879 + struct vx_info *vxi;
14881 + vxd_assert_lock(&vx_info_hash_lock);
14882 + hlist_for_each(pos, head) {
14883 + vxi = hlist_entry(pos, struct vx_info, vx_hlist);
14885 + if (vxi->vx_id == xid)
14890 + vxdprintk(VXD_CBIT(xid, 0),
14891 + "__lookup_vx_info(#%u): %p[#%u]",
14892 + xid, vxi, vxi ? vxi->vx_id : 0);
14893 + vxh_lookup_vx_info(vxi, xid);
14898 +/* __create_vx_info()
14900 + * create the requested context
14901 + * get(), claim() and hash it */
14903 +static struct vx_info *__create_vx_info(int id)
14905 + struct vx_info *new, *vxi = NULL;
14907 + vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id);
14909 + if (!(new = __alloc_vx_info(id)))
14910 + return ERR_PTR(-ENOMEM);
14912 + /* required to make dynamic xids unique */
14913 + spin_lock(&vx_info_hash_lock);
14915 + /* static context requested */
14916 + if ((vxi = __lookup_vx_info(id))) {
14917 + vxdprintk(VXD_CBIT(xid, 0),
14918 + "create_vx_info(%d) = %p (already there)", id, vxi);
14919 + if (vx_info_flags(vxi, VXF_STATE_SETUP, 0))
14920 + vxi = ERR_PTR(-EBUSY);
14922 + vxi = ERR_PTR(-EEXIST);
14925 + /* new context */
14926 + vxdprintk(VXD_CBIT(xid, 0),
14927 + "create_vx_info(%d) = %p (new)", id, new);
14928 + claim_vx_info(new, NULL);
14929 + __hash_vx_info(get_vx_info(new));
14930 + vxi = new, new = NULL;
14933 + spin_unlock(&vx_info_hash_lock);
14934 + vxh_create_vx_info(IS_ERR(vxi) ? NULL : vxi, id);
14936 + __dealloc_vx_info(new);
14941 +/* exported stuff */
14944 +void unhash_vx_info(struct vx_info *vxi)
14946 + spin_lock(&vx_info_hash_lock);
14947 + __unhash_vx_info(vxi);
14948 + spin_unlock(&vx_info_hash_lock);
14949 + __shutdown_vx_info(vxi);
14950 + __wakeup_vx_info(vxi);
14954 +/* lookup_vx_info()
14956 + * search for a vx_info and get() it
14957 + * negative id means current */
14959 +struct vx_info *lookup_vx_info(int id)
14961 + struct vx_info *vxi = NULL;
14964 + vxi = get_vx_info(current_vx_info());
14965 + } else if (id > 1) {
14966 + spin_lock(&vx_info_hash_lock);
14967 + vxi = get_vx_info(__lookup_vx_info(id));
14968 + spin_unlock(&vx_info_hash_lock);
14973 +/* xid_is_hashed()
14975 + * verify that xid is still hashed */
14977 +int xid_is_hashed(vxid_t xid)
14981 + spin_lock(&vx_info_hash_lock);
14982 + hashed = (__lookup_vx_info(xid) != NULL);
14983 + spin_unlock(&vx_info_hash_lock);
14987 +#ifdef CONFIG_PROC_FS
14991 + * get a subset of hashed xids for proc
14992 + * assumes size is at least one */
14994 +int get_xid_list(int index, unsigned int *xids, int size)
14996 + int hindex, nr_xids = 0;
14998 + /* only show current and children */
14999 + if (!vx_check(0, VS_ADMIN | VS_WATCH)) {
15002 + xids[nr_xids] = vx_current_xid();
15006 + for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
15007 + struct hlist_head *head = &vx_info_hash[hindex];
15008 + struct hlist_node *pos;
15010 + spin_lock(&vx_info_hash_lock);
15011 + hlist_for_each(pos, head) {
15012 + struct vx_info *vxi;
15017 + vxi = hlist_entry(pos, struct vx_info, vx_hlist);
15018 + xids[nr_xids] = vxi->vx_id;
15019 + if (++nr_xids >= size) {
15020 + spin_unlock(&vx_info_hash_lock);
15024 + /* keep the lock time short */
15025 + spin_unlock(&vx_info_hash_lock);
15032 +#ifdef CONFIG_VSERVER_DEBUG
15034 +void dump_vx_info_inactive(int level)
15036 + struct hlist_node *entry, *next;
15038 + hlist_for_each_safe(entry, next, &vx_info_inactive) {
15039 + struct vx_info *vxi =
15040 + list_entry(entry, struct vx_info, vx_hlist);
15042 + dump_vx_info(vxi, level);
15049 +int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
15051 + struct user_struct *new_user, *old_user;
15056 + if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
15059 + new_user = alloc_uid(vxi->vx_id, p->uid);
15063 + old_user = p->user;
15064 + if (new_user != old_user) {
15065 + atomic_inc(&new_user->processes);
15066 + atomic_dec(&old_user->processes);
15067 + p->user = new_user;
15069 + free_uid(old_user);
15075 +void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p)
15077 + // p->cap_effective &= vxi->vx_cap_bset;
15078 + p->cap_effective =
15079 + cap_intersect(p->cap_effective, vxi->cap_bset);
15080 + // p->cap_inheritable &= vxi->vx_cap_bset;
15081 + p->cap_inheritable =
15082 + cap_intersect(p->cap_inheritable, vxi->cap_bset);
15083 + // p->cap_permitted &= vxi->vx_cap_bset;
15084 + p->cap_permitted =
15085 + cap_intersect(p->cap_permitted, vxi->cap_bset);
15090 +#include <linux/file.h>
15091 +#include <linux/fdtable.h>
15093 +static int vx_openfd_task(struct task_struct *tsk)
15095 + struct files_struct *files = tsk->files;
15096 + struct fdtable *fdt;
15097 + const unsigned long *bptr;
15098 + int count, total;
15100 + /* no rcu_read_lock() because of spin_lock() */
15101 + spin_lock(&files->file_lock);
15102 + fdt = files_fdtable(files);
15103 + bptr = fdt->open_fds;
15104 + count = fdt->max_fds / (sizeof(unsigned long) * 8);
15105 + for (total = 0; count > 0; count--) {
15107 + total += hweight_long(*bptr);
15110 + spin_unlock(&files->file_lock);
15115 +/* for *space compatibility */
15117 +asmlinkage long sys_unshare(unsigned long);
15120 + * migrate task to new context
15121 + * gets vxi, puts old_vxi on change
15122 + * optionally unshares namespaces (hack)
15125 +int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare)
15127 + struct vx_info *old_vxi;
15133 + vxdprintk(VXD_CBIT(xid, 5),
15134 + "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
15135 + vxi->vx_id, atomic_read(&vxi->vx_usecnt));
15137 + if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) &&
15138 + !vx_info_flags(vxi, VXF_STATE_SETUP, 0))
15141 + if (vx_info_state(vxi, VXS_SHUTDOWN))
15144 + old_vxi = task_get_vx_info(p);
15145 + if (old_vxi == vxi)
15148 +// if (!(ret = vx_migrate_user(p, vxi))) {
15153 + openfd = vx_openfd_task(p);
15156 + atomic_dec(&old_vxi->cvirt.nr_threads);
15157 + atomic_dec(&old_vxi->cvirt.nr_running);
15158 + __rlim_dec(&old_vxi->limit, RLIMIT_NPROC);
15159 + /* FIXME: what about the struct files here? */
15160 + __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd);
15161 + /* account for the executable */
15162 + __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY);
15164 + atomic_inc(&vxi->cvirt.nr_threads);
15165 + atomic_inc(&vxi->cvirt.nr_running);
15166 + __rlim_inc(&vxi->limit, RLIMIT_NPROC);
15167 + /* FIXME: what about the struct files here? */
15168 + __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd);
15169 + /* account for the executable */
15170 + __rlim_inc(&vxi->limit, VLIMIT_DENTRY);
15173 + release_vx_info(old_vxi, p);
15174 + clr_vx_info(&p->vx_info);
15176 + claim_vx_info(vxi, p);
15177 + set_vx_info(&p->vx_info, vxi);
15178 + p->xid = vxi->vx_id;
15180 + vxdprintk(VXD_CBIT(xid, 5),
15181 + "moved task %p into vxi:%p[#%d]",
15182 + p, vxi, vxi->vx_id);
15184 + // vx_mask_cap_bset(vxi, p);
15187 + /* hack for *spaces to provide compatibility */
15189 + struct nsproxy *old_nsp, *new_nsp;
15191 + ret = unshare_nsproxy_namespaces(
15192 + CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER,
15193 + &new_nsp, NULL, NULL);
15197 + old_nsp = xchg(&p->nsproxy, new_nsp);
15198 + vx_set_space(vxi,
15199 + CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER, 0);
15200 + put_nsproxy(old_nsp);
15204 + put_vx_info(old_vxi);
15208 +int vx_set_reaper(struct vx_info *vxi, struct task_struct *p) /* install task p as the child reaper of context vxi */
15210 + struct task_struct *old_reaper;
15211 + struct vx_info *reaper_vxi;
15216 + vxdprintk(VXD_CBIT(xid, 6),
15217 + "vx_set_reaper(%p[#%d],%p[#%d,%d])",
15218 + vxi, vxi->vx_id, p, p->xid, p->pid);
15220 + old_reaper = vxi->vx_reaper;
15221 + if (old_reaper == p) /* p already is the reaper of vxi */
15224 + reaper_vxi = task_get_vx_info(p);
15225 + if (reaper_vxi && reaper_vxi != vxi) { /* p has a context, and it is not vxi */
15227 + "Unsuitable reaper [" VS_Q("%s") ",%u:#%u] "
15229 + p->comm, p->pid, p->xid, vx_current_xid());
15233 + /* set new child reaper */
15234 + get_task_struct(p); /* reference on p taken before it is stored in vxi */
15235 + vxi->vx_reaper = p;
15236 + put_task_struct(old_reaper); /* reference on the previous reaper is dropped */
15238 + put_vx_info(reaper_vxi); /* presumably pairs with task_get_vx_info() above -- confirm */
15242 +int vx_set_init(struct vx_info *vxi, struct task_struct *p) /* record task p as the init task of context vxi */
15247 + vxdprintk(VXD_CBIT(xid, 6),
15248 + "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
15249 + vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
15251 + vxi->vx_flags &= ~VXF_STATE_INIT; /* clear the STATE_INIT flag bit */
15252 + // vxi->vx_initpid = p->tgid;
15253 + vxi->vx_initpid = p->pid; /* the pid, not the tgid, is stored as init pid */
15257 +void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code) /* bookkeeping for the exit of the context's init task */
15259 + vxdprintk(VXD_CBIT(xid, 6),
15260 + "vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
15261 + vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
15263 + vxi->exit_code = code; /* remember the exit code in the context */
15264 + vxi->vx_initpid = 0; /* context has no init pid any more */
15268 +void vx_set_persistent(struct vx_info *vxi) /* counterpart of vx_clear_persistent() */
15270 + vxdprintk(VXD_CBIT(xid, 6),
15271 + "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id);
15273 + get_vx_info(vxi); /* undone by put_vx_info() in vx_clear_persistent() */
15274 + claim_vx_info(vxi, NULL); /* claim with a NULL task; undone by release_vx_info(vxi, NULL) */
15277 +void vx_clear_persistent(struct vx_info *vxi) /* counterpart of vx_set_persistent() */
15279 + vxdprintk(VXD_CBIT(xid, 6),
15280 + "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id);
15282 + release_vx_info(vxi, NULL); /* reverses claim_vx_info(vxi, NULL) */
15283 + put_vx_info(vxi); /* reverses get_vx_info(vxi); done last */
15286 +void vx_update_persistent(struct vx_info *vxi) /* set or clear the persistent state of vxi according to VXF_PERSISTENT */
15288 + if (vx_info_flags(vxi, VXF_PERSISTENT, 0)) /* presumably true when VXF_PERSISTENT is set -- confirm vx_info_flags() */
15289 + vx_set_persistent(vxi);
15291 + vx_clear_persistent(vxi); /* NOTE(review): the branch keyword for this line is not visible in this excerpt */
15295 +/* task must be current or locked */
15297 +void exit_vx_info(struct task_struct *p, int code) /* context bookkeeping for exiting task p */
15299 + struct vx_info *vxi = p->vx_info;
15302 + atomic_dec(&vxi->cvirt.nr_threads); /* one thread less counted in the context */
15305 + vxi->exit_code = code;
15306 + release_vx_info(vxi, p); /* release vxi on behalf of task p */
15310 +void exit_vx_info_early(struct task_struct *p, int code) /* handle the cases where exiting task p is init or reaper of its context */
15312 + struct vx_info *vxi = p->vx_info;
15315 + if (vxi->vx_initpid == p->pid) /* p is the recorded init task */
15316 + vx_exit_init(vxi, p, code);
15317 + if (vxi->vx_reaper == p) /* p is the current child reaper */
15318 + vx_set_reaper(vxi, init_pid_ns.child_reaper); /* replace it with the child reaper of init_pid_ns */
15323 +/* vserver syscall commands below here */
15325 +/* task xid and vx_info functions */
15327 +#include <asm/uaccess.h>
15330 +int vc_task_xid(uint32_t id) /* vserver command: look up an xid */
15335 + struct task_struct *tsk;
15338 + tsk = find_task_by_real_pid(id); /* id is used as a pid here */
15339 + xid = (tsk) ? tsk->xid : -ESRCH; /* -ESRCH when no such task was found */
15340 + rcu_read_unlock();
15342 + xid = vx_current_xid(); /* NOTE(review): alternative path; its condition is not visible in this excerpt */
15347 +int vc_vx_info(struct vx_info *vxi, void __user *data) /* vserver command: copy xid and init pid of vxi to user space */
15349 + struct vcmd_vx_info_v0 vc_data;
15351 + vc_data.xid = vxi->vx_id;
15352 + vc_data.initpid = vxi->vx_initpid;
15354 + if (copy_to_user(data, &vc_data, sizeof(vc_data))) /* non-zero result means the copy to user space failed */
15360 +int vc_ctx_stat(struct vx_info *vxi, void __user *data) /* vserver command: copy use and task counters of vxi to user space */
15362 + struct vcmd_ctx_stat_v0 vc_data;
15364 + vc_data.usecnt = atomic_read(&vxi->vx_usecnt);
15365 + vc_data.tasks = atomic_read(&vxi->vx_tasks);
15367 + if (copy_to_user(data, &vc_data, sizeof(vc_data))) /* non-zero result means the copy to user space failed */
15373 +/* context functions */
15375 +int vc_ctx_create(uint32_t xid, void __user *data) /* vserver command: create context xid and migrate the caller into it */
15377 + struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET }; /* flag word used when data is NULL */
15378 + struct vx_info *new_vxi;
15381 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) /* data is optional; copy only when given */
15384 + if ((xid > MAX_S_CONTEXT) || (xid < 2)) /* xid outside 2..MAX_S_CONTEXT */
15387 + new_vxi = __create_vx_info(xid);
15388 + if (IS_ERR(new_vxi))
15389 + return PTR_ERR(new_vxi);
15391 + /* initial flags */
15392 + new_vxi->vx_flags = vc_data.flagword;
15395 + if (vs_state_change(new_vxi, VSC_STARTUP))
15398 + ret = vx_migrate_task(current, new_vxi, (!data)); /* third argument is 1 only when no user data was passed */
15402 + /* return context id on success */
15403 + ret = new_vxi->vx_id;
15405 + /* get a reference for persistent contexts */
15406 + if ((vc_data.flagword & VXF_PERSISTENT))
15407 + vx_set_persistent(new_vxi);
15409 + release_vx_info(new_vxi, NULL); /* presumably drops what __create_vx_info() handed out -- confirm */
15410 + put_vx_info(new_vxi);
15415 +int vc_ctx_migrate(struct vx_info *vxi, void __user *data) /* vserver command: migrate the caller into vxi */
15417 + struct vcmd_ctx_migrate vc_data = { .flagword = 0 }; /* no optional actions when data is NULL */
15420 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) /* data is optional */
15423 + ret = vx_migrate_task(current, vxi, 0);
15426 + if (vc_data.flagword & VXM_SET_INIT) /* caller asked to be recorded as init */
15427 + ret = vx_set_init(vxi, current);
15430 + if (vc_data.flagword & VXM_SET_REAPER) /* caller asked to become the child reaper */
15431 + ret = vx_set_reaper(vxi, current);
15436 +int vc_get_cflags(struct vx_info *vxi, void __user *data) /* vserver command: copy flag word and mask of vxi to user space */
15438 + struct vcmd_ctx_flags_v0 vc_data;
15440 + vc_data.flagword = vxi->vx_flags;
15442 + /* special STATE flag handling */
15443 + vc_data.mask = vs_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME); /* NOTE(review): vs_mask_flags() is defined outside this excerpt */
15445 + if (copy_to_user(data, &vc_data, sizeof(vc_data))) /* non-zero result means the copy to user space failed */
15450 +int vc_set_cflags(struct vx_info *vxi, void __user *data) /* vserver command: update the flag word of vxi from user space */
15452 + struct vcmd_ctx_flags_v0 vc_data;
15453 + uint64_t mask, trigger;
15455 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15458 + /* special STATE flag handling */
15459 + mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
15460 + trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword); /* masked bits that differ between current and requested flags */
15462 + if (vxi == current_vx_info()) { /* only when the caller acts on its own context */
15463 + /* if (trigger & VXF_STATE_SETUP)
15464 + vx_mask_cap_bset(vxi, current); */
15465 + if (trigger & VXF_STATE_INIT) { /* STATE_INIT bit is among the changing bits */
15468 + ret = vx_set_init(vxi, current);
15471 + ret = vx_set_reaper(vxi, current);
15477 + vxi->vx_flags = vs_mask_flags(vxi->vx_flags,
15478 + vc_data.flagword, mask);
15479 + if (trigger & VXF_PERSISTENT) /* PERSISTENT bit is among the changing bits */
15480 + vx_update_persistent(vxi);
15486 +static inline uint64_t caps_from_cap_t(kernel_cap_t c) /* pack a kernel_cap_t into one 64-bit value */
15488 + uint64_t v = c.cap[0] | ((uint64_t)c.cap[1] << 32); /* cap[0] -> bits 0..31, cap[1] -> bits 32..63 */
15490 + // printk("caps_from_cap_t(%08x:%08x) = %016llx\n", c.cap[1], c.cap[0], v);
15494 +static inline kernel_cap_t cap_t_from_caps(uint64_t v) /* unpack a 64-bit value into a kernel_cap_t */
15496 + kernel_cap_t c = __cap_empty_set;
15498 + c.cap[0] = v & 0xFFFFFFFF; /* bits 0..31 */
15499 + c.cap[1] = (v >> 32) & 0xFFFFFFFF; /* bits 32..63 */
15501 + // printk("cap_t_from_caps(%016llx) = %08x:%08x\n", v, c.cap[1], c.cap[0]);
15506 +static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps) /* callers in this file pass NULL for the output they do not want */
15509 + *bcaps = caps_from_cap_t(vxi->vx_bcaps); /* NOTE(review): any NULL guard for bcaps is not visible in this excerpt */
15511 + *ccaps = vxi->vx_ccaps; /* NOTE(review): any NULL guard for ccaps is not visible in this excerpt */
15516 +int vc_get_ccaps(struct vx_info *vxi, void __user *data) /* vserver command: copy the ccaps of vxi to user space */
15518 + struct vcmd_ctx_caps_v1 vc_data;
15521 + ret = do_get_caps(vxi, NULL, &vc_data.ccaps); /* bcaps output not requested */
15524 + vc_data.cmask = ~0ULL; /* all mask bits set in the reply */
15526 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15531 +static int do_set_caps(struct vx_info *vxi, /* update vx_bcaps and vx_ccaps of vxi under the given masks */
15532 + uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask)
15534 + uint64_t bcold = caps_from_cap_t(vxi->vx_bcaps); /* current bcaps as a 64-bit value */
15537 + printk("do_set_caps(%16llx, %16llx, %16llx, %16llx)\n", /* NOTE(review): no log level given; check whether this is compiled out */
15538 + bcaps, bmask, ccaps, cmask);
15540 + vxi->vx_bcaps = cap_t_from_caps(
15541 + vs_mask_flags(bcold, bcaps, bmask));
15542 + vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask);
15547 +int vc_set_ccaps(struct vx_info *vxi, void __user *data) /* vserver command: set the ccaps of vxi from user space */
15549 + struct vcmd_ctx_caps_v1 vc_data;
15551 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15554 + return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask); /* bcaps and bmask passed as 0 */
15557 +int vc_get_bcaps(struct vx_info *vxi, void __user *data) /* vserver command: copy the bcaps of vxi to user space */
15559 + struct vcmd_bcaps vc_data;
15562 + ret = do_get_caps(vxi, &vc_data.bcaps, NULL); /* ccaps output not requested */
15565 + vc_data.bmask = ~0ULL; /* all mask bits set in the reply */
15567 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15572 +int vc_set_bcaps(struct vx_info *vxi, void __user *data) /* vserver command: set the bcaps of vxi from user space */
15574 + struct vcmd_bcaps vc_data;
15576 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15579 + return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0); /* ccaps and cmask passed as 0 */
15583 +int vc_get_umask(struct vx_info *vxi, void __user *data) /* vserver command: copy vx_umask of vxi to user space */
15585 + struct vcmd_umask vc_data;
15587 + vc_data.umask = vxi->vx_umask;
15588 + vc_data.mask = ~0ULL; /* all mask bits set in the reply */
15590 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15595 +int vc_set_umask(struct vx_info *vxi, void __user *data) /* vserver command: update vx_umask of vxi from user space */
15597 + struct vcmd_umask vc_data;
15599 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15602 + vxi->vx_umask = vs_mask_flags(vxi->vx_umask, /* old value, new value and mask go through vs_mask_flags() */
15603 + vc_data.umask, vc_data.mask);
15608 +int vc_get_wmask(struct vx_info *vxi, void __user *data) /* vserver command: copy vx_wmask of vxi to user space */
15610 + struct vcmd_wmask vc_data;
15612 + vc_data.wmask = vxi->vx_wmask;
15613 + vc_data.mask = ~0ULL; /* all mask bits set in the reply */
15615 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15620 +int vc_set_wmask(struct vx_info *vxi, void __user *data) /* vserver command: update vx_wmask of vxi from user space */
15622 + struct vcmd_wmask vc_data;
15624 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15627 + vxi->vx_wmask = vs_mask_flags(vxi->vx_wmask, /* old value, new value and mask go through vs_mask_flags() */
15628 + vc_data.wmask, vc_data.mask);
15633 +int vc_get_badness(struct vx_info *vxi, void __user *data) /* vserver command: copy vx_badness_bias of vxi to user space */
15635 + struct vcmd_badness_v0 vc_data;
15637 + vc_data.bias = vxi->vx_badness_bias;
15639 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15644 +int vc_set_badness(struct vx_info *vxi, void __user *data) /* vserver command: set vx_badness_bias of vxi from user space */
15646 + struct vcmd_badness_v0 vc_data;
15648 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15651 + vxi->vx_badness_bias = vc_data.bias; /* stored as given; no range check visible in this excerpt */
15655 +#include <linux/module.h>
15657 +EXPORT_SYMBOL_GPL(free_vx_info);
15659 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/cvirt.c linux-4.4/kernel/vserver/cvirt.c
15660 --- linux-4.4/kernel/vserver/cvirt.c 1970-01-01 01:00:00.000000000 +0100
15661 +++ linux-4.4/kernel/vserver/cvirt.c 2021-02-24 16:56:24.609490163 +0100
15664 + * linux/kernel/vserver/cvirt.c
15666 + * Virtual Server: Context Virtualization
15668 + * Copyright (C) 2004-2007 Herbert Pötzl
15670 + * V0.01 broken out from limit.c
15671 + * V0.02 added utsname stuff
15672 + * V0.03 changed vcmds to vxi arg
15676 +#include <linux/types.h>
15677 +#include <linux/utsname.h>
15678 +#include <linux/vs_cvirt.h>
15679 +#include <linux/vserver/switch.h>
15680 +#include <linux/vserver/cvirt_cmd.h>
15682 +#include <asm/uaccess.h>
15685 +void vx_vsi_boottime(struct timespec *boottime) /* adjust *boottime in place for the calling context */
15687 + struct vx_info *vxi = current_vx_info();
15689 + set_normalized_timespec(boottime, /* boottime += bias_uptime, normalized */
15690 + boottime->tv_sec + vxi->cvirt.bias_uptime.tv_sec,
15691 + boottime->tv_nsec + vxi->cvirt.bias_uptime.tv_nsec);
15695 +void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle) /* adjust *uptime and *idle in place for the calling context */
15697 + struct vx_info *vxi = current_vx_info();
15699 + set_normalized_timespec(uptime, /* uptime -= bias_uptime, normalized */
15700 + uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec,
15701 + uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec);
15704 + set_normalized_timespec(idle, /* idle -= bias_idle, normalized */
15705 + idle->tv_sec - vxi->cvirt.bias_idle.tv_sec,
15706 + idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec);
15710 +uint64_t vx_idle_jiffies(void) /* used by vx_info_init_cvirt() to seed bias_idle */
15712 + return init_task.utime + init_task.stime; /* sum of utime and stime of init_task */
15717 +static inline uint32_t __update_loadavg(uint32_t load, /* compute a new load value for a window of wsize ticks */
15718 + int wsize, int delta, int n)
15720 + unsigned long long calc, prev;
15722 + /* just set it to n */
15723 + if (unlikely(delta >= wsize)) /* elapsed time covers the whole window */
15724 + return (n << FSHIFT); /* n shifted left by FSHIFT bits */
15726 + calc = delta * n; /* NOTE(review): product is computed in int before being stored in calc */
15728 + prev = (wsize - delta); /* remainder of the window; presumably weights the old load -- confirm */
15731 + do_div(calc, wsize); /* calc /= wsize, in place */
15736 +void vx_update_load(struct vx_info *vxi) /* refresh the three load values of vxi under load_lock */
15738 + uint32_t now, last, delta;
15739 + unsigned int nr_running, nr_uninterruptible;
15740 + unsigned int total;
15741 + unsigned long flags;
15743 + spin_lock_irqsave(&vxi->cvirt.load_lock, flags);
15746 + last = vxi->cvirt.load_last;
15747 + delta = now - last; /* time since the last update, in the unit of now */
15749 + if (delta < 5*HZ) /* less than 5*HZ elapsed since the last update */
15752 + nr_running = atomic_read(&vxi->cvirt.nr_running);
15753 + nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible);
15754 + total = nr_running + nr_uninterruptible; /* load sample: running plus uninterruptible */
15756 + vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0],
15757 + 60*HZ, delta, total); /* window of 60*HZ */
15758 + vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1],
15759 + 5*60*HZ, delta, total); /* window of 5*60*HZ */
15760 + vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2],
15761 + 15*60*HZ, delta, total); /* window of 15*60*HZ */
15763 + vxi->cvirt.load_last = now;
15765 + atomic_inc(&vxi->cvirt.load_updates); /* counts performed updates */
15766 + spin_unlock_irqrestore(&vxi->cvirt.load_lock, flags);
15771 + * Commands to do_syslog:
15773 + * 0 -- Close the log. Currently a NOP.
15774 + * 1 -- Open the log. Currently a NOP.
15775 + * 2 -- Read from the log.
15776 + * 3 -- Read all messages remaining in the ring buffer.
15777 + * 4 -- Read and clear all messages remaining in the ring buffer
15778 + * 5 -- Clear ring buffer.
15779 + * 6 -- Disable printk's to console
15780 + * 7 -- Enable printk's to console
15781 + * 8 -- Set level of messages printed to console
15782 + * 9 -- Return number of unread characters in the log buffer
15783 + * 10 -- Return size of the log buffer
15785 +int vx_do_syslog(int type, char __user *buf, int len) /* syslog command handler working on the caller's per-context log */
15788 + int do_clear = 0;
15789 + struct vx_info *vxi = current_vx_info();
15790 + struct _vx_syslog *log;
15794 + log = &vxi->cvirt.syslog; /* log state kept inside the context */
15797 + case 0: /* Close log */
15798 + case 1: /* Open log */
15800 + case 2: /* Read from log */
15801 + error = wait_event_interruptible(log->log_wait,
15802 + (log->log_start - log->log_end)); /* wait condition: log_start differs from log_end */
15805 + spin_lock_irq(&log->logbuf_lock);
15806 + spin_unlock_irq(&log->logbuf_lock);
15808 + case 4: /* Read/clear last kernel messages */
15810 + /* fall through */
15811 + case 3: /* Read last kernel messages */
15814 + case 5: /* Clear ring buffer */
15817 + case 6: /* Disable logging to console */
15818 + case 7: /* Enable logging to console */
15819 + case 8: /* Set level of messages printed to console */
15822 + case 9: /* Number of chars in the log buffer */
15824 + case 10: /* Size of the log buffer */
15834 +/* virtual host info names */
15836 +static char *vx_vhi_name(struct vx_info *vxi, int id) /* map a VHIN_* id to the buffer that holds that name for vxi */
15838 + struct nsproxy *nsproxy;
15839 + struct uts_namespace *uts;
15841 + if (id == VHIN_CONTEXT) /* the context name lives in vxi itself */
15842 + return vxi->vx_name;
15844 + nsproxy = vxi->space[0].vx_nsproxy; /* all other names come from the uts namespace of space[0] */
15848 + uts = nsproxy->uts_ns;
15853 + case VHIN_SYSNAME:
15854 + return uts->name.sysname;
15855 + case VHIN_NODENAME:
15856 + return uts->name.nodename;
15857 + case VHIN_RELEASE:
15858 + return uts->name.release;
15859 + case VHIN_VERSION:
15860 + return uts->name.version;
15861 + case VHIN_MACHINE:
15862 + return uts->name.machine;
15863 + case VHIN_DOMAINNAME:
15864 + return uts->name.domainname;
15871 +int vc_set_vhi_name(struct vx_info *vxi, void __user *data) /* vserver command: overwrite one virtual host info name of vxi */
15873 + struct vcmd_vhi_name_v0 vc_data;
15876 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15879 + name = vx_vhi_name(vxi, vc_data.field); /* destination buffer selected by the field id */
15883 + memcpy(name, vc_data.name, 65); /* fixed 65-byte copy; NOTE(review): presumably __NEW_UTS_LEN + 1, no terminator enforced here -- confirm */
15887 +int vc_get_vhi_name(struct vx_info *vxi, void __user *data) /* vserver command: read one virtual host info name of vxi */
15889 + struct vcmd_vhi_name_v0 vc_data;
15892 + if (copy_from_user(&vc_data, data, sizeof(vc_data))) /* fetched first because vc_data.field selects the name */
15895 + name = vx_vhi_name(vxi, vc_data.field);
15899 + memcpy(vc_data.name, name, 65); /* fixed 65-byte copy, same length as in vc_set_vhi_name() */
15900 + if (copy_to_user(data, &vc_data, sizeof(vc_data))) /* whole structure is written back */
15906 +int vc_virt_stat(struct vx_info *vxi, void __user *data) /* vserver command: copy cvirt statistics of vxi to user space */
15908 + struct vcmd_virt_stat_v0 vc_data;
15909 + struct _vx_cvirt *cvirt = &vxi->cvirt;
15910 + struct timespec uptime;
15912 + ktime_get_ts(&uptime);
15913 + set_normalized_timespec(&uptime, /* uptime -= bias_uptime, normalized */
15914 + uptime.tv_sec - cvirt->bias_uptime.tv_sec,
15915 + uptime.tv_nsec - cvirt->bias_uptime.tv_nsec);
15917 + vc_data.offset = timespec_to_ns(&cvirt->bias_ts); /* bias_ts reported in nanoseconds */
15918 + vc_data.uptime = timespec_to_ns(&uptime); /* biased uptime reported in nanoseconds */
15919 + vc_data.nr_threads = atomic_read(&cvirt->nr_threads);
15920 + vc_data.nr_running = atomic_read(&cvirt->nr_running);
15921 + vc_data.nr_uninterruptible = atomic_read(&cvirt->nr_uninterruptible);
15922 + vc_data.nr_onhold = atomic_read(&cvirt->nr_onhold);
15923 + vc_data.nr_forks = atomic_read(&cvirt->total_forks);
15924 + vc_data.load[0] = cvirt->load[0]; /* load values are copied unconverted */
15925 + vc_data.load[1] = cvirt->load[1];
15926 + vc_data.load[2] = cvirt->load[2];
15928 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15934 +#ifdef CONFIG_VSERVER_VTIME
15936 +/* virtualized time base */
15938 +void vx_adjust_timespec(struct timespec *ts) /* add the caller context's bias_ts to *ts in place */
15940 + struct vx_info *vxi;
15942 + if (!vx_flags(VXF_VIRT_TIME, 0)) /* VXF_VIRT_TIME check; presumably leaves ts untouched when unset -- confirm */
15945 + vxi = current_vx_info();
15946 + ts->tv_sec += vxi->cvirt.bias_ts.tv_sec;
15947 + ts->tv_nsec += vxi->cvirt.bias_ts.tv_nsec;
15949 + if (ts->tv_nsec >= NSEC_PER_SEC) { /* nanoseconds overflowed past one second */
15951 + ts->tv_nsec -= NSEC_PER_SEC;
15952 + } else if (ts->tv_nsec < 0) { /* nanoseconds went negative */
15954 + ts->tv_nsec += NSEC_PER_SEC;
15958 +int vx_settimeofday(const struct timespec *ts) /* settimeofday that, with VXF_VIRT_TIME, only changes the context's time bias */
15960 + struct timespec ats, delta;
15961 + struct vx_info *vxi;
15963 + if (!vx_flags(VXF_VIRT_TIME, 0)) /* without VXF_VIRT_TIME the request goes to do_settimeofday() */
15964 + return do_settimeofday(ts);
15966 + getnstimeofday(&ats); /* current time */
15967 + delta = timespec_sub(*ts, ats); /* requested time minus current time */
15969 + vxi = current_vx_info();
15970 + vxi->cvirt.bias_ts = timespec_add(vxi->cvirt.bias_ts, delta); /* delta is accumulated into bias_ts */
15976 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/cvirt_init.h linux-4.4/kernel/vserver/cvirt_init.h
15977 --- linux-4.4/kernel/vserver/cvirt_init.h 1970-01-01 01:00:00.000000000 +0100
15978 +++ linux-4.4/kernel/vserver/cvirt_init.h 2021-02-24 16:56:24.609490163 +0100
15982 +extern uint64_t vx_idle_jiffies(void);
15984 +static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt) /* initialize all fields of a context's cvirt state */
15986 + uint64_t idle_jiffies = vx_idle_jiffies();
15987 + uint64_t nsuptime;
15989 + ktime_get_ts(&cvirt->bias_uptime); /* time read at initialization becomes the uptime bias */
15990 + nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec /* the same bias expressed in nanoseconds */
15991 + * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;
15992 + cvirt->bias_clock = nsec_to_clock_t(nsuptime);
15993 + cvirt->bias_ts.tv_sec = 0; /* time-of-day bias starts at zero */
15994 + cvirt->bias_ts.tv_nsec = 0;
15996 + jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle); /* idle bias from vx_idle_jiffies() */
15997 + atomic_set(&cvirt->nr_threads, 0);
15998 + atomic_set(&cvirt->nr_running, 0);
15999 + atomic_set(&cvirt->nr_uninterruptible, 0);
16000 + atomic_set(&cvirt->nr_onhold, 0);
16002 + spin_lock_init(&cvirt->load_lock);
16003 + cvirt->load_last = jiffies; /* load accounting starts now */
16004 + atomic_set(&cvirt->load_updates, 0);
16005 + cvirt->load[0] = 0;
16006 + cvirt->load[1] = 0;
16007 + cvirt->load[2] = 0;
16008 + atomic_set(&cvirt->total_forks, 0);
16010 + spin_lock_init(&cvirt->syslog.logbuf_lock);
16011 + init_waitqueue_head(&cvirt->syslog.log_wait);
16012 + cvirt->syslog.log_start = 0; /* log_start == log_end here */
16013 + cvirt->syslog.log_end = 0;
16014 + cvirt->syslog.con_start = 0;
16015 + cvirt->syslog.logged_chars = 0;
16019 +void vx_info_init_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
16021 + // cvirt_pc->cpustat = { 0 };
16024 +static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt) /* with CONFIG_VSERVER_WARN: pass each counter to vxwprintk_xid() with an "on exit" message */
16026 +#ifdef CONFIG_VSERVER_WARN
16029 + vxwprintk_xid((value = atomic_read(&cvirt->nr_threads)), /* counter is read into value and used as first argument */
16030 + "!!! cvirt: %p[nr_threads] = %d on exit.",
16032 + vxwprintk_xid((value = atomic_read(&cvirt->nr_running)),
16033 + "!!! cvirt: %p[nr_running] = %d on exit.",
16035 + vxwprintk_xid((value = atomic_read(&cvirt->nr_uninterruptible)),
16036 + "!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
16038 + vxwprintk_xid((value = atomic_read(&cvirt->nr_onhold)),
16039 + "!!! cvirt: %p[nr_onhold] = %d on exit.",
16045 +void vx_info_exit_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
16050 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/cvirt_proc.h linux-4.4/kernel/vserver/cvirt_proc.h
16051 --- linux-4.4/kernel/vserver/cvirt_proc.h 1970-01-01 01:00:00.000000000 +0100
16052 +++ linux-4.4/kernel/vserver/cvirt_proc.h 2021-02-24 16:56:24.609490163 +0100
16054 +#ifndef _VX_CVIRT_PROC_H
16055 +#define _VX_CVIRT_PROC_H
16057 +#include <linux/nsproxy.h>
16058 +#include <linux/mnt_namespace.h>
16059 +#include <linux/ipc_namespace.h>
16060 +#include <linux/utsname.h>
16061 +#include <linux/ipc.h>
16063 +extern int vx_info_mnt_namespace(struct mnt_namespace *, char *);
16066 +int vx_info_proc_nsproxy(struct nsproxy *nsproxy, char *buffer) /* format nsproxy details as text into buffer */
16068 + struct mnt_namespace *ns;
16069 + struct uts_namespace *uts;
16070 + struct ipc_namespace *ipc;
16076 + length += sprintf(buffer + length, /* each section appends at buffer + length */
16077 + "NSProxy:\t%p [%p,%p,%p]\n",
16078 + nsproxy, nsproxy->mnt_ns,
16079 + nsproxy->uts_ns, nsproxy->ipc_ns);
16081 + ns = nsproxy->mnt_ns;
16085 + length += vx_info_mnt_namespace(ns, buffer + length); /* mount namespace section is produced by an external helper */
16089 + uts = nsproxy->uts_ns;
16093 + length += sprintf(buffer + length,
16094 + "SysName:\t%.*s\n"
16095 + "NodeName:\t%.*s\n"
16096 + "Release:\t%.*s\n"
16097 + "Version:\t%.*s\n"
16098 + "Machine:\t%.*s\n"
16099 + "DomainName:\t%.*s\n",
16100 + __NEW_UTS_LEN, uts->name.sysname, /* each string is printed with at most __NEW_UTS_LEN characters */
16101 + __NEW_UTS_LEN, uts->name.nodename,
16102 + __NEW_UTS_LEN, uts->name.release,
16103 + __NEW_UTS_LEN, uts->name.version,
16104 + __NEW_UTS_LEN, uts->name.machine,
16105 + __NEW_UTS_LEN, uts->name.domainname);
16108 + ipc = nsproxy->ipc_ns;
16112 + length += sprintf(buffer + length,
16113 + "SEMS:\t\t%d %d %d %d %d\n"
16114 + "MSG:\t\t%d %d %d\n"
16115 + "SHM:\t\t%lu %lu %d %ld\n",
16116 + ipc->sem_ctls[0], ipc->sem_ctls[1],
16117 + ipc->sem_ctls[2], ipc->sem_ctls[3],
16119 + ipc->msg_ctlmax, ipc->msg_ctlmnb, ipc->msg_ctlmni,
16120 + (unsigned long)ipc->shm_ctlmax, /* cast to match the %lu conversion */
16121 + (unsigned long)ipc->shm_ctlall,
16122 + ipc->shm_ctlmni, ipc->shm_tot); /* NOTE(review): shm_tot is printed with %ld -- confirm its declared type */
16129 +#include <linux/sched.h>
16131 +#define LOAD_INT(x) ((x) >> FSHIFT)
16132 +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)
16135 +int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer) /* format cvirt counters and load values as text into buffer */
16140 + length += sprintf(buffer + length,
16141 + "BiasUptime:\t%lu.%02lu\n",
16142 + (unsigned long)cvirt->bias_uptime.tv_sec,
16143 + (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100))); /* nanoseconds reduced to hundredths of a second */
16145 + a = cvirt->load[0] + (FIXED_1 / 200); /* adds FIXED_1/200 before the two-digit fraction is taken; presumably rounding -- confirm */
16146 + b = cvirt->load[1] + (FIXED_1 / 200);
16147 + c = cvirt->load[2] + (FIXED_1 / 200);
16148 + length += sprintf(buffer + length,
16149 + "nr_threads:\t%d\n"
16150 + "nr_running:\t%d\n"
16151 + "nr_unintr:\t%d\n"
16152 + "nr_onhold:\t%d\n"
16153 + "load_updates:\t%d\n"
16154 + "loadavg:\t%d.%02d %d.%02d %d.%02d\n"
16155 + "total_forks:\t%d\n",
16156 + atomic_read(&cvirt->nr_threads),
16157 + atomic_read(&cvirt->nr_running),
16158 + atomic_read(&cvirt->nr_uninterruptible),
16159 + atomic_read(&cvirt->nr_onhold),
16160 + atomic_read(&cvirt->load_updates),
16161 + LOAD_INT(a), LOAD_FRAC(a), /* integer part and two-digit fraction of each load value */
16162 + LOAD_INT(b), LOAD_FRAC(b),
16163 + LOAD_INT(c), LOAD_FRAC(c),
16164 + atomic_read(&cvirt->total_forks));
16169 +int vx_info_proc_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc,
16170 + char *buffer, int cpu)
16176 +#endif /* _VX_CVIRT_PROC_H */
16177 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/debug.c linux-4.4/kernel/vserver/debug.c
16178 --- linux-4.4/kernel/vserver/debug.c 1970-01-01 01:00:00.000000000 +0100
16179 +++ linux-4.4/kernel/vserver/debug.c 2021-02-24 16:56:24.609490163 +0100
16182 + * kernel/vserver/debug.c
16184 + * Copyright (C) 2005-2007 Herbert Pötzl
16186 + * V0.01 vx_info dump support
16190 +#include <linux/module.h>
16192 +#include <linux/vserver/context.h>
16195 +void dump_vx_info(struct vx_info *vxi, int level) /* print a vx_info header line, then dump its sub-structures */
16197 + printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id, /* pointer, xid, usecnt.tasks and one more value printed as hex */
16198 + atomic_read(&vxi->vx_usecnt),
16199 + atomic_read(&vxi->vx_tasks),
16202 + __dump_vx_limit(&vxi->limit); /* NOTE(review): any use of level to gate these dumps is not visible in this excerpt */
16203 + __dump_vx_sched(&vxi->sched);
16204 + __dump_vx_cvirt(&vxi->cvirt);
16205 + __dump_vx_cacct(&vxi->cacct);
16211 +EXPORT_SYMBOL_GPL(dump_vx_info);
16213 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/device.c linux-4.4/kernel/vserver/device.c
16214 --- linux-4.4/kernel/vserver/device.c 1970-01-01 01:00:00.000000000 +0100
16215 +++ linux-4.4/kernel/vserver/device.c 2021-02-24 16:56:24.609490163 +0100
16218 + * linux/kernel/vserver/device.c
16220 + * Linux-VServer: Device Support
16222 + * Copyright (C) 2006 Herbert Pötzl
16223 + * Copyright (C) 2007 Daniel Hokka Zakrisson
16225 + * V0.01 device mapping basics
16226 + * V0.02 added defaults
16230 +#include <linux/slab.h>
16231 +#include <linux/rcupdate.h>
16232 +#include <linux/fs.h>
16233 +#include <linux/namei.h>
16234 +#include <linux/hash.h>
16236 +#include <asm/errno.h>
16237 +#include <asm/uaccess.h>
16238 +#include <linux/vserver/base.h>
16239 +#include <linux/vserver/debug.h>
16240 +#include <linux/vserver/context.h>
16241 +#include <linux/vserver/device.h>
16242 +#include <linux/vserver/device_cmd.h>
16245 +#define DMAP_HASH_BITS 4
16248 +struct vs_mapping { /* one device mapping entry, allocated from dmap_cachep */
16250 + struct hlist_node hlist; /* reached as dm_hlist; links the entry into dmap_main_hash */
16251 + struct list_head list; /* reached as dm_list */
16253 +#define dm_hlist u.hlist
16254 +#define dm_list u.list
16257 + struct vx_dmap_target target; /* target device number and flags of the mapping */
16261 +static struct hlist_head dmap_main_hash[1 << DMAP_HASH_BITS];
16263 +static DEFINE_SPINLOCK(dmap_main_hash_lock);
16265 +static struct vx_dmap_target dmap_defaults[2] = {
16266 + { .flags = DATTR_OPEN },
16267 + { .flags = DATTR_OPEN },
16271 +struct kmem_cache *dmap_cachep __read_mostly;
16273 +int __init dmap_cache_init(void) /* create the slab cache for struct vs_mapping; registered through __initcall() */
16275 + dmap_cachep = kmem_cache_create("dmap_cache",
16276 + sizeof(struct vs_mapping), 0,
16277 + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); /* SLAB_PANIC is requested for the cache creation */
16281 +__initcall(dmap_cache_init);
16284 +static inline unsigned int __hashval(dev_t dev, int bits) /* bucket index for a device number */
16286 + return hash_long((unsigned long)dev, bits); /* device number hashed with the given bit count */
16290 +/* __hash_mapping()
16291 + * add the mapping to the hash table
16293 +static inline void __hash_mapping(struct vx_info *vxi, struct vs_mapping *vdm) /* takes dmap_main_hash_lock itself; vxi is used for the debug output only */
16295 + spinlock_t *hash_lock = &dmap_main_hash_lock;
16296 + struct hlist_head *head, *hash = dmap_main_hash;
16297 + int device = vdm->device;
16299 + spin_lock(hash_lock);
16300 + vxdprintk(VXD_CBIT(misc, 8), "__hash_mapping: %p[#%d] %08x:%08x",
16301 + vxi, vxi ? vxi->vx_id : 0, device, vdm->target.target); /* vxi may be NULL; xid 0 is printed then */
16303 + head = &hash[__hashval(device, DMAP_HASH_BITS)]; /* bucket chosen by the source device number */
16304 + hlist_add_head(&vdm->dm_hlist, head);
16305 + spin_unlock(hash_lock);
16309 +static inline int __mode_to_default(umode_t mode)
16322 +/* __set_default()
16325 +static inline void __set_default(struct vx_info *vxi, umode_t mode, /* store *vdmt as the default target for the device type given by mode */
16326 + struct vx_dmap_target *vdmt)
16328 + spinlock_t *hash_lock = &dmap_main_hash_lock;
16329 + spin_lock(hash_lock); /* defaults are changed under the hash lock */
16332 + vxi->dmap.targets[__mode_to_default(mode)] = *vdmt; /* per-context default */
16334 + dmap_defaults[__mode_to_default(mode)] = *vdmt; /* global default table */
16337 + spin_unlock(hash_lock);
16339 + vxdprintk(VXD_CBIT(misc, 8), "__set_default: %p[#%u] %08x %04x",
16340 + vxi, vxi ? vxi->vx_id : 0, vdmt->target, vdmt->flags); /* vxi may be NULL; xid 0 is printed then */
16344 +/* __remove_default()
16345 + * remove a default
16347 +static inline int __remove_default(struct vx_info *vxi, umode_t mode) /* drop the default target for the device type given by mode */
16349 + spinlock_t *hash_lock = &dmap_main_hash_lock;
16350 + spin_lock(hash_lock);
16353 + vxi->dmap.targets[__mode_to_default(mode)].flags = 0; /* per-context default: flags cleared */
16354 + else /* remove == reset */
16355 + dmap_defaults[__mode_to_default(mode)].flags = DATTR_OPEN | mode; /* global default: flags reset to DATTR_OPEN plus mode */
16357 + spin_unlock(hash_lock);
16362 +/* __find_mapping()
16363 + * find a mapping in the hash table
16365 + * caller must hold hash_lock
16367 +static inline int __find_mapping(vxid_t xid, dev_t device, umode_t mode, /* search the bucket of device for a matching entry */
16368 + struct vs_mapping **local, struct vs_mapping **global)
16370 + struct hlist_head *hash = dmap_main_hash;
16371 + struct hlist_head *head = &hash[__hashval(device, DMAP_HASH_BITS)];
16372 + struct hlist_node *pos;
16373 + struct vs_mapping *vdm;
16379 + hlist_for_each(pos, head) {
16380 + vdm = hlist_entry(pos, struct vs_mapping, dm_hlist);
16382 + if ((vdm->device == device) && /* same source device ... */
16383 + !((vdm->target.flags ^ mode) & S_IFMT)) { /* ... and identical S_IFMT bits in entry flags and mode */
16384 + if (vdm->xid == xid) { /* entry belongs to the requested xid */
16387 + } else if (global && vdm->xid == 0) /* entry with xid 0, considered only when global is requested */
16392 + if (global && *global)
16399 +/* __lookup_mapping()
16400 + * find a mapping and store the result in target and flags
16402 +static inline int __lookup_mapping(struct vx_info *vxi, /* takes dmap_main_hash_lock itself */
16403 + dev_t device, dev_t *target, int *flags, umode_t mode)
16405 + spinlock_t *hash_lock = &dmap_main_hash_lock;
16406 + struct vs_mapping *vdm, *global;
16407 + struct vx_dmap_target *vdmt;
16409 + vxid_t xid = vxi->vx_id; /* NOTE(review): vxi is dereferenced here but NULL-checked further down */
16412 + spin_lock(hash_lock);
16413 + if (__find_mapping(xid, device, mode, &vdm, &global) > 0) { /* positive result: use the entry returned in vdm */
16415 + vdmt = &vdm->target;
16419 + index = __mode_to_default(mode);
16420 + if (vxi && vxi->dmap.targets[index].flags) { /* context has a non-empty default for this type */
16422 + vdmt = &vxi->dmap.targets[index];
16423 + } else if (global) { /* otherwise an entry returned in global */
16425 + vdmt = &global->target;
16429 + vdmt = &dmap_defaults[index]; /* global default table entry */
16433 + if (target && (vdmt->flags & DATTR_REMAP)) /* remap only when the chosen target carries DATTR_REMAP */
16434 + *target = vdmt->target;
16436 + *target = device; /* device number passed through unchanged */
16438 + *flags = vdmt->flags;
16440 + spin_unlock(hash_lock);
16446 +/* __remove_mapping()
16447 + * remove a mapping from the hash table
16449 +static inline int __remove_mapping(struct vx_info *vxi, dev_t device, /* takes dmap_main_hash_lock itself */
16452 + spinlock_t *hash_lock = &dmap_main_hash_lock;
16453 + struct vs_mapping *vdm = NULL;
16456 + spin_lock(hash_lock);
16458 + ret = __find_mapping((vxi ? vxi->vx_id : 0), device, mode, &vdm, /* a NULL vxi searches with xid 0 */
16460 + vxdprintk(VXD_CBIT(misc, 8), "__remove_mapping: %p[#%d] %08x %04x",
16461 + vxi, vxi ? vxi->vx_id : 0, device, mode);
16464 + hlist_del(&vdm->dm_hlist); /* unlink while the lock is held */
16467 + spin_unlock(hash_lock);
16469 + kmem_cache_free(dmap_cachep, vdm); /* entry is freed after the lock has been dropped */
16475 +int vs_map_device(struct vx_info *vxi, /* look up the mapping of device for vxi and return its DATTR_MASK flag bits */
16476 + dev_t device, dev_t *target, umode_t mode)
16478 + int ret, flags = DATTR_MASK; /* flags start with all DATTR_MASK bits set */
16482 + *target = device; /* identity mapping */
16485 + ret = __lookup_mapping(vxi, device, target, &flags, mode);
16486 + vxdprintk(VXD_CBIT(misc, 8), "vs_map_device: %08x target: %08x flags: %04x mode: %04x mapped=%d",
16487 + device, target ? *target : 0, flags, mode, ret); /* target may be NULL; 0 is printed then */
16489 + return (flags & DATTR_MASK);
16494 +static int do_set_mapping(struct vx_info *vxi, /* add a hashed mapping entry or set a default target */
16495 + dev_t device, dev_t target, int flags, umode_t mode)
16498 + struct vs_mapping *new;
16500 + new = kmem_cache_alloc(dmap_cachep, GFP_KERNEL);
16504 + INIT_HLIST_NODE(&new->dm_hlist);
16505 + new->device = device;
16506 + new->target.target = target;
16507 + new->target.flags = flags | mode; /* mode bits are stored in the flags; __find_mapping() compares their S_IFMT part */
16508 + new->xid = (vxi ? vxi->vx_id : 0); /* a NULL vxi creates an entry with xid 0 */
16510 + vxdprintk(VXD_CBIT(misc, 8), "do_set_mapping: %08x target: %08x flags: %04x", device, target, flags);
16511 + __hash_mapping(vxi, new);
16513 + struct vx_dmap_target new = { /* other path: build a default target instead of a hash entry */
16514 + .target = target,
16515 + .flags = flags | mode,
16517 + __set_default(vxi, mode, &new);
16523 +static int do_unset_mapping(struct vx_info *vxi, /* remove a hashed mapping entry or a default target */
16524 + dev_t device, dev_t target, int flags, umode_t mode)
16526 + int ret = -EINVAL; /* result when neither removal path below is taken */
16529 + ret = __remove_mapping(vxi, device, mode); /* NOTE(review): the condition selecting this path is not visible in this excerpt */
16533 + ret = __remove_default(vxi, mode);
16543 +static inline int __user_device(const char __user *name, dev_t *dev, /* resolve a user-space path to device number and mode */
16546 + struct path path;
16553 + ret = user_lpath(name, &path);
16556 + if (path.dentry->d_inode) { /* only when the dentry has an inode */
16557 + *dev = path.dentry->d_inode->i_rdev; /* device number taken from i_rdev */
16558 + *mode = path.dentry->d_inode->i_mode;
16564 +static inline int __mapping_mode(dev_t device, dev_t target, /* derive the file-type mode of a mapping from device and/or target */
16565 + umode_t device_mode, umode_t target_mode, umode_t *mode)
16568 + *mode = device_mode & S_IFMT; /* type bits of the device */
16570 + *mode = target_mode & S_IFMT; /* type bits of the target; NOTE(review): selecting conditions are not visible in this excerpt */
16574 + /* if both given, device and target mode have to match */
16575 + if (device && target &&
16576 + ((device_mode ^ target_mode) & S_IFMT)) /* S_IFMT bits of the two modes differ */
16582 +static inline int do_mapping(struct vx_info *vxi, const char __user *device_path, /* shared worker of the set/unset mapping commands */
16583 + const char __user *target_path, int flags, int set)
16585 + dev_t device = ~0, target = ~0; /* both device numbers start as all-ones */
16586 + umode_t device_mode = 0, target_mode = 0, mode;
16589 + ret = __user_device(device_path, &device, &device_mode);
16592 + ret = __user_device(target_path, &target, &target_mode);
16596 + ret = __mapping_mode(device, target,
16597 + device_mode, target_mode, &mode);
16602 + return do_set_mapping(vxi, device, target, /* callers pass set=1 for the set commands */
16605 + return do_unset_mapping(vxi, device, target, /* callers pass set=0 for the unset commands */
16610 +int vc_set_mapping(struct vx_info *vxi, void __user *data) /* vserver command: set a device mapping */
16612 + struct vcmd_set_mapping_v0 vc_data;
16614 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16617 + return do_mapping(vxi, vc_data.device, vc_data.target,
16618 + vc_data.flags, 1); /* 1 = set */
16621 +int vc_unset_mapping(struct vx_info *vxi, void __user *data) /* vserver command: unset a device mapping */
16623 + struct vcmd_set_mapping_v0 vc_data;
16625 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16628 + return do_mapping(vxi, vc_data.device, vc_data.target,
16629 + vc_data.flags, 0); /* 0 = unset */
16633 +#ifdef CONFIG_COMPAT
16635 +int vc_set_mapping_x32(struct vx_info *vxi, void __user *data) /* CONFIG_COMPAT variant of vc_set_mapping() */
16637 + struct vcmd_set_mapping_v0_x32 vc_data;
16639 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16642 + return do_mapping(vxi, compat_ptr(vc_data.device_ptr), /* path pointers are converted with compat_ptr() */
16643 + compat_ptr(vc_data.target_ptr), vc_data.flags, 1);
16646 +int vc_unset_mapping_x32(struct vx_info *vxi, void __user *data) /* CONFIG_COMPAT variant of vc_unset_mapping() */
16648 + struct vcmd_set_mapping_v0_x32 vc_data;
16650 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16653 + return do_mapping(vxi, compat_ptr(vc_data.device_ptr), /* path pointers are converted with compat_ptr() */
16654 + compat_ptr(vc_data.target_ptr), vc_data.flags, 0);
16657 +#endif /* CONFIG_COMPAT */
16660 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/dlimit.c linux-4.4/kernel/vserver/dlimit.c
16661 --- linux-4.4/kernel/vserver/dlimit.c 1970-01-01 01:00:00.000000000 +0100
16662 +++ linux-4.4/kernel/vserver/dlimit.c 2021-02-24 16:56:24.609490163 +0100
16665 + * linux/kernel/vserver/dlimit.c
16667 + * Virtual Server: Context Disk Limits
16669 + * Copyright (C) 2004-2009 Herbert Pötzl
16671 + * V0.01 initial version
16672 + * V0.02 compat32 splitup
16673 + * V0.03 extended interface
16677 +#include <linux/statfs.h>
16678 +#include <linux/sched.h>
16679 +#include <linux/namei.h>
16680 +#include <linux/vs_tag.h>
16681 +#include <linux/vs_dlimit.h>
16682 +#include <linux/vserver/dlimit_cmd.h>
16683 +#include <linux/slab.h>
16684 +// #include <linux/gfp.h>
16686 +#include <asm/uaccess.h>
16688 +/* __alloc_dl_info()
16690 + * allocate an initialized dl_info struct
16691 + * does not make it visible (it is not added to the hash) */
16693 +static struct dl_info *__alloc_dl_info(struct super_block *sb, vtag_t tag)
16695 + struct dl_info *new = NULL;
16697 + vxdprintk(VXD_CBIT(dlim, 5),
16698 + "alloc_dl_info(%p,%d)*", sb, tag);
16700 + /* would this benefit from a slab cache? */
16701 + new = kmalloc(sizeof(struct dl_info), GFP_KERNEL);
16705 + memset(new, 0, sizeof(struct dl_info));
16706 + new->dl_tag = tag;
16708 + // INIT_RCU_HEAD(&new->dl_rcu);
16709 + INIT_HLIST_NODE(&new->dl_hlist);
16710 + spin_lock_init(&new->dl_lock);
16711 + atomic_set(&new->dl_refcnt, 0);
16712 + atomic_set(&new->dl_usecnt, 0);
16714 + /* rest of init goes here */
16716 + vxdprintk(VXD_CBIT(dlim, 4),
16717 + "alloc_dl_info(%p,%d) = %p", sb, tag, new);
16721 +/* __dealloc_dl_info()
16723 + * final disposal of dl_info */
16725 +static void __dealloc_dl_info(struct dl_info *dli)
16727 + vxdprintk(VXD_CBIT(dlim, 4),
16728 + "dealloc_dl_info(%p)", dli);
16730 + dli->dl_hlist.next = LIST_POISON1;
16731 + dli->dl_tag = -1;
16734 + BUG_ON(atomic_read(&dli->dl_usecnt));
16735 + BUG_ON(atomic_read(&dli->dl_refcnt));
16741 +/* global hash table of dl_info entries, bucketed by __hashval(sb, tag) */
16743 +#define DL_HASH_SIZE 13 /* number of hash buckets */
16745 +struct hlist_head dl_info_hash[DL_HASH_SIZE];
16747 +static DEFINE_SPINLOCK(dl_info_hash_lock); /* taken by do_addrem_dlimit() around hash/unhash */
16750 +static inline unsigned int __hashval(struct super_block *sb, vtag_t tag)
16752 + return ((tag ^ (unsigned long)sb) % DL_HASH_SIZE);
16757 +/* __hash_dl_info()
16759 + * add the dli to the global hash table
16760 + * requires the hash_lock to be held */
16762 +static inline void __hash_dl_info(struct dl_info *dli)
16764 + struct hlist_head *head;
16766 + vxdprintk(VXD_CBIT(dlim, 6),
16767 + "__hash_dl_info: %p[#%d]", dli, dli->dl_tag);
16768 + get_dl_info(dli);
16769 + head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_tag)];
16770 + hlist_add_head_rcu(&dli->dl_hlist, head);
16773 +/* __unhash_dl_info()
16775 + * remove the dli from the global hash table
16776 + * requires the hash_lock to be held */
16778 +static inline void __unhash_dl_info(struct dl_info *dli)
16780 + vxdprintk(VXD_CBIT(dlim, 6),
16781 + "__unhash_dl_info: %p[#%d]", dli, dli->dl_tag);
16782 + hlist_del_rcu(&dli->dl_hlist);
16783 + put_dl_info(dli);
16787 +/* __lookup_dl_info()
16789 + * requires the rcu_read_lock()
16790 + * doesn't increment the dl_refcnt */
16792 +static inline struct dl_info *__lookup_dl_info(struct super_block *sb, vtag_t tag)
16794 + struct hlist_head *head = &dl_info_hash[__hashval(sb, tag)];
16795 + struct dl_info *dli;
16797 + hlist_for_each_entry_rcu(dli, head, dl_hlist) {
16798 + if (dli->dl_tag == tag && dli->dl_sb == sb)
16805 +struct dl_info *locate_dl_info(struct super_block *sb, vtag_t tag)
16807 + struct dl_info *dli;
16810 + dli = get_dl_info(__lookup_dl_info(sb, tag));
16811 + vxdprintk(VXD_CBIT(dlim, 7),
16812 + "locate_dl_info(%p,#%d) = %p", sb, tag, dli);
16813 + rcu_read_unlock();
16817 +void rcu_free_dl_info(struct rcu_head *head)
16819 + struct dl_info *dli = container_of(head, struct dl_info, dl_rcu);
16820 + int usecnt, refcnt;
16822 + BUG_ON(!dli || !head);
16824 + usecnt = atomic_read(&dli->dl_usecnt);
16825 + BUG_ON(usecnt < 0);
16827 + refcnt = atomic_read(&dli->dl_refcnt);
16828 + BUG_ON(refcnt < 0);
16830 + vxdprintk(VXD_CBIT(dlim, 3),
16831 + "rcu_free_dl_info(%p)", dli);
16833 + __dealloc_dl_info(dli);
16835 + printk("!!! rcu didn't free\n");
16841 +static int do_addrem_dlimit(uint32_t id, const char __user *name,
16842 + uint32_t flags, int add)
16844 + struct path path;
16847 + ret = user_lpath(name, &path);
16849 + struct super_block *sb;
16850 + struct dl_info *dli;
16853 + if (!path.dentry->d_inode)
16854 + goto out_release;
16855 + if (!(sb = path.dentry->d_inode->i_sb))
16856 + goto out_release;
16859 + dli = __alloc_dl_info(sb, id);
16860 + spin_lock(&dl_info_hash_lock);
16863 + if (__lookup_dl_info(sb, id))
16865 + __hash_dl_info(dli);
16868 + spin_lock(&dl_info_hash_lock);
16869 + dli = __lookup_dl_info(sb, id);
16874 + __unhash_dl_info(dli);
16878 + spin_unlock(&dl_info_hash_lock);
16880 + __dealloc_dl_info(dli);
16887 +int vc_add_dlimit(uint32_t id, void __user *data)
16889 + struct vcmd_ctx_dlimit_base_v0 vc_data;
16891 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16894 + return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1);
16897 +int vc_rem_dlimit(uint32_t id, void __user *data)
16899 + struct vcmd_ctx_dlimit_base_v0 vc_data;
16901 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16904 + return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0);
16907 +#ifdef CONFIG_COMPAT
16909 +int vc_add_dlimit_x32(uint32_t id, void __user *data)
16911 + struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
16913 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16916 + return do_addrem_dlimit(id,
16917 + compat_ptr(vc_data.name_ptr), vc_data.flags, 1);
16920 +int vc_rem_dlimit_x32(uint32_t id, void __user *data)
16922 + struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
16924 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16927 + return do_addrem_dlimit(id,
16928 + compat_ptr(vc_data.name_ptr), vc_data.flags, 0);
16931 +#endif /* CONFIG_COMPAT */
16935 +int do_set_dlimit(uint32_t id, const char __user *name,
16936 + uint32_t space_used, uint32_t space_total,
16937 + uint32_t inodes_used, uint32_t inodes_total,
16938 + uint32_t reserved, uint32_t flags)
16940 + struct path path;
16943 + ret = user_lpath(name, &path);
16945 + struct super_block *sb;
16946 + struct dl_info *dli;
16949 + if (!path.dentry->d_inode)
16950 + goto out_release;
16951 + if (!(sb = path.dentry->d_inode->i_sb))
16952 + goto out_release;
16954 + /* sanity checks */
16955 + if ((reserved != CDLIM_KEEP &&
16956 + reserved > 100) ||
16957 + (inodes_used != CDLIM_KEEP &&
16958 + inodes_used > inodes_total) ||
16959 + (space_used != CDLIM_KEEP &&
16960 + space_used > space_total))
16961 + goto out_release;
16964 + dli = locate_dl_info(sb, id);
16966 + goto out_release;
16968 + spin_lock(&dli->dl_lock);
16970 + if (inodes_used != CDLIM_KEEP)
16971 + dli->dl_inodes_used = inodes_used;
16972 + if (inodes_total != CDLIM_KEEP)
16973 + dli->dl_inodes_total = inodes_total;
16974 + if (space_used != CDLIM_KEEP)
16975 + dli->dl_space_used = dlimit_space_32to64(
16976 + space_used, flags, DLIMS_USED);
16978 + if (space_total == CDLIM_INFINITY)
16979 + dli->dl_space_total = DLIM_INFINITY;
16980 + else if (space_total != CDLIM_KEEP)
16981 + dli->dl_space_total = dlimit_space_32to64(
16982 + space_total, flags, DLIMS_TOTAL);
16984 + if (reserved != CDLIM_KEEP)
16985 + dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100;
16987 + spin_unlock(&dli->dl_lock);
16989 + put_dl_info(dli);
16998 +int vc_set_dlimit(uint32_t id, void __user *data)
17000 + struct vcmd_ctx_dlimit_v0 vc_data;
17002 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17005 + return do_set_dlimit(id, vc_data.name,
17006 + vc_data.space_used, vc_data.space_total,
17007 + vc_data.inodes_used, vc_data.inodes_total,
17008 + vc_data.reserved, vc_data.flags);
17011 +#ifdef CONFIG_COMPAT
17013 +int vc_set_dlimit_x32(uint32_t id, void __user *data)
17015 + struct vcmd_ctx_dlimit_v0_x32 vc_data;
17017 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17020 + return do_set_dlimit(id, compat_ptr(vc_data.name_ptr),
17021 + vc_data.space_used, vc_data.space_total,
17022 + vc_data.inodes_used, vc_data.inodes_total,
17023 + vc_data.reserved, vc_data.flags);
17026 +#endif /* CONFIG_COMPAT */
17030 +int do_get_dlimit(uint32_t id, const char __user *name,
17031 + uint32_t *space_used, uint32_t *space_total,
17032 + uint32_t *inodes_used, uint32_t *inodes_total,
17033 + uint32_t *reserved, uint32_t *flags)
17035 + struct path path;
17038 + ret = user_lpath(name, &path);
17040 + struct super_block *sb;
17041 + struct dl_info *dli;
17044 + if (!path.dentry->d_inode)
17045 + goto out_release;
17046 + if (!(sb = path.dentry->d_inode->i_sb))
17047 + goto out_release;
17050 + dli = locate_dl_info(sb, id);
17052 + goto out_release;
17054 + spin_lock(&dli->dl_lock);
17055 + *inodes_used = dli->dl_inodes_used;
17056 + *inodes_total = dli->dl_inodes_total;
17058 + *space_used = dlimit_space_64to32(
17059 + dli->dl_space_used, flags, DLIMS_USED);
17061 + if (dli->dl_space_total == DLIM_INFINITY)
17062 + *space_total = CDLIM_INFINITY;
17064 + *space_total = dlimit_space_64to32(
17065 + dli->dl_space_total, flags, DLIMS_TOTAL);
17067 + *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);
17068 + spin_unlock(&dli->dl_lock);
17070 + put_dl_info(dli);
17081 +int vc_get_dlimit(uint32_t id, void __user *data)
17083 + struct vcmd_ctx_dlimit_v0 vc_data;
17086 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17089 + ret = do_get_dlimit(id, vc_data.name,
17090 + &vc_data.space_used, &vc_data.space_total,
17091 + &vc_data.inodes_used, &vc_data.inodes_total,
17092 + &vc_data.reserved, &vc_data.flags);
17096 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17101 +#ifdef CONFIG_COMPAT
17103 +int vc_get_dlimit_x32(uint32_t id, void __user *data)
17105 + struct vcmd_ctx_dlimit_v0_x32 vc_data;
17108 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17111 + ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr),
17112 + &vc_data.space_used, &vc_data.space_total,
17113 + &vc_data.inodes_used, &vc_data.inodes_total,
17114 + &vc_data.reserved, &vc_data.flags);
17118 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17123 +#endif /* CONFIG_COMPAT */
17126 +void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
17128 + struct dl_info *dli;
17129 + __u64 blimit, bfree, bavail;
17132 + dli = locate_dl_info(sb, dx_current_tag());
17136 + spin_lock(&dli->dl_lock);
17137 + if (dli->dl_inodes_total == (unsigned long)DLIM_INFINITY)
17140 + /* reduce max inodes available to limit */
17141 + if (buf->f_files > dli->dl_inodes_total)
17142 + buf->f_files = dli->dl_inodes_total;
17144 + ifree = dli->dl_inodes_total - dli->dl_inodes_used;
17145 + /* reduce free inodes to min */
17146 + if (ifree < buf->f_ffree)
17147 + buf->f_ffree = ifree;
17150 + if (dli->dl_space_total == DLIM_INFINITY)
17153 + blimit = dli->dl_space_total >> sb->s_blocksize_bits;
17155 + if (dli->dl_space_total < dli->dl_space_used)
17158 + bfree = (dli->dl_space_total - dli->dl_space_used)
17159 + >> sb->s_blocksize_bits;
17161 + bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);
17162 + if (bavail < dli->dl_space_used)
17165 + bavail = (bavail - dli->dl_space_used)
17166 + >> sb->s_blocksize_bits;
17168 + /* reduce max space available to limit */
17169 + if (buf->f_blocks > blimit)
17170 + buf->f_blocks = blimit;
17172 + /* reduce free space to min */
17173 + if (bfree < buf->f_bfree)
17174 + buf->f_bfree = bfree;
17176 + /* reduce avail space to min */
17177 + if (bavail < buf->f_bavail)
17178 + buf->f_bavail = bavail;
17181 + spin_unlock(&dli->dl_lock);
17182 + put_dl_info(dli);
17187 +#include <linux/module.h>
17189 +EXPORT_SYMBOL_GPL(locate_dl_info);
17190 +EXPORT_SYMBOL_GPL(rcu_free_dl_info);
17192 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/helper.c linux-4.4/kernel/vserver/helper.c
17193 --- linux-4.4/kernel/vserver/helper.c 1970-01-01 01:00:00.000000000 +0100
17194 +++ linux-4.4/kernel/vserver/helper.c 2021-02-24 16:56:24.609490163 +0100
17197 + * linux/kernel/vserver/helper.c
17199 + * Virtual Context Support
17201 + * Copyright (C) 2004-2007 Herbert Pötzl
17203 + * V0.01 basic helper
17207 +#include <linux/kmod.h>
17208 +#include <linux/reboot.h>
17209 +#include <linux/vs_context.h>
17210 +#include <linux/vs_network.h>
17211 +#include <linux/vserver/signal.h>
17214 +char vshelper_path[255] = "/sbin/vshelper"; /* default helper binary; used as argv[0] and as the path handed to do_vshelper() */
17216 +static int vshelper_init(struct subprocess_info *info, struct cred *new_cred)
17218 + current->flags &= ~PF_NO_SETAFFINITY;
17222 +static int vs_call_usermodehelper(char *path, char **argv, char **envp, int wait)
17224 + struct subprocess_info *info;
17225 + gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
17227 + info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
17228 + vshelper_init, NULL, NULL);
17229 + if (info == NULL)
17232 + return call_usermodehelper_exec(info, wait);
17235 +static int do_vshelper(char *name, char *argv[], char *envp[], int sync)
17239 + if ((ret = vs_call_usermodehelper(name, argv, envp,
17240 + sync ? UMH_WAIT_PROC : UMH_WAIT_EXEC))) {
17241 + printk(KERN_WARNING "%s: (%s %s) returned %s with %d\n",
17242 + name, argv[1], argv[2],
17243 + sync ? "sync" : "async", ret);
17245 + vxdprintk(VXD_CBIT(switch, 4),
17246 + "%s: (%s %s) returned %s with %d",
17247 + name, argv[1], argv[2], sync ? "sync" : "async", ret);
17252 + * vshelper path is set via /proc/sys
17253 + * invoked by vserver sys_reboot(), with
17254 + * the following arguments
17256 + * argv [0] = vshelper_path;
17257 + * argv [1] = action: "restart", "halt", "poweroff", ...
17258 + * argv [2] = context identifier
17260 + * envp [*] = type-specific parameters
17263 +long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg)
17265 + char id_buf[8], cmd_buf[16];
17266 + char uid_buf[16], pid_buf[16];
17269 + char *argv[] = {vshelper_path, NULL, id_buf, 0};
17270 + char *envp[] = {"HOME=/", "TERM=linux",
17271 + "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
17272 + uid_buf, pid_buf, cmd_buf, 0};
17274 + if (vx_info_state(vxi, VXS_HELPER))
17276 + vxi->vx_state |= VXS_HELPER;
17278 + snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
17280 + snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
17281 + snprintf(uid_buf, sizeof(uid_buf), "VS_UID=%d",
17282 + from_kuid(&init_user_ns, current_uid()));
17283 + snprintf(pid_buf, sizeof(pid_buf), "VS_PID=%d", current->pid);
17286 + case LINUX_REBOOT_CMD_RESTART:
17287 + argv[1] = "restart";
17290 + case LINUX_REBOOT_CMD_HALT:
17291 + argv[1] = "halt";
17294 + case LINUX_REBOOT_CMD_POWER_OFF:
17295 + argv[1] = "poweroff";
17298 + case LINUX_REBOOT_CMD_SW_SUSPEND:
17299 + argv[1] = "swsusp";
17302 + case LINUX_REBOOT_CMD_OOM:
17307 + vxi->vx_state &= ~VXS_HELPER;
17311 + ret = do_vshelper(vshelper_path, argv, envp, 0);
17312 + vxi->vx_state &= ~VXS_HELPER;
17313 + __wakeup_vx_info(vxi);
17314 + return (ret) ? -EPERM : 0;
17318 +long vs_reboot(unsigned int cmd, void __user *arg)
17320 + struct vx_info *vxi = current_vx_info();
17323 + vxdprintk(VXD_CBIT(misc, 5),
17324 + "vs_reboot(%p[#%d],%u)",
17325 + vxi, vxi ? vxi->vx_id : 0, cmd);
17327 + ret = vs_reboot_helper(vxi, cmd, arg);
17331 + vxi->reboot_cmd = cmd;
17332 + if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
17334 + case LINUX_REBOOT_CMD_RESTART:
17335 + case LINUX_REBOOT_CMD_HALT:
17336 + case LINUX_REBOOT_CMD_POWER_OFF:
17337 + vx_info_kill(vxi, 0, SIGKILL);
17338 + vx_info_kill(vxi, 1, SIGKILL);
17346 +long vs_oom_action(unsigned int cmd)
17348 + struct vx_info *vxi = current_vx_info();
17351 + vxdprintk(VXD_CBIT(misc, 5),
17352 + "vs_oom_action(%p[#%d],%u)",
17353 + vxi, vxi ? vxi->vx_id : 0, cmd);
17355 + ret = vs_reboot_helper(vxi, cmd, NULL);
17359 + vxi->reboot_cmd = cmd;
17360 + if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
17361 + vx_info_kill(vxi, 0, SIGKILL);
17362 + vx_info_kill(vxi, 1, SIGKILL);
17368 + * argv [0] = vshelper_path;
17369 + * argv [1] = action: "startup", "shutdown"
17370 + * argv [2] = context identifier
17372 + * envp [*] = type-specific parameters
17375 +long vs_state_change(struct vx_info *vxi, unsigned int cmd)
17377 + char id_buf[8], cmd_buf[16];
17378 + char *argv[] = {vshelper_path, NULL, id_buf, 0};
17379 + char *envp[] = {"HOME=/", "TERM=linux",
17380 + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
17382 + if (!vx_info_flags(vxi, VXF_SC_HELPER, 0))
17385 + snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
17386 + snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
17389 + case VSC_STARTUP:
17390 + argv[1] = "startup";
17392 + case VSC_SHUTDOWN:
17393 + argv[1] = "shutdown";
17399 + return do_vshelper(vshelper_path, argv, envp, 1);
17404 + * argv [0] = vshelper_path;
17405 + * argv [1] = action: "netup", "netdown"
17406 + * argv [2] = context identifier
17408 + * envp [*] = type-specific parameters
17411 +long vs_net_change(struct nx_info *nxi, unsigned int cmd)
17413 + char id_buf[8], cmd_buf[16];
17414 + char *argv[] = {vshelper_path, NULL, id_buf, 0};
17415 + char *envp[] = {"HOME=/", "TERM=linux",
17416 + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
17418 + if (!nx_info_flags(nxi, NXF_SC_HELPER, 0))
17421 + snprintf(id_buf, sizeof(id_buf), "%d", nxi->nx_id);
17422 + snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
17426 + argv[1] = "netup";
17428 + case VSC_NETDOWN:
17429 + argv[1] = "netdown";
17435 + return do_vshelper(vshelper_path, argv, envp, 1);
17438 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/history.c linux-4.4/kernel/vserver/history.c
17439 --- linux-4.4/kernel/vserver/history.c 1970-01-01 01:00:00.000000000 +0100
17440 +++ linux-4.4/kernel/vserver/history.c 2021-02-24 16:56:24.609490163 +0100
17443 + * kernel/vserver/history.c
17445 + * Virtual Context History Backtrace
17447 + * Copyright (C) 2004-2007 Herbert Pötzl
17449 + * V0.01 basic structure
17450 + * V0.02 hash/unhash and trace
17451 + * V0.03 preemption fixes
17455 +#include <linux/module.h>
17456 +#include <asm/uaccess.h>
17458 +#include <linux/vserver/context.h>
17459 +#include <linux/vserver/debug.h>
17460 +#include <linux/vserver/debug_cmd.h>
17461 +#include <linux/vserver/history.h>
17464 +#ifdef CONFIG_VSERVER_HISTORY
17465 +#define VXH_SIZE CONFIG_VSERVER_HISTORY_SIZE
17467 +#define VXH_SIZE 64
17470 +struct _vx_history {
17471 + unsigned int counter;
17473 + struct _vx_hist_entry entry[VXH_SIZE + 1];
17477 +DEFINE_PER_CPU(struct _vx_history, vx_history_buffer); /* one history buffer per CPU */
17479 +unsigned volatile int vxh_active = 1; /* nonzero: vxh_advance() uses slot counter % VXH_SIZE; zero: it uses the spare slot entry[VXH_SIZE] */
17481 +static atomic_t sequence = ATOMIC_INIT(0); /* incremented by vxh_advance() and stored in each entry's seq */
17486 + * requires disabled preemption */
17488 +struct _vx_hist_entry *vxh_advance(void *loc)
17490 + unsigned int cpu = smp_processor_id();
17491 + struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
17492 + struct _vx_hist_entry *entry;
17493 + unsigned int index;
17495 + index = vxh_active ? (hist->counter++ % VXH_SIZE) : VXH_SIZE;
17496 + entry = &hist->entry[index];
17498 + entry->seq = atomic_inc_return(&sequence);
17499 + entry->loc = loc;
17503 +EXPORT_SYMBOL_GPL(vxh_advance);
17506 +#define VXH_LOC_FMTS "(#%04x,*%d):%p" /* printk format matching VXH_LOC_ARGS: seq, cpu, loc */
17508 +#define VXH_LOC_ARGS(e) (e)->seq, cpu, (e)->loc /* NOTE: expands to a variable named cpu that must be in scope at the use site */
17511 +#define VXH_VXI_FMTS "%p[#%d,%d.%d]" /* printk format matching VXH_VXI_ARGS: vxi pointer, xid, usecnt, tasks */
17513 +#define VXH_VXI_ARGS(e) (e)->vxi.ptr, \
17514 + (e)->vxi.ptr ? (e)->vxi.xid : 0, \
17515 + (e)->vxi.ptr ? (e)->vxi.usecnt : 0, \
17516 + (e)->vxi.ptr ? (e)->vxi.tasks : 0 /* xid, usecnt and tasks are passed as 0 when vxi.ptr is NULL */
17518 +void vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu)
17520 + switch (e->type) {
17521 + case VXH_THROW_OOPS:
17522 + printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e));
17525 + case VXH_GET_VX_INFO:
17526 + case VXH_PUT_VX_INFO:
17527 + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
17529 + (e->type == VXH_GET_VX_INFO) ? "get" : "put",
17530 + VXH_VXI_ARGS(e));
17533 + case VXH_INIT_VX_INFO:
17534 + case VXH_SET_VX_INFO:
17535 + case VXH_CLR_VX_INFO:
17536 + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
17538 + (e->type == VXH_INIT_VX_INFO) ? "init" :
17539 + ((e->type == VXH_SET_VX_INFO) ? "set" : "clr"),
17540 + VXH_VXI_ARGS(e), e->sc.data);
17543 + case VXH_CLAIM_VX_INFO:
17544 + case VXH_RELEASE_VX_INFO:
17545 + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
17547 + (e->type == VXH_CLAIM_VX_INFO) ? "claim" : "release",
17548 + VXH_VXI_ARGS(e), e->sc.data);
17551 + case VXH_ALLOC_VX_INFO:
17552 + case VXH_DEALLOC_VX_INFO:
17553 + printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
17555 + (e->type == VXH_ALLOC_VX_INFO) ? "alloc" : "dealloc",
17556 + VXH_VXI_ARGS(e));
17559 + case VXH_HASH_VX_INFO:
17560 + case VXH_UNHASH_VX_INFO:
17561 + printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n",
17563 + (e->type == VXH_HASH_VX_INFO) ? "hash" : "unhash",
17564 + VXH_VXI_ARGS(e));
17567 + case VXH_LOC_VX_INFO:
17568 + case VXH_LOOKUP_VX_INFO:
17569 + case VXH_CREATE_VX_INFO:
17570 + printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n",
17572 + (e->type == VXH_CREATE_VX_INFO) ? "create" :
17573 + ((e->type == VXH_LOC_VX_INFO) ? "loc" : "lookup"),
17574 + e->ll.arg, VXH_VXI_ARGS(e));
17579 +static void __vxh_dump_history(void)
17581 + unsigned int i, cpu;
17583 + printk("History:\tSEQ: %8x\tNR_CPUS: %d\n",
17584 + atomic_read(&sequence), NR_CPUS);
17586 + for (i = 0; i < VXH_SIZE; i++) {
17587 + for_each_online_cpu(cpu) {
17588 + struct _vx_history *hist =
17589 + &per_cpu(vx_history_buffer, cpu);
17590 + unsigned int index = (hist->counter - i) % VXH_SIZE;
17591 + struct _vx_hist_entry *entry = &hist->entry[index];
17593 + vxh_dump_entry(entry, cpu);
17598 +void vxh_dump_history(void)
17602 + local_irq_enable();
17604 + local_irq_disable();
17606 + __vxh_dump_history();
17610 +/* vserver syscall commands below here */
17613 +int vc_dump_history(uint32_t id)
17616 + __vxh_dump_history();
17623 +int do_read_history(struct __user _vx_hist_entry *data,
17624 + int cpu, uint32_t *index, uint32_t *count)
17626 + int pos, ret = 0;
17627 + struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
17628 + int end = hist->counter;
17629 + int start = end - VXH_SIZE + 2;
17630 + int idx = *index;
17632 + /* special case: get current pos */
17638 + /* have we lost some data? */
17642 + for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) {
17643 + struct _vx_hist_entry *entry =
17644 + &hist->entry[idx % VXH_SIZE];
17646 + /* send entry to userspace */
17647 + ret = copy_to_user(&data[pos], entry, sizeof(*entry));
17651 + /* save new index and count */
17654 + return ret ? ret : (*index < end);
17657 +int vc_read_history(uint32_t id, void __user *data)
17659 + struct vcmd_read_history_v0 vc_data;
17662 + if (id >= NR_CPUS)
17665 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17668 + ret = do_read_history((struct __user _vx_hist_entry *)vc_data.data,
17669 + id, &vc_data.index, &vc_data.count);
17671 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17676 +#ifdef CONFIG_COMPAT
17678 +int vc_read_history_x32(uint32_t id, void __user *data)
17680 + struct vcmd_read_history_v0_x32 vc_data;
17683 + if (id >= NR_CPUS)
17686 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17689 + ret = do_read_history((struct __user _vx_hist_entry *)
17690 + compat_ptr(vc_data.data_ptr),
17691 + id, &vc_data.index, &vc_data.count);
17693 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17698 +#endif /* CONFIG_COMPAT */
17700 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/inet.c linux-4.4/kernel/vserver/inet.c
17701 --- linux-4.4/kernel/vserver/inet.c 1970-01-01 01:00:00.000000000 +0100
17702 +++ linux-4.4/kernel/vserver/inet.c 2021-02-24 16:56:24.609490163 +0100
17705 +#include <linux/in.h>
17706 +#include <linux/inetdevice.h>
17707 +#include <linux/export.h>
17708 +#include <linux/vs_inet.h>
17709 +#include <linux/vs_inet6.h>
17710 +#include <linux/vserver/debug.h>
17711 +#include <net/route.h>
17712 +#include <net/addrconf.h>
17715 +int nx_v4_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
17719 + if (!nxi1 || !nxi2 || nxi1 == nxi2)
17722 + struct nx_addr_v4 *ptr;
17723 + unsigned long irqflags;
17725 + spin_lock_irqsave(&nxi1->addr_lock, irqflags);
17726 + for (ptr = &nxi1->v4; ptr; ptr = ptr->next) {
17727 + if (v4_nx_addr_in_nx_info(nxi2, ptr, -1)) {
17732 + spin_unlock_irqrestore(&nxi1->addr_lock, irqflags);
17735 + vxdprintk(VXD_CBIT(net, 2),
17736 + "nx_v4_addr_conflict(%p,%p): %d",
17737 + nxi1, nxi2, ret);
17743 +#ifdef CONFIG_IPV6
17745 +int nx_v6_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
17749 + if (!nxi1 || !nxi2 || nxi1 == nxi2)
17752 + struct nx_addr_v6 *ptr;
17753 + unsigned long irqflags;
17755 + spin_lock_irqsave(&nxi1->addr_lock, irqflags);
17756 + for (ptr = &nxi1->v6; ptr; ptr = ptr->next) {
17757 + if (v6_nx_addr_in_nx_info(nxi2, ptr, -1)) {
17762 + spin_unlock_irqrestore(&nxi1->addr_lock, irqflags);
17765 + vxdprintk(VXD_CBIT(net, 2),
17766 + "nx_v6_addr_conflict(%p,%p): %d",
17767 + nxi1, nxi2, ret);
17774 +int v4_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
17776 + struct in_device *in_dev;
17777 + struct in_ifaddr **ifap;
17778 + struct in_ifaddr *ifa;
17783 + in_dev = in_dev_get(dev);
17787 + for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
17788 + ifap = &ifa->ifa_next) {
17789 + if (v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW)) {
17794 + in_dev_put(in_dev);
17800 +#ifdef CONFIG_IPV6
17802 +int v6_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
17804 + struct inet6_dev *in_dev;
17805 + struct inet6_ifaddr *ifa;
17810 + in_dev = in6_dev_get(dev);
17814 + // for (ifap = &in_dev->addr_list; (ifa = *ifap) != NULL;
17815 + list_for_each_entry(ifa, &in_dev->addr_list, if_list) {
17816 + if (v6_addr_in_nx_info(nxi, &ifa->addr, -1)) {
17821 + in6_dev_put(in_dev);
17828 +int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
17834 + if (nxi->v4.type && v4_dev_in_nx_info(dev, nxi))
17836 +#ifdef CONFIG_IPV6
17838 + if (nxi->v6.type && v6_dev_in_nx_info(dev, nxi))
17843 + vxdprintk(VXD_CBIT(net, 3),
17844 + "dev_in_nx_info(%p,%p[#%d]) = %d",
17845 + dev, nxi, nxi ? nxi->nx_id : 0, ret);
17849 +struct rtable *ip_v4_find_src(struct net *net, struct nx_info *nxi,
17850 + struct flowi4 *fl4)
17852 + struct rtable *rt;
17857 + /* FIXME: handle loopback-only case */
17858 + if (!NX_IPV4(nxi))
17859 + return ERR_PTR(-EPERM);
17861 + vxdprintk(VXD_CBIT(net, 4),
17862 + "ip_v4_find_src(%p[#%u]) " NIPQUAD_FMT " -> " NIPQUAD_FMT,
17863 + nxi, nxi ? nxi->nx_id : 0,
17864 + NIPQUAD(fl4->saddr), NIPQUAD(fl4->daddr));
17866 + /* single IP is unconditional */
17867 + if (nx_info_flags(nxi, NXF_SINGLE_IP, 0) &&
17868 + (fl4->saddr == INADDR_ANY))
17869 + fl4->saddr = nxi->v4.ip[0].s_addr;
17871 + if (fl4->saddr == INADDR_ANY) {
17872 + struct nx_addr_v4 *ptr;
17873 + __be32 found = 0;
17875 + rt = __ip_route_output_key(net, fl4);
17876 + if (!IS_ERR(rt)) {
17877 + found = fl4->saddr;
17879 + vxdprintk(VXD_CBIT(net, 4),
17880 + "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
17881 + nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(found));
17882 + if (v4_addr_in_nx_info(nxi, found, NXA_MASK_BIND))
17886 + WARN_ON_ONCE(in_irq());
17887 + spin_lock_bh(&nxi->addr_lock);
17888 + for (ptr = &nxi->v4; ptr; ptr = ptr->next) {
17889 + __be32 primary = ptr->ip[0].s_addr;
17890 + __be32 mask = ptr->mask.s_addr;
17891 + __be32 neta = primary & mask;
17893 + vxdprintk(VXD_CBIT(net, 4), "ip_v4_find_src(%p[#%u]) chk: "
17894 + NIPQUAD_FMT "/" NIPQUAD_FMT "/" NIPQUAD_FMT,
17895 + nxi, nxi ? nxi->nx_id : 0, NIPQUAD(primary),
17896 + NIPQUAD(mask), NIPQUAD(neta));
17897 + if ((found & mask) != neta)
17900 + fl4->saddr = primary;
17901 + rt = __ip_route_output_key(net, fl4);
17902 + vxdprintk(VXD_CBIT(net, 4),
17903 + "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
17904 + nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(primary));
17905 + if (!IS_ERR(rt)) {
17906 + found = fl4->saddr;
17908 + if (found == primary)
17909 + goto found_unlock;
17912 + /* still no source ip? */
17913 + found = ipv4_is_loopback(fl4->daddr)
17914 + ? IPI_LOOPBACK : nxi->v4.ip[0].s_addr;
17916 + spin_unlock_bh(&nxi->addr_lock);
17918 + /* assign src ip to flow */
17919 + fl4->saddr = found;
17922 + if (!v4_addr_in_nx_info(nxi, fl4->saddr, NXA_MASK_BIND))
17923 + return ERR_PTR(-EPERM);
17926 + if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) {
17927 + if (ipv4_is_loopback(fl4->daddr))
17928 + fl4->daddr = nxi->v4_lback.s_addr;
17929 + if (ipv4_is_loopback(fl4->saddr))
17930 + fl4->saddr = nxi->v4_lback.s_addr;
17931 + } else if (ipv4_is_loopback(fl4->daddr) &&
17932 + !nx_info_flags(nxi, NXF_LBACK_ALLOW, 0))
17933 + return ERR_PTR(-EPERM);
17938 +EXPORT_SYMBOL_GPL(ip_v4_find_src);
17940 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/init.c linux-4.4/kernel/vserver/init.c
17941 --- linux-4.4/kernel/vserver/init.c 1970-01-01 01:00:00.000000000 +0100
17942 +++ linux-4.4/kernel/vserver/init.c 2021-02-24 16:56:24.609490163 +0100
17945 + * linux/kernel/vserver/init.c
17947 + * Virtual Server Init
17949 + * Copyright (C) 2004-2007 Herbert Pötzl
17951 + * V0.01 basic structure
17955 +#include <linux/init.h>
17956 +#include <linux/module.h>
17958 +int vserver_register_sysctl(void);
17959 +void vserver_unregister_sysctl(void);
17962 +static int __init init_vserver(void)
17966 +#ifdef CONFIG_VSERVER_DEBUG
17967 + vserver_register_sysctl();
17973 +static void __exit exit_vserver(void)
17976 +#ifdef CONFIG_VSERVER_DEBUG
17977 + vserver_unregister_sysctl();
17982 +/* FIXME: GFP_ZONETYPES gone
17983 +long vx_slab[GFP_ZONETYPES]; */
17987 +module_init(init_vserver);
17988 +module_exit(exit_vserver);
17990 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/inode.c linux-4.4/kernel/vserver/inode.c
17991 --- linux-4.4/kernel/vserver/inode.c 1970-01-01 01:00:00.000000000 +0100
17992 +++ linux-4.4/kernel/vserver/inode.c 2021-02-24 16:56:24.612823601 +0100
17995 + * linux/kernel/vserver/inode.c
17997 + * Virtual Server: File System Support
17999 + * Copyright (C) 2004-2007 Herbert Pötzl
18001 + * V0.01 separated from vcontext V0.05
18002 + * V0.02 moved to tag (instead of xid)
18006 +#include <linux/tty.h>
18007 +#include <linux/proc_fs.h>
18008 +#include <linux/devpts_fs.h>
18009 +#include <linux/fs.h>
18010 +#include <linux/file.h>
18011 +#include <linux/mount.h>
18012 +#include <linux/parser.h>
18013 +#include <linux/namei.h>
18014 +#include <linux/magic.h>
18015 +#include <linux/slab.h>
18016 +#include <linux/vserver/inode.h>
18017 +#include <linux/vserver/inode_cmd.h>
18018 +#include <linux/vs_base.h>
18019 +#include <linux/vs_tag.h>
18021 +#include <asm/uaccess.h>
18022 +#include <../../fs/proc/internal.h>
18025 +static int __vc_get_iattr(struct inode *in, uint32_t *tag, uint32_t *flags, uint32_t *mask)
18027 + struct proc_dir_entry *entry;
18029 + if (!in || !in->i_sb)
18032 + *flags = IATTR_TAG
18033 + | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0)
18034 + | (IS_IXUNLINK(in) ? IATTR_IXUNLINK : 0)
18035 + | (IS_BARRIER(in) ? IATTR_BARRIER : 0)
18036 + | (IS_COW(in) ? IATTR_COW : 0);
18037 + *mask = IATTR_IXUNLINK | IATTR_IMMUTABLE | IATTR_COW;
18039 + if (S_ISDIR(in->i_mode))
18040 + *mask |= IATTR_BARRIER;
18042 + if (IS_TAGGED(in)) {
18043 + *tag = i_tag_read(in);
18044 + *mask |= IATTR_TAG;
18047 + switch (in->i_sb->s_magic) {
18048 + case PROC_SUPER_MAGIC:
18049 + entry = PROC_I(in)->pde;
18051 + /* check for specific inodes? */
18053 + *mask |= IATTR_FLAGS;
18055 + *flags |= (entry->vx_flags & IATTR_FLAGS);
18057 + *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
18060 + case DEVPTS_SUPER_MAGIC:
18061 + *tag = i_tag_read(in);
18062 + *mask |= IATTR_TAG;
18071 +int vc_get_iattr(void __user *data)
18073 + struct path path;
18074 + struct vcmd_ctx_iattr_v1 vc_data = { .tag = -1 };
18077 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18080 + ret = user_lpath(vc_data.name, &path);
18082 + ret = __vc_get_iattr(path.dentry->d_inode,
18083 + &vc_data.tag, &vc_data.flags, &vc_data.mask);
18089 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18094 +#ifdef CONFIG_COMPAT
18096 +int vc_get_iattr_x32(void __user *data)
18098 + struct path path;
18099 + struct vcmd_ctx_iattr_v1_x32 vc_data = { .tag = -1 };
18102 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18105 + ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
18107 + ret = __vc_get_iattr(path.dentry->d_inode,
18108 + &vc_data.tag, &vc_data.flags, &vc_data.mask);
18114 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18119 +#endif /* CONFIG_COMPAT */
18122 +int vc_fget_iattr(uint32_t fd, void __user *data)
18124 + struct file *filp;
18125 + struct vcmd_ctx_fiattr_v0 vc_data = { .tag = -1 };
18128 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18132 + if (!filp || !filp->f_path.dentry || !filp->f_path.dentry->d_inode)
18135 + ret = __vc_get_iattr(filp->f_path.dentry->d_inode,
18136 + &vc_data.tag, &vc_data.flags, &vc_data.mask);
18140 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18146 +static int __vc_set_iattr(struct dentry *de, uint32_t *tag, uint32_t *flags, uint32_t *mask)
18148 + struct inode *in = de->d_inode;
18149 + int error = 0, is_proc = 0, has_tag = 0;
18150 + struct iattr attr = { 0 };
18152 + if (!in || !in->i_sb)
18155 + is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
18156 + if ((*mask & IATTR_FLAGS) && !is_proc)
18159 + has_tag = IS_TAGGED(in) ||
18160 + (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC);
18161 + if ((*mask & IATTR_TAG) && !has_tag)
18164 + mutex_lock(&in->i_mutex);
18165 + if (*mask & IATTR_TAG) {
18166 + attr.ia_tag = make_ktag(&init_user_ns, *tag);
18167 + attr.ia_valid |= ATTR_TAG;
18170 + if (*mask & IATTR_FLAGS) {
18171 + struct proc_dir_entry *entry = PROC_I(in)->pde;
18172 + unsigned int iflags = PROC_I(in)->vx_flags;
18174 + iflags = (iflags & ~(*mask & IATTR_FLAGS))
18175 + | (*flags & IATTR_FLAGS);
18176 + PROC_I(in)->vx_flags = iflags;
18178 + entry->vx_flags = iflags;
18181 + if (*mask & (IATTR_IMMUTABLE | IATTR_IXUNLINK |
18182 + IATTR_BARRIER | IATTR_COW)) {
18183 + int iflags = in->i_flags;
18184 + int vflags = in->i_vflags;
18186 + if (*mask & IATTR_IMMUTABLE) {
18187 + if (*flags & IATTR_IMMUTABLE)
18188 + iflags |= S_IMMUTABLE;
18190 + iflags &= ~S_IMMUTABLE;
18192 + if (*mask & IATTR_IXUNLINK) {
18193 + if (*flags & IATTR_IXUNLINK)
18194 + iflags |= S_IXUNLINK;
18196 + iflags &= ~S_IXUNLINK;
18198 + if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) {
18199 + if (*flags & IATTR_BARRIER)
18200 + vflags |= V_BARRIER;
18202 + vflags &= ~V_BARRIER;
18204 + if (S_ISREG(in->i_mode) && (*mask & IATTR_COW)) {
18205 + if (*flags & IATTR_COW)
18208 + vflags &= ~V_COW;
18210 + if (in->i_op && in->i_op->sync_flags) {
18211 + error = in->i_op->sync_flags(in, iflags, vflags);
18217 + if (attr.ia_valid) {
18218 + if (in->i_op && in->i_op->setattr)
18219 + error = in->i_op->setattr(de, &attr);
18221 + error = inode_change_ok(in, &attr);
18223 + setattr_copy(in, &attr);
18224 + mark_inode_dirty(in);
18230 + mutex_unlock(&in->i_mutex);
18234 +int vc_set_iattr(void __user *data)
18236 + struct path path;
18237 + struct vcmd_ctx_iattr_v1 vc_data;
18240 + if (!capable(CAP_LINUX_IMMUTABLE))
18242 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18245 + ret = user_lpath(vc_data.name, &path);
18247 + ret = __vc_set_iattr(path.dentry,
18248 + &vc_data.tag, &vc_data.flags, &vc_data.mask);
18252 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18257 +#ifdef CONFIG_COMPAT
18259 +int vc_set_iattr_x32(void __user *data)
18261 + struct path path;
18262 + struct vcmd_ctx_iattr_v1_x32 vc_data;
18265 + if (!capable(CAP_LINUX_IMMUTABLE))
18267 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18270 + ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
18272 + ret = __vc_set_iattr(path.dentry,
18273 + &vc_data.tag, &vc_data.flags, &vc_data.mask);
18277 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18282 +#endif /* CONFIG_COMPAT */
18284 +int vc_fset_iattr(uint32_t fd, void __user *data)
18286 + struct file *filp;
18287 + struct vcmd_ctx_fiattr_v0 vc_data;
18290 + if (!capable(CAP_LINUX_IMMUTABLE))
18292 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18296 + if (!filp || !filp->f_path.dentry || !filp->f_path.dentry->d_inode)
18299 + ret = __vc_set_iattr(filp->f_path.dentry, &vc_data.tag,
18300 + &vc_data.flags, &vc_data.mask);
18304 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18310 +enum { Opt_notagcheck, Opt_tag, Opt_notag, Opt_tagid, Opt_err };
18312 +static match_table_t tokens = {
18313 + {Opt_notagcheck, "notagcheck"},
18314 +#ifdef CONFIG_PROPAGATE
18315 + {Opt_notag, "notag"},
18316 + {Opt_tag, "tag"},
18317 + {Opt_tagid, "tagid=%u"},
18323 +static void __dx_parse_remove(char *string, char *opt)
18325 + char *p = strstr(string, opt);
18329 + while (*q != '\0' && *q != ',')
18338 +int dx_parse_tag(char *string, vtag_t *tag, int remove, int *mnt_flags,
18339 + unsigned long *flags)
18342 + substring_t args[MAX_OPT_ARGS];
18344 + char *s, *p, *opts;
18345 +#if defined(CONFIG_PROPAGATE) || defined(CONFIG_VSERVER_DEBUG)
18351 + s = kstrdup(string, GFP_KERNEL | GFP_ATOMIC);
18356 + while ((p = strsep(&opts, ",")) != NULL) {
18357 + token = match_token(p, tokens, args);
18360 +#ifdef CONFIG_PROPAGATE
18365 + __dx_parse_remove(s, "tag");
18366 + *mnt_flags |= MNT_TAGID;
18367 + set |= MNT_TAGID;
18371 + __dx_parse_remove(s, "notag");
18372 + *mnt_flags |= MNT_NOTAG;
18373 + set |= MNT_NOTAG;
18376 + if (tag && !match_int(args, &option))
18379 + __dx_parse_remove(s, "tagid");
18380 + *mnt_flags |= MNT_TAGID;
18381 + set |= MNT_TAGID;
18383 +#endif /* CONFIG_PROPAGATE */
18384 + case Opt_notagcheck:
18386 + __dx_parse_remove(s, "notagcheck");
18387 + *flags |= MS_NOTAGCHECK;
18388 + set |= MS_NOTAGCHECK;
18391 + vxdprintk(VXD_CBIT(tag, 7),
18392 + "dx_parse_tag(" VS_Q("%s") "): %d:#%d",
18393 + p, token, option);
18396 + strcpy(string, s);
18401 +#ifdef CONFIG_PROPAGATE
18403 +void __dx_propagate_tag(struct nameidata *nd, struct inode *inode)
18405 + vtag_t new_tag = 0;
18406 + struct vfsmount *mnt;
18411 + mnt = nd->path.mnt;
18415 + propagate = (mnt->mnt_flags & MNT_TAGID);
18417 + new_tag = mnt->mnt_tag;
18419 + vxdprintk(VXD_CBIT(tag, 7),
18420 + "dx_propagate_tag(%p[#%lu.%d]): %d,%d",
18421 + inode, inode->i_ino, inode->i_tag,
18422 + new_tag, (propagate) ? 1 : 0);
18425 + i_tag_write(inode, new_tag);
18428 +#include <linux/module.h>
18430 +EXPORT_SYMBOL_GPL(__dx_propagate_tag);
18432 +#endif /* CONFIG_PROPAGATE */
18434 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/limit.c linux-4.4/kernel/vserver/limit.c
18435 --- linux-4.4/kernel/vserver/limit.c 1970-01-01 01:00:00.000000000 +0100
18436 +++ linux-4.4/kernel/vserver/limit.c 2021-02-24 16:56:24.612823601 +0100
18439 + * linux/kernel/vserver/limit.c
18441 + * Virtual Server: Context Limits
18443 + * Copyright (C) 2004-2010 Herbert Pötzl
18445 + * V0.01 broken out from vcontext V0.05
18446 + * V0.02 changed vcmds to vxi arg
18447 + * V0.03 added memory cgroup support
18451 +#include <linux/sched.h>
18452 +#include <linux/module.h>
18453 +#include <linux/memcontrol.h>
18454 +#include <linux/page_counter.h>
18455 +#include <linux/vs_limit.h>
18456 +#include <linux/vserver/limit.h>
18457 +#include <linux/vserver/limit_cmd.h>
18459 +#include <asm/uaccess.h>
18462 +const char *vlimit_name[NUM_LIMITS] = {
18463 + [RLIMIT_CPU] = "CPU",
18464 + [RLIMIT_NPROC] = "NPROC",
18465 + [RLIMIT_NOFILE] = "NOFILE",
18466 + [RLIMIT_LOCKS] = "LOCKS",
18467 + [RLIMIT_SIGPENDING] = "SIGP",
18468 + [RLIMIT_MSGQUEUE] = "MSGQ",
18470 + [VLIMIT_NSOCK] = "NSOCK",
18471 + [VLIMIT_OPENFD] = "OPENFD",
18472 + [VLIMIT_SHMEM] = "SHMEM",
18473 + [VLIMIT_DENTRY] = "DENTRY",
18476 +EXPORT_SYMBOL_GPL(vlimit_name);
18478 +#define MASK_ENTRY(x) (1 << (x))
18480 +const struct vcmd_ctx_rlimit_mask_v0 vlimit_mask = {
18483 + , /* softlimit */
18486 + MASK_ENTRY( RLIMIT_NPROC ) |
18487 + MASK_ENTRY( RLIMIT_NOFILE ) |
18488 + MASK_ENTRY( RLIMIT_LOCKS ) |
18489 + MASK_ENTRY( RLIMIT_MSGQUEUE ) |
18491 + MASK_ENTRY( VLIMIT_NSOCK ) |
18492 + MASK_ENTRY( VLIMIT_OPENFD ) |
18493 + MASK_ENTRY( VLIMIT_SHMEM ) |
18494 + MASK_ENTRY( VLIMIT_DENTRY ) |
18497 + /* accounting only */
18498 +uint32_t account_mask =
18499 + MASK_ENTRY( VLIMIT_SEMARY ) |
18500 + MASK_ENTRY( VLIMIT_NSEMS ) |
18501 + MASK_ENTRY( VLIMIT_MAPPED ) |
18505 +static int is_valid_vlimit(int id)
18507 + uint32_t mask = vlimit_mask.minimum |
18508 + vlimit_mask.softlimit | vlimit_mask.maximum;
18509 + return mask & (1 << id);
18512 +static int is_accounted_vlimit(int id)
18514 + if (is_valid_vlimit(id))
18516 + return account_mask & (1 << id);
18520 +static inline uint64_t vc_get_soft(struct vx_info *vxi, int id)
18522 + rlim_t limit = __rlim_soft(&vxi->limit, id);
18523 + return VX_VLIM(limit);
18526 +static inline uint64_t vc_get_hard(struct vx_info *vxi, int id)
18528 + rlim_t limit = __rlim_hard(&vxi->limit, id);
18529 + return VX_VLIM(limit);
18532 +static int do_get_rlimit(struct vx_info *vxi, uint32_t id,
18533 + uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum)
18535 + if (!is_valid_vlimit(id))
18539 + *minimum = CRLIM_UNSET;
18541 + *softlimit = vc_get_soft(vxi, id);
18543 + *maximum = vc_get_hard(vxi, id);
18547 +int vc_get_rlimit(struct vx_info *vxi, void __user *data)
18549 + struct vcmd_ctx_rlimit_v0 vc_data;
18552 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18555 + ret = do_get_rlimit(vxi, vc_data.id,
18556 + &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
18560 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18565 +static int do_set_rlimit(struct vx_info *vxi, uint32_t id,
18566 + uint64_t minimum, uint64_t softlimit, uint64_t maximum)
18568 + if (!is_valid_vlimit(id))
18571 + if (maximum != CRLIM_KEEP)
18572 + __rlim_hard(&vxi->limit, id) = VX_RLIM(maximum);
18573 + if (softlimit != CRLIM_KEEP)
18574 + __rlim_soft(&vxi->limit, id) = VX_RLIM(softlimit);
18576 + /* clamp soft limit */
18577 + if (__rlim_soft(&vxi->limit, id) > __rlim_hard(&vxi->limit, id))
18578 + __rlim_soft(&vxi->limit, id) = __rlim_hard(&vxi->limit, id);
18583 +int vc_set_rlimit(struct vx_info *vxi, void __user *data)
18585 + struct vcmd_ctx_rlimit_v0 vc_data;
18587 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18590 + return do_set_rlimit(vxi, vc_data.id,
18591 + vc_data.minimum, vc_data.softlimit, vc_data.maximum);
18594 +#ifdef CONFIG_IA32_EMULATION
18596 +int vc_set_rlimit_x32(struct vx_info *vxi, void __user *data)
18598 + struct vcmd_ctx_rlimit_v0_x32 vc_data;
18600 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18603 + return do_set_rlimit(vxi, vc_data.id,
18604 + vc_data.minimum, vc_data.softlimit, vc_data.maximum);
18607 +int vc_get_rlimit_x32(struct vx_info *vxi, void __user *data)
18609 + struct vcmd_ctx_rlimit_v0_x32 vc_data;
18612 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18615 + ret = do_get_rlimit(vxi, vc_data.id,
18616 + &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
18620 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18625 +#endif /* CONFIG_IA32_EMULATION */
18628 +int vc_get_rlimit_mask(uint32_t id, void __user *data)
18630 + if (copy_to_user(data, &vlimit_mask, sizeof(vlimit_mask)))
18636 +static inline void vx_reset_hits(struct _vx_limit *limit)
18640 + for (lim = 0; lim < NUM_LIMITS; lim++) {
18641 + atomic_set(&__rlim_lhit(limit, lim), 0);
18645 +int vc_reset_hits(struct vx_info *vxi, void __user *data)
18647 + vx_reset_hits(&vxi->limit);
18651 +static inline void vx_reset_minmax(struct _vx_limit *limit)
18656 + for (lim = 0; lim < NUM_LIMITS; lim++) {
18657 + value = __rlim_get(limit, lim);
18658 + __rlim_rmax(limit, lim) = value;
18659 + __rlim_rmin(limit, lim) = value;
18663 +int vc_reset_minmax(struct vx_info *vxi, void __user *data)
18665 + vx_reset_minmax(&vxi->limit);
18670 +int vc_rlimit_stat(struct vx_info *vxi, void __user *data)
18672 + struct vcmd_rlimit_stat_v0 vc_data;
18673 + struct _vx_limit *limit = &vxi->limit;
18676 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18680 + if (!is_accounted_vlimit(id))
18683 + vx_limit_fixup(limit, id);
18684 + vc_data.hits = atomic_read(&__rlim_lhit(limit, id));
18685 + vc_data.value = __rlim_get(limit, id);
18686 + vc_data.minimum = __rlim_rmin(limit, id);
18687 + vc_data.maximum = __rlim_rmax(limit, id);
18689 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18695 +#ifdef CONFIG_MEMCG
18697 +void dump_sysinfo(struct sysinfo *si)
18699 + printk(KERN_INFO "sysinfo: memunit=%u\n"
18700 + "\ttotalram:\t%lu\n"
18701 + "\tfreeram:\t%lu\n"
18702 + "\tsharedram:\t%lu\n"
18703 + "\tbufferram:\t%lu\n"
18704 + "\ttotalswap:\t%lu\n"
18705 + "\tfreeswap:\t%lu\n"
18706 + "\ttotalhigh:\t%lu\n"
18707 + "\tfreehigh:\t%lu\n",
18719 +void vx_vsi_meminfo(struct sysinfo *val)
18721 + struct mem_cgroup *mcg;
18722 + unsigned long res_limit, res_usage;
18725 + if (VXD_CBIT(cvirt, 4))
18726 + dump_sysinfo(val);
18729 + mcg = mem_cgroup_from_task(current);
18730 + if (VXD_CBIT(cvirt, 5))
18731 + dump_mem_cgroup(mcg);
18732 + rcu_read_unlock();
18736 + res_limit = mem_cgroup_mem_limit_pages(mcg);
18737 + res_usage = mem_cgroup_mem_usage_pages(mcg);
18738 + shift = val->mem_unit == 1 ? PAGE_SHIFT : 0;
18740 + if (res_limit != PAGE_COUNTER_MAX)
18741 + val->totalram = res_limit << shift;
18742 + val->freeram = val->totalram - (res_usage << shift);
18743 + val->bufferram = 0;
18744 + val->totalhigh = 0;
18745 + val->freehigh = 0;
18750 +void vx_vsi_swapinfo(struct sysinfo *val)
18752 +#ifdef CONFIG_MEMCG_SWAP
18753 + struct mem_cgroup *mcg;
18754 + unsigned long res_limit, res_usage, memsw_limit, memsw_usage;
18755 + signed long swap_limit, swap_usage;
18758 + if (VXD_CBIT(cvirt, 6))
18759 + dump_sysinfo(val);
18762 + mcg = mem_cgroup_from_task(current);
18763 + if (VXD_CBIT(cvirt, 7))
18764 + dump_mem_cgroup(mcg);
18765 + rcu_read_unlock();
18769 + res_limit = mem_cgroup_mem_limit_pages(mcg);
18771 + /* memory unlimited */
18772 + if (res_limit == PAGE_COUNTER_MAX)
18775 + res_usage = mem_cgroup_mem_usage_pages(mcg);
18776 + memsw_limit = mem_cgroup_memsw_limit_pages(mcg);
18777 + memsw_usage = mem_cgroup_memsw_usage_pages(mcg);
18778 + shift = val->mem_unit == 1 ? PAGE_SHIFT : 0;
18780 + swap_limit = memsw_limit - res_limit;
18781 + /* we have a swap limit? */
18782 + if (memsw_limit != PAGE_COUNTER_MAX)
18783 + val->totalswap = swap_limit << shift;
18785 + /* calculate swap part */
18786 + swap_usage = (memsw_usage > res_usage) ?
18787 + memsw_usage - res_usage : 0;
18789 + /* total shown minus usage gives free swap */
18790 + val->freeswap = (swap_usage < swap_limit) ?
18791 + val->totalswap - (swap_usage << shift) : 0;
18793 +#else /* !CONFIG_MEMCG_SWAP */
18794 + val->totalswap = 0;
18795 + val->freeswap = 0;
18796 +#endif /* !CONFIG_MEMCG_SWAP */
18800 +long vx_vsi_cached(struct sysinfo *val)
18803 +#ifdef CONFIG_MEMCG_BROKEN
18804 + struct mem_cgroup *mcg;
18806 + if (VXD_CBIT(cvirt, 8))
18807 + dump_sysinfo(val);
18810 + mcg = mem_cgroup_from_task(current);
18811 + if (VXD_CBIT(cvirt, 9))
18812 + dump_mem_cgroup(mcg);
18813 + rcu_read_unlock();
18817 + // cache = mem_cgroup_stat_read_cache(mcg);
18822 +#endif /* !CONFIG_MEMCG */
18824 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/limit_init.h linux-4.4/kernel/vserver/limit_init.h
18825 --- linux-4.4/kernel/vserver/limit_init.h 1970-01-01 01:00:00.000000000 +0100
18826 +++ linux-4.4/kernel/vserver/limit_init.h 2021-02-24 16:56:24.612823601 +0100
18830 +static inline void vx_info_init_limit(struct _vx_limit *limit)
18834 + for (lim = 0; lim < NUM_LIMITS; lim++) {
18835 + __rlim_soft(limit, lim) = RLIM_INFINITY;
18836 + __rlim_hard(limit, lim) = RLIM_INFINITY;
18837 + __rlim_set(limit, lim, 0);
18838 + atomic_set(&__rlim_lhit(limit, lim), 0);
18839 + __rlim_rmin(limit, lim) = 0;
18840 + __rlim_rmax(limit, lim) = 0;
18844 +static inline void vx_info_exit_limit(struct _vx_limit *limit)
18849 + for (lim = 0; lim < NUM_LIMITS; lim++) {
18850 + if ((1 << lim) & VLIM_NOCHECK)
18852 + value = __rlim_get(limit, lim);
18853 + vxwprintk_xid(value,
18854 + "!!! limit: %p[%s,%d] = %ld on exit.",
18855 + limit, vlimit_name[lim], lim, (long)value);
18859 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/limit_proc.h linux-4.4/kernel/vserver/limit_proc.h
18860 --- linux-4.4/kernel/vserver/limit_proc.h 1970-01-01 01:00:00.000000000 +0100
18861 +++ linux-4.4/kernel/vserver/limit_proc.h 2021-02-24 16:56:24.612823601 +0100
18863 +#ifndef _VX_LIMIT_PROC_H
18864 +#define _VX_LIMIT_PROC_H
18866 +#include <linux/vserver/limit_int.h>
18869 +#define VX_LIMIT_FMT ":\t%8ld\t%8ld/%8ld\t%8lld/%8lld\t%6d\n"
18870 +#define VX_LIMIT_TOP \
18871 + "Limit\t current\t min/max\t\t soft/hard\t\thits\n"
18873 +#define VX_LIMIT_ARG(r) \
18874 + (unsigned long)__rlim_get(limit, r), \
18875 + (unsigned long)__rlim_rmin(limit, r), \
18876 + (unsigned long)__rlim_rmax(limit, r), \
18877 + VX_VLIM(__rlim_soft(limit, r)), \
18878 + VX_VLIM(__rlim_hard(limit, r)), \
18879 + atomic_read(&__rlim_lhit(limit, r))
18881 +static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
18883 + vx_limit_fixup(limit, -1);
18884 + return sprintf(buffer, VX_LIMIT_TOP
18885 + "PROC" VX_LIMIT_FMT
18886 + "VM" VX_LIMIT_FMT
18887 + "VML" VX_LIMIT_FMT
18888 + "RSS" VX_LIMIT_FMT
18889 + "ANON" VX_LIMIT_FMT
18890 + "RMAP" VX_LIMIT_FMT
18891 + "FILES" VX_LIMIT_FMT
18892 + "OFD" VX_LIMIT_FMT
18893 + "LOCKS" VX_LIMIT_FMT
18894 + "SOCK" VX_LIMIT_FMT
18895 + "MSGQ" VX_LIMIT_FMT
18896 + "SHM" VX_LIMIT_FMT
18897 + "SEMA" VX_LIMIT_FMT
18898 + "SEMS" VX_LIMIT_FMT
18899 + "DENT" VX_LIMIT_FMT,
18900 + VX_LIMIT_ARG(RLIMIT_NPROC),
18901 + VX_LIMIT_ARG(RLIMIT_AS),
18902 + VX_LIMIT_ARG(RLIMIT_MEMLOCK),
18903 + VX_LIMIT_ARG(RLIMIT_RSS),
18904 + VX_LIMIT_ARG(VLIMIT_ANON),
18905 + VX_LIMIT_ARG(VLIMIT_MAPPED),
18906 + VX_LIMIT_ARG(RLIMIT_NOFILE),
18907 + VX_LIMIT_ARG(VLIMIT_OPENFD),
18908 + VX_LIMIT_ARG(RLIMIT_LOCKS),
18909 + VX_LIMIT_ARG(VLIMIT_NSOCK),
18910 + VX_LIMIT_ARG(RLIMIT_MSGQUEUE),
18911 + VX_LIMIT_ARG(VLIMIT_SHMEM),
18912 + VX_LIMIT_ARG(VLIMIT_SEMARY),
18913 + VX_LIMIT_ARG(VLIMIT_NSEMS),
18914 + VX_LIMIT_ARG(VLIMIT_DENTRY));
18917 +#endif /* _VX_LIMIT_PROC_H */
18920 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/network.c linux-4.4/kernel/vserver/network.c
18921 --- linux-4.4/kernel/vserver/network.c 1970-01-01 01:00:00.000000000 +0100
18922 +++ linux-4.4/kernel/vserver/network.c 2021-02-24 16:56:24.612823601 +0100
18925 + * linux/kernel/vserver/network.c
18927 + * Virtual Server: Network Support
18929 + * Copyright (C) 2003-2007 Herbert Pötzl
18931 + * V0.01 broken out from vcontext V0.05
18932 + * V0.02 cleaned up implementation
18933 + * V0.03 added equiv nx commands
18934 + * V0.04 switch to RCU based hash
18935 + * V0.05 and back to locking again
18936 + * V0.06 changed vcmds to nxi arg
18937 + * V0.07 have __create claim() the nxi
18941 +#include <linux/err.h>
18942 +#include <linux/slab.h>
18943 +#include <linux/rcupdate.h>
18944 +#include <net/ipv6.h>
18946 +#include <linux/vs_network.h>
18947 +#include <linux/vs_pid.h>
18948 +#include <linux/vserver/network_cmd.h>
18951 +atomic_t nx_global_ctotal = ATOMIC_INIT(0);
18952 +atomic_t nx_global_cactive = ATOMIC_INIT(0);
18954 +static struct kmem_cache *nx_addr_v4_cachep = NULL;
18955 +static struct kmem_cache *nx_addr_v6_cachep = NULL;
18958 +static int __init init_network(void)
18960 + nx_addr_v4_cachep = kmem_cache_create("nx_v4_addr_cache",
18961 + sizeof(struct nx_addr_v4), 0,
18962 + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
18963 + nx_addr_v6_cachep = kmem_cache_create("nx_v6_addr_cache",
18964 + sizeof(struct nx_addr_v6), 0,
18965 + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
18970 +/* __alloc_nx_addr_v4() */
18972 +static inline struct nx_addr_v4 *__alloc_nx_addr_v4(void)
18974 + struct nx_addr_v4 *nxa = kmem_cache_alloc(
18975 + nx_addr_v4_cachep, GFP_KERNEL);
18977 + if (!IS_ERR(nxa))
18978 + memset(nxa, 0, sizeof(*nxa));
18982 +/* __dealloc_nx_addr_v4() */
18984 +static inline void __dealloc_nx_addr_v4(struct nx_addr_v4 *nxa)
18986 + kmem_cache_free(nx_addr_v4_cachep, nxa);
18989 +/* __dealloc_nx_addr_v4_all() */
18991 +static inline void __dealloc_nx_addr_v4_all(struct nx_addr_v4 *nxa)
18994 + struct nx_addr_v4 *next = nxa->next;
18996 + __dealloc_nx_addr_v4(nxa);
19002 +#ifdef CONFIG_IPV6
19004 +/* __alloc_nx_addr_v6() */
19006 +static inline struct nx_addr_v6 *__alloc_nx_addr_v6(void)
19008 + struct nx_addr_v6 *nxa = kmem_cache_alloc(
19009 + nx_addr_v6_cachep, GFP_KERNEL);
19011 + if (!IS_ERR(nxa))
19012 + memset(nxa, 0, sizeof(*nxa));
19016 +/* __dealloc_nx_addr_v6() */
19018 +static inline void __dealloc_nx_addr_v6(struct nx_addr_v6 *nxa)
19020 + kmem_cache_free(nx_addr_v6_cachep, nxa);
19023 +/* __dealloc_nx_addr_v6_all() */
19025 +static inline void __dealloc_nx_addr_v6_all(struct nx_addr_v6 *nxa)
19028 + struct nx_addr_v6 *next = nxa->next;
19030 + __dealloc_nx_addr_v6(nxa);
19035 +#endif /* CONFIG_IPV6 */
19037 +/* __alloc_nx_info()
19039 + * allocate an initialized nx_info struct
19040 + * doesn't make it visible (hash) */
19042 +static struct nx_info *__alloc_nx_info(vnid_t nid)
19044 + struct nx_info *new = NULL;
19046 + vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
19048 + /* would this benefit from a slab cache? */
19049 + new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
19053 + memset(new, 0, sizeof(struct nx_info));
19054 + new->nx_id = nid;
19055 + INIT_HLIST_NODE(&new->nx_hlist);
19056 + atomic_set(&new->nx_usecnt, 0);
19057 + atomic_set(&new->nx_tasks, 0);
19058 + spin_lock_init(&new->addr_lock);
19059 + new->nx_state = 0;
19061 + new->nx_flags = NXF_INIT_SET;
19063 + /* rest of init goes here */
19065 + new->v4_lback.s_addr = htonl(INADDR_LOOPBACK);
19066 + new->v4_bcast.s_addr = htonl(INADDR_BROADCAST);
19068 + vxdprintk(VXD_CBIT(nid, 0),
19069 + "alloc_nx_info(%d) = %p", nid, new);
19070 + atomic_inc(&nx_global_ctotal);
19074 +/* __dealloc_nx_info()
19076 + * final disposal of nx_info */
19078 +static void __dealloc_nx_info(struct nx_info *nxi)
19080 + vxdprintk(VXD_CBIT(nid, 0),
19081 + "dealloc_nx_info(%p)", nxi);
19083 + nxi->nx_hlist.next = LIST_POISON1;
19086 + BUG_ON(atomic_read(&nxi->nx_usecnt));
19087 + BUG_ON(atomic_read(&nxi->nx_tasks));
19089 + __dealloc_nx_addr_v4_all(nxi->v4.next);
19090 +#ifdef CONFIG_IPV6
19091 + __dealloc_nx_addr_v6_all(nxi->v6.next);
19094 + nxi->nx_state |= NXS_RELEASED;
19096 + atomic_dec(&nx_global_ctotal);
19099 +static void __shutdown_nx_info(struct nx_info *nxi)
19101 + nxi->nx_state |= NXS_SHUTDOWN;
19102 + vs_net_change(nxi, VSC_NETDOWN);
19105 +/* exported stuff */
19107 +void free_nx_info(struct nx_info *nxi)
19109 + /* context shutdown is mandatory */
19110 + BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
19112 + /* context must not be hashed */
19113 + BUG_ON(nxi->nx_state & NXS_HASHED);
19115 + BUG_ON(atomic_read(&nxi->nx_usecnt));
19116 + BUG_ON(atomic_read(&nxi->nx_tasks));
19118 + __dealloc_nx_info(nxi);
19122 +void __nx_set_lback(struct nx_info *nxi)
19124 + int nid = nxi->nx_id;
19125 + __be32 lback = htonl(INADDR_LOOPBACK ^ ((nid & 0xFFFF) << 8));
19127 + nxi->v4_lback.s_addr = lback;
19130 +extern int __nx_inet_add_lback(__be32 addr);
19131 +extern int __nx_inet_del_lback(__be32 addr);
19134 +/* hash table for nx_info hash */
19136 +#define NX_HASH_SIZE 13
19138 +struct hlist_head nx_info_hash[NX_HASH_SIZE];
19140 +static DEFINE_SPINLOCK(nx_info_hash_lock);
19143 +static inline unsigned int __hashval(vnid_t nid)
19145 + return (nid % NX_HASH_SIZE);
19150 +/* __hash_nx_info()
19152 + * add the nxi to the global hash table
19153 + * requires the hash_lock to be held */
19155 +static inline void __hash_nx_info(struct nx_info *nxi)
19157 + struct hlist_head *head;
19159 + vxd_assert_lock(&nx_info_hash_lock);
19160 + vxdprintk(VXD_CBIT(nid, 4),
19161 + "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
19163 + /* context must not be hashed */
19164 + BUG_ON(nx_info_state(nxi, NXS_HASHED));
19166 + nxi->nx_state |= NXS_HASHED;
19167 + head = &nx_info_hash[__hashval(nxi->nx_id)];
19168 + hlist_add_head(&nxi->nx_hlist, head);
19169 + atomic_inc(&nx_global_cactive);
19172 +/* __unhash_nx_info()
19174 + * remove the nxi from the global hash table
19175 + * requires the hash_lock to be held */
19177 +static inline void __unhash_nx_info(struct nx_info *nxi)
19179 + vxd_assert_lock(&nx_info_hash_lock);
19180 + vxdprintk(VXD_CBIT(nid, 4),
19181 + "__unhash_nx_info: %p[#%d.%d.%d]", nxi, nxi->nx_id,
19182 + atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_tasks));
19184 + /* context must be hashed */
19185 + BUG_ON(!nx_info_state(nxi, NXS_HASHED));
19186 + /* but without tasks */
19187 + BUG_ON(atomic_read(&nxi->nx_tasks));
19189 + nxi->nx_state &= ~NXS_HASHED;
19190 + hlist_del(&nxi->nx_hlist);
19191 + atomic_dec(&nx_global_cactive);
19195 +/* __lookup_nx_info()
19197 + * requires the hash_lock to be held
19198 + * doesn't increment the nx_refcnt */
19200 +static inline struct nx_info *__lookup_nx_info(vnid_t nid)
19202 + struct hlist_head *head = &nx_info_hash[__hashval(nid)];
19203 + struct hlist_node *pos;
19204 + struct nx_info *nxi;
19206 + vxd_assert_lock(&nx_info_hash_lock);
19207 + hlist_for_each(pos, head) {
19208 + nxi = hlist_entry(pos, struct nx_info, nx_hlist);
19210 + if (nxi->nx_id == nid)
19215 + vxdprintk(VXD_CBIT(nid, 0),
19216 + "__lookup_nx_info(#%u): %p[#%u]",
19217 + nid, nxi, nxi ? nxi->nx_id : 0);
19222 +/* __create_nx_info()
19224 + * create the requested context
19225 + * get(), claim() and hash it */
19227 +static struct nx_info *__create_nx_info(int id)
19229 + struct nx_info *new, *nxi = NULL;
19231 + vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);
19233 + if (!(new = __alloc_nx_info(id)))
19234 + return ERR_PTR(-ENOMEM);
19236 + /* required to make dynamic xids unique */
19237 + spin_lock(&nx_info_hash_lock);
19239 + /* static context requested */
19240 + if ((nxi = __lookup_nx_info(id))) {
19241 + vxdprintk(VXD_CBIT(nid, 0),
19242 + "create_nx_info(%d) = %p (already there)", id, nxi);
19243 + if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
19244 + nxi = ERR_PTR(-EBUSY);
19246 + nxi = ERR_PTR(-EEXIST);
19249 + /* new context */
19250 + vxdprintk(VXD_CBIT(nid, 0),
19251 + "create_nx_info(%d) = %p (new)", id, new);
19252 + claim_nx_info(new, NULL);
19253 + __nx_set_lback(new);
19254 + __hash_nx_info(get_nx_info(new));
19255 + nxi = new, new = NULL;
19258 + spin_unlock(&nx_info_hash_lock);
19260 + __dealloc_nx_info(new);
19266 +/* exported stuff */
19269 +void unhash_nx_info(struct nx_info *nxi)
19271 + __shutdown_nx_info(nxi);
19272 + spin_lock(&nx_info_hash_lock);
19273 + __unhash_nx_info(nxi);
19274 + spin_unlock(&nx_info_hash_lock);
19277 +/* lookup_nx_info()
19279 + * search for a nx_info and get() it
19280 + * negative id means current */
19282 +struct nx_info *lookup_nx_info(int id)
19284 + struct nx_info *nxi = NULL;
19287 + nxi = get_nx_info(current_nx_info());
19288 + } else if (id > 1) {
19289 + spin_lock(&nx_info_hash_lock);
19290 + nxi = get_nx_info(__lookup_nx_info(id));
19291 + spin_unlock(&nx_info_hash_lock);
19296 +/* nid_is_hashed()
19298 + * verify that nid is still hashed */
19300 +int nid_is_hashed(vnid_t nid)
19304 + spin_lock(&nx_info_hash_lock);
19305 + hashed = (__lookup_nx_info(nid) != NULL);
19306 + spin_unlock(&nx_info_hash_lock);
19311 +#ifdef CONFIG_PROC_FS
19315 + * get a subset of hashed nids for proc
19316 + * assumes size is at least one */
19318 +int get_nid_list(int index, unsigned int *nids, int size)
19320 + int hindex, nr_nids = 0;
19322 + /* only show current and children */
19323 + if (!nx_check(0, VS_ADMIN | VS_WATCH)) {
19326 + nids[nr_nids] = nx_current_nid();
19330 + for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
19331 + struct hlist_head *head = &nx_info_hash[hindex];
19332 + struct hlist_node *pos;
19334 + spin_lock(&nx_info_hash_lock);
19335 + hlist_for_each(pos, head) {
19336 + struct nx_info *nxi;
19341 + nxi = hlist_entry(pos, struct nx_info, nx_hlist);
19342 + nids[nr_nids] = nxi->nx_id;
19343 + if (++nr_nids >= size) {
19344 + spin_unlock(&nx_info_hash_lock);
19348 + /* keep the lock time short */
19349 + spin_unlock(&nx_info_hash_lock);
19358 + * migrate task to new network
19359 + * gets nxi, puts old_nxi on change
19362 +int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
19364 + struct nx_info *old_nxi;
19370 + vxdprintk(VXD_CBIT(nid, 5),
19371 + "nx_migrate_task(%p,%p[#%d.%d.%d])",
19372 + p, nxi, nxi->nx_id,
19373 + atomic_read(&nxi->nx_usecnt),
19374 + atomic_read(&nxi->nx_tasks));
19376 + if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) &&
19377 + !nx_info_flags(nxi, NXF_STATE_SETUP, 0))
19380 + if (nx_info_state(nxi, NXS_SHUTDOWN))
19383 + /* maybe disallow this completely? */
19384 + old_nxi = task_get_nx_info(p);
19385 + if (old_nxi == nxi)
19390 + clr_nx_info(&p->nx_info);
19391 + claim_nx_info(nxi, p);
19392 + set_nx_info(&p->nx_info, nxi);
19393 + p->nid = nxi->nx_id;
19396 + vxdprintk(VXD_CBIT(nid, 5),
19397 + "moved task %p into nxi:%p[#%d]",
19398 + p, nxi, nxi->nx_id);
19401 + release_nx_info(old_nxi, p);
19404 + put_nx_info(old_nxi);
19409 +void nx_set_persistent(struct nx_info *nxi)
19411 + vxdprintk(VXD_CBIT(nid, 6),
19412 + "nx_set_persistent(%p[#%d])", nxi, nxi->nx_id);
19414 + get_nx_info(nxi);
19415 + claim_nx_info(nxi, NULL);
19418 +void nx_clear_persistent(struct nx_info *nxi)
19420 + vxdprintk(VXD_CBIT(nid, 6),
19421 + "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id);
19423 + release_nx_info(nxi, NULL);
19424 + put_nx_info(nxi);
19427 +void nx_update_persistent(struct nx_info *nxi)
19429 + if (nx_info_flags(nxi, NXF_PERSISTENT, 0))
19430 + nx_set_persistent(nxi);
19432 + nx_clear_persistent(nxi);
19435 +/* vserver syscall commands below here */
19437 +/* task nid and nx_info functions */
19439 +#include <asm/uaccess.h>
19442 +int vc_task_nid(uint32_t id)
19447 + struct task_struct *tsk;
19450 + tsk = find_task_by_real_pid(id);
19451 + nid = (tsk) ? tsk->nid : -ESRCH;
19452 + rcu_read_unlock();
19454 + nid = nx_current_nid();
19459 +int vc_nx_info(struct nx_info *nxi, void __user *data)
19461 + struct vcmd_nx_info_v0 vc_data;
19463 + vc_data.nid = nxi->nx_id;
19465 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19471 +/* network functions */
19473 +int vc_net_create(uint32_t nid, void __user *data)
19475 + struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
19476 + struct nx_info *new_nxi;
19479 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19482 + if ((nid > MAX_S_CONTEXT) || (nid < 2))
19485 + new_nxi = __create_nx_info(nid);
19486 + if (IS_ERR(new_nxi))
19487 + return PTR_ERR(new_nxi);
19489 + /* initial flags */
19490 + new_nxi->nx_flags = vc_data.flagword;
19493 + if (vs_net_change(new_nxi, VSC_NETUP))
19496 + ret = nx_migrate_task(current, new_nxi);
19500 + /* return context id on success */
19501 + ret = new_nxi->nx_id;
19503 + /* get a reference for persistent contexts */
19504 + if ((vc_data.flagword & NXF_PERSISTENT))
19505 + nx_set_persistent(new_nxi);
19507 + release_nx_info(new_nxi, NULL);
19508 + put_nx_info(new_nxi);
19513 +int vc_net_migrate(struct nx_info *nxi, void __user *data)
19515 + return nx_migrate_task(current, nxi);
19520 +struct nx_addr_v4 *__find_v4_addr(struct nx_info *nxi,
19521 + __be32 ip, __be32 ip2, __be32 mask, uint16_t type, uint16_t flags,
19522 + struct nx_addr_v4 **prev)
19524 + struct nx_addr_v4 *nxa = &nxi->v4;
19526 + for (; nxa; nxa = nxa->next) {
19527 + if ((nxa->ip[0].s_addr == ip) &&
19528 + (nxa->ip[1].s_addr == ip2) &&
19529 + (nxa->mask.s_addr == mask) &&
19530 + (nxa->type == type) &&
19531 + (nxa->flags == flags))
19534 + /* save previous entry */
19541 +int do_add_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
19542 + uint16_t type, uint16_t flags)
19544 + struct nx_addr_v4 *nxa = NULL;
19545 + struct nx_addr_v4 *new = __alloc_nx_addr_v4();
19546 + unsigned long irqflags;
19547 + int ret = -EEXIST;
19550 + return PTR_ERR(new);
19552 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
19553 + if (__find_v4_addr(nxi, ip, ip2, mask, type, flags, &nxa))
19556 + if (NX_IPV4(nxi)) {
19561 + /* remove single ip for ip list */
19562 + nxi->nx_flags &= ~NXF_SINGLE_IP;
19565 + nxa->ip[0].s_addr = ip;
19566 + nxa->ip[1].s_addr = ip2;
19567 + nxa->mask.s_addr = mask;
19568 + nxa->type = type;
19569 + nxa->flags = flags;
19572 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
19574 + __dealloc_nx_addr_v4(new);
19578 +int do_remove_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
19579 + uint16_t type, uint16_t flags)
19581 + struct nx_addr_v4 *nxa = NULL;
19582 + struct nx_addr_v4 *old = NULL;
19583 + unsigned long irqflags;
19586 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
19588 + case NXA_TYPE_ADDR:
19589 + old = __find_v4_addr(nxi, ip, ip2, mask, type, flags, &nxa);
19592 + nxa->next = old->next;
19593 + old->next = NULL;
19599 + old->next = NULL;
19601 + memset(old, 0, sizeof(*old));
19609 + case NXA_TYPE_ANY:
19612 + memset(nxa, 0, sizeof(*nxa));
19618 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
19619 + __dealloc_nx_addr_v4_all(old);
19624 +int vc_net_add(struct nx_info *nxi, void __user *data)
19626 + struct vcmd_net_addr_v0 vc_data;
19627 + int index, ret = 0;
19629 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19632 + switch (vc_data.type) {
19633 + case NXA_TYPE_IPV4:
19634 + if ((vc_data.count < 1) || (vc_data.count > 4))
19638 + while (index < vc_data.count) {
19639 + ret = do_add_v4_addr(nxi, vc_data.ip[index].s_addr, 0,
19640 + vc_data.mask[index].s_addr, NXA_TYPE_ADDR, 0);
19648 + case NXA_TYPE_IPV4|NXA_MOD_BCAST:
19649 + nxi->v4_bcast = vc_data.ip[0];
19653 + case NXA_TYPE_IPV4|NXA_MOD_LBACK:
19654 + nxi->v4_lback = vc_data.ip[0];
19665 +int vc_net_remove(struct nx_info *nxi, void __user *data)
19667 + struct vcmd_net_addr_v0 vc_data;
19669 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19672 + switch (vc_data.type) {
19673 + case NXA_TYPE_ANY:
19674 + return do_remove_v4_addr(nxi, 0, 0, 0, vc_data.type, 0);
19682 +int vc_net_add_ipv4_v1(struct nx_info *nxi, void __user *data)
19684 + struct vcmd_net_addr_ipv4_v1 vc_data;
19686 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19689 + switch (vc_data.type) {
19690 + case NXA_TYPE_ADDR:
19691 + case NXA_TYPE_MASK:
19692 + return do_add_v4_addr(nxi, vc_data.ip.s_addr, 0,
19693 + vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19695 + case NXA_TYPE_ADDR | NXA_MOD_BCAST:
19696 + nxi->v4_bcast = vc_data.ip;
19699 + case NXA_TYPE_ADDR | NXA_MOD_LBACK:
19700 + nxi->v4_lback = vc_data.ip;
19709 +int vc_net_add_ipv4(struct nx_info *nxi, void __user *data)
19711 + struct vcmd_net_addr_ipv4_v2 vc_data;
19713 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19716 + switch (vc_data.type) {
19717 + case NXA_TYPE_ADDR:
19718 + case NXA_TYPE_MASK:
19719 + case NXA_TYPE_RANGE:
19720 + return do_add_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr,
19721 + vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19723 + case NXA_TYPE_ADDR | NXA_MOD_BCAST:
19724 + nxi->v4_bcast = vc_data.ip;
19727 + case NXA_TYPE_ADDR | NXA_MOD_LBACK:
19728 + nxi->v4_lback = vc_data.ip;
19737 +int vc_net_rem_ipv4_v1(struct nx_info *nxi, void __user *data)
19739 + struct vcmd_net_addr_ipv4_v1 vc_data;
19741 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19744 + return do_remove_v4_addr(nxi, vc_data.ip.s_addr, 0,
19745 + vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19748 +int vc_net_rem_ipv4(struct nx_info *nxi, void __user *data)
19750 + struct vcmd_net_addr_ipv4_v2 vc_data;
19752 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19755 + return do_remove_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr,
19756 + vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19759 +#ifdef CONFIG_IPV6
19762 +struct nx_addr_v6 *__find_v6_addr(struct nx_info *nxi,
19763 + struct in6_addr *ip, struct in6_addr *mask,
19764 + uint32_t prefix, uint16_t type, uint16_t flags,
19765 + struct nx_addr_v6 **prev)
19767 + struct nx_addr_v6 *nxa = &nxi->v6;
19769 + for (; nxa; nxa = nxa->next) {
19770 + if (ipv6_addr_equal(&nxa->ip, ip) &&
19771 + ipv6_addr_equal(&nxa->mask, mask) &&
19772 + (nxa->prefix == prefix) &&
19773 + (nxa->type == type) &&
19774 + (nxa->flags == flags))
19777 + /* save previous entry */
19785 +int do_add_v6_addr(struct nx_info *nxi,
19786 + struct in6_addr *ip, struct in6_addr *mask,
19787 + uint32_t prefix, uint16_t type, uint16_t flags)
19789 + struct nx_addr_v6 *nxa = NULL;
19790 + struct nx_addr_v6 *new = __alloc_nx_addr_v6();
19791 + unsigned long irqflags;
19792 + int ret = -EEXIST;
19795 + return PTR_ERR(new);
19797 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
19798 + if (__find_v6_addr(nxi, ip, mask, prefix, type, flags, &nxa))
19801 + if (NX_IPV6(nxi)) {
19808 + nxa->mask = *mask;
19809 + nxa->prefix = prefix;
19810 + nxa->type = type;
19811 + nxa->flags = flags;
19814 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
19816 + __dealloc_nx_addr_v6(new);
19820 +int do_remove_v6_addr(struct nx_info *nxi,
19821 + struct in6_addr *ip, struct in6_addr *mask,
19822 + uint32_t prefix, uint16_t type, uint16_t flags)
19824 + struct nx_addr_v6 *nxa = NULL;
19825 + struct nx_addr_v6 *old = NULL;
19826 + unsigned long irqflags;
19829 + spin_lock_irqsave(&nxi->addr_lock, irqflags);
19831 + case NXA_TYPE_ADDR:
19832 + old = __find_v6_addr(nxi, ip, mask, prefix, type, flags, &nxa);
19835 + nxa->next = old->next;
19836 + old->next = NULL;
19842 + old->next = NULL;
19844 + memset(old, 0, sizeof(*old));
19852 + case NXA_TYPE_ANY:
19855 + memset(nxa, 0, sizeof(*nxa));
19861 + spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
19862 + __dealloc_nx_addr_v6_all(old);
19866 +int vc_net_add_ipv6(struct nx_info *nxi, void __user *data)
19868 + struct vcmd_net_addr_ipv6_v1 vc_data;
19870 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19873 + switch (vc_data.type) {
19874 + case NXA_TYPE_ADDR:
19875 + memset(&vc_data.mask, ~0, sizeof(vc_data.mask));
19876 + /* fallthrough */
19877 + case NXA_TYPE_MASK:
19878 + return do_add_v6_addr(nxi, &vc_data.ip, &vc_data.mask,
19879 + vc_data.prefix, vc_data.type, vc_data.flags);
19886 +int vc_net_remove_ipv6(struct nx_info *nxi, void __user *data)
19888 + struct vcmd_net_addr_ipv6_v1 vc_data;
19890 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19893 + switch (vc_data.type) {
19894 + case NXA_TYPE_ADDR:
19895 + memset(&vc_data.mask, ~0, sizeof(vc_data.mask));
19896 + /* fallthrough */
19897 + case NXA_TYPE_MASK:
19898 + return do_remove_v6_addr(nxi, &vc_data.ip, &vc_data.mask,
19899 + vc_data.prefix, vc_data.type, vc_data.flags);
19900 + case NXA_TYPE_ANY:
19901 + return do_remove_v6_addr(nxi, NULL, NULL, 0, vc_data.type, 0);
19908 +#endif /* CONFIG_IPV6 */
19911 +int vc_get_nflags(struct nx_info *nxi, void __user *data)
19913 + struct vcmd_net_flags_v0 vc_data;
19915 + vc_data.flagword = nxi->nx_flags;
19917 + /* special STATE flag handling */
19918 + vc_data.mask = vs_mask_flags(~0ULL, nxi->nx_flags, NXF_ONE_TIME);
19920 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19925 +int vc_set_nflags(struct nx_info *nxi, void __user *data)
19927 + struct vcmd_net_flags_v0 vc_data;
19928 + uint64_t mask, trigger;
19930 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19933 + /* special STATE flag handling */
19934 + mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
19935 + trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
19937 + nxi->nx_flags = vs_mask_flags(nxi->nx_flags,
19938 + vc_data.flagword, mask);
19939 + if (trigger & NXF_PERSISTENT)
19940 + nx_update_persistent(nxi);
19945 +int vc_get_ncaps(struct nx_info *nxi, void __user *data)
19947 + struct vcmd_net_caps_v0 vc_data;
19949 + vc_data.ncaps = nxi->nx_ncaps;
19950 + vc_data.cmask = ~0ULL;
19952 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19957 +int vc_set_ncaps(struct nx_info *nxi, void __user *data)
19959 + struct vcmd_net_caps_v0 vc_data;
19961 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19964 + nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps,
19965 + vc_data.ncaps, vc_data.cmask);
19970 +#include <linux/module.h>
19972 +module_init(init_network);
19974 +EXPORT_SYMBOL_GPL(free_nx_info);
19975 +EXPORT_SYMBOL_GPL(unhash_nx_info);
19977 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/proc.c linux-4.4/kernel/vserver/proc.c
19978 --- linux-4.4/kernel/vserver/proc.c 1970-01-01 01:00:00.000000000 +0100
19979 +++ linux-4.4/kernel/vserver/proc.c 2021-02-24 16:56:24.612823601 +0100
19982 + * linux/kernel/vserver/proc.c
19984 + * Virtual Context Support
19986 + * Copyright (C) 2003-2011 Herbert Pötzl
19988 + * V0.01 basic structure
19989 + * V0.02 adaptation vs1.3.0
19990 + * V0.03 proc permissions
19991 + * V0.04 locking/generic
19992 + * V0.05 next generation procfs
19993 + * V0.06 inode validation
19994 + * V0.07 generic rewrite vid
19995 + * V0.08 remove inode type
19996 + * V0.09 added u/wmask info
20000 +#include <linux/proc_fs.h>
20001 +#include <linux/fs_struct.h>
20002 +#include <linux/mount.h>
20003 +#include <linux/namei.h>
20004 +#include <asm/unistd.h>
20006 +#include <linux/vs_context.h>
20007 +#include <linux/vs_network.h>
20008 +#include <linux/vs_cvirt.h>
20010 +#include <linux/in.h>
20011 +#include <linux/inetdevice.h>
20012 +#include <linux/vs_inet.h>
20013 +#include <linux/vs_inet6.h>
20015 +#include <linux/vserver/global.h>
20017 +#include "cvirt_proc.h"
20018 +#include "cacct_proc.h"
20019 +#include "limit_proc.h"
20020 +#include "sched_proc.h"
20021 +#include "vci_config.h"
20023 +#include <../../fs/proc/internal.h>
20026 +static inline char *print_cap_t(char *buffer, kernel_cap_t *c)
20030 + CAP_FOR_EACH_U32(__capi) {
20031 + buffer += sprintf(buffer, "%08x",
20032 + c->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
20038 +static struct proc_dir_entry *proc_virtual;
20040 +static struct proc_dir_entry *proc_virtnet;
20043 +/* first the actual feeds */
20046 +static int proc_vci(char *buffer)
20048 + return sprintf(buffer,
20049 + "VCIVersion:\t%04x:%04x\n"
20050 + "VCISyscall:\t%d\n"
20051 + "VCIKernel:\t%08x\n",
20052 + VCI_VERSION >> 16,
20053 + VCI_VERSION & 0xFFFF,
20055 + vci_kernel_config());
20058 +static int proc_virtual_info(char *buffer)
20060 + return proc_vci(buffer);
20063 +static int proc_virtual_status(char *buffer)
20065 + return sprintf(buffer,
20067 + "#CActive:\t%d\n"
20068 + "#NSProxy:\t%d\t%d %d %d %d %d %d\n"
20069 + "#InitTask:\t%d\t%d %d\n",
20070 + atomic_read(&vx_global_ctotal),
20071 + atomic_read(&vx_global_cactive),
20072 + atomic_read(&vs_global_nsproxy),
20073 + atomic_read(&vs_global_fs),
20074 + atomic_read(&vs_global_mnt_ns),
20075 + atomic_read(&vs_global_uts_ns),
20076 + atomic_read(&nr_ipc_ns),
20077 + atomic_read(&vs_global_user_ns),
20078 + atomic_read(&vs_global_pid_ns),
20079 + atomic_read(&init_task.usage),
20080 + atomic_read(&init_task.nsproxy->count),
20081 + init_task.fs->users);
20085 +int proc_vxi_info(struct vx_info *vxi, char *buffer)
20089 + length = sprintf(buffer,
20097 + vxi->vx_badness_bias);
20101 +int proc_vxi_status(struct vx_info *vxi, char *buffer)
20103 + char *orig = buffer;
20105 + buffer += sprintf(buffer,
20108 + "Flags:\t%016llx\n",
20109 + atomic_read(&vxi->vx_usecnt),
20110 + atomic_read(&vxi->vx_tasks),
20111 + (unsigned long long)vxi->vx_flags);
20113 + buffer += sprintf(buffer, "BCaps:\t");
20114 + buffer = print_cap_t(buffer, &vxi->vx_bcaps);
20115 + buffer += sprintf(buffer, "\n");
20117 + buffer += sprintf(buffer,
20118 + "CCaps:\t%016llx\n"
20119 + "Umask:\t%16llx\n"
20120 + "Wmask:\t%16llx\n"
20121 + "Spaces:\t%08lx %08lx\n",
20122 + (unsigned long long)vxi->vx_ccaps,
20123 + (unsigned long long)vxi->vx_umask,
20124 + (unsigned long long)vxi->vx_wmask,
20125 + vxi->space[0].vx_nsmask, vxi->space[1].vx_nsmask);
20126 + return buffer - orig;
20129 +int proc_vxi_limit(struct vx_info *vxi, char *buffer)
20131 + return vx_info_proc_limit(&vxi->limit, buffer);
20134 +int proc_vxi_sched(struct vx_info *vxi, char *buffer)
20138 + length = vx_info_proc_sched(&vxi->sched, buffer);
20139 + for_each_online_cpu(cpu) {
20140 + length += vx_info_proc_sched_pc(
20141 + &vx_per_cpu(vxi, sched_pc, cpu),
20142 + buffer + length, cpu);
20147 +int proc_vxi_nsproxy0(struct vx_info *vxi, char *buffer)
20149 + return vx_info_proc_nsproxy(vxi->space[0].vx_nsproxy, buffer);
20152 +int proc_vxi_nsproxy1(struct vx_info *vxi, char *buffer)
20154 + return vx_info_proc_nsproxy(vxi->space[1].vx_nsproxy, buffer);
20157 +int proc_vxi_cvirt(struct vx_info *vxi, char *buffer)
20161 + vx_update_load(vxi);
20162 + length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
20163 + for_each_online_cpu(cpu) {
20164 + length += vx_info_proc_cvirt_pc(
20165 + &vx_per_cpu(vxi, cvirt_pc, cpu),
20166 + buffer + length, cpu);
20171 +int proc_vxi_cacct(struct vx_info *vxi, char *buffer)
20173 + return vx_info_proc_cacct(&vxi->cacct, buffer);
20177 +static int proc_virtnet_info(char *buffer)
20179 + return proc_vci(buffer);
20182 +static int proc_virtnet_status(char *buffer)
20184 + return sprintf(buffer,
20186 + "#CActive:\t%d\n",
20187 + atomic_read(&nx_global_ctotal),
20188 + atomic_read(&nx_global_cactive));
20191 +int proc_nxi_info(struct nx_info *nxi, char *buffer)
20193 + struct nx_addr_v4 *v4a;
20194 +#ifdef CONFIG_IPV6
20195 + struct nx_addr_v6 *v6a;
20199 + length = sprintf(buffer,
20202 + "Bcast:\t" NIPQUAD_FMT "\n"
20203 + "Lback:\t" NIPQUAD_FMT "\n",
20206 + NIPQUAD(nxi->v4_bcast.s_addr),
20207 + NIPQUAD(nxi->v4_lback.s_addr));
20209 + if (!NX_IPV4(nxi))
20211 + for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
20212 + length += sprintf(buffer + length, "%d:\t" NXAV4_FMT "\n",
20215 +#ifdef CONFIG_IPV6
20216 + if (!NX_IPV6(nxi))
20218 + for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
20219 + length += sprintf(buffer + length, "%d:\t" NXAV6_FMT "\n",
20226 +int proc_nxi_status(struct nx_info *nxi, char *buffer)
20230 + length = sprintf(buffer,
20233 + "Flags:\t%016llx\n"
20234 + "NCaps:\t%016llx\n",
20235 + atomic_read(&nxi->nx_usecnt),
20236 + atomic_read(&nxi->nx_tasks),
20237 + (unsigned long long)nxi->nx_flags,
20238 + (unsigned long long)nxi->nx_ncaps);
20244 +/* here the inode helpers */
20250 + struct inode_operations *iop;
20251 + struct file_operations *fop;
20252 + union proc_op op;
20255 +static struct inode *vs_proc_make_inode(struct super_block *sb, struct vs_entry *p)
20257 + struct inode *inode = new_inode(sb);
20262 + inode->i_mode = p->mode;
20264 + inode->i_op = p->iop;
20266 + inode->i_fop = p->fop;
20268 + set_nlink(inode, (p->mode & S_IFDIR) ? 2 : 1);
20269 + inode->i_flags |= S_IMMUTABLE;
20271 + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
20273 + i_uid_write(inode, 0);
20274 + i_gid_write(inode, 0);
20275 + i_tag_write(inode, 0);
20280 +static struct dentry *vs_proc_instantiate(struct inode *dir,
20281 + struct dentry *dentry, int id, void *ptr)
20283 + struct vs_entry *p = ptr;
20284 + struct inode *inode = vs_proc_make_inode(dir->i_sb, p);
20285 + struct dentry *error = ERR_PTR(-EINVAL);
20290 + PROC_I(inode)->op = p->op;
20291 + PROC_I(inode)->fd = id;
20292 + d_add(dentry, inode);
20300 +typedef struct dentry *vx_instantiate_t(struct inode *, struct dentry *, int, void *);
20304 + * Fill a directory entry.
20306 + * If possible create the dcache entry and derive our inode number and
20307 + * file type from dcache entry.
20309 + * Since all of the proc inode numbers are dynamically generated, the inode
20310 + * numbers do not exist until the inode is cached. This means creating the
20311 + * dcache entry in iterate is necessary to keep the inode numbers
20312 + * reported by iterate in sync with the inode numbers reported
20315 +static int vx_proc_fill_cache(struct file *filp, struct dir_context *ctx,
20316 + char *name, int len, vx_instantiate_t instantiate, int id, void *ptr)
20318 + struct dentry *child, *dir = filp->f_path.dentry;
20319 + struct inode *inode;
20320 + struct qstr qname;
20322 + unsigned type = DT_UNKNOWN;
20324 + qname.name = name;
20326 + qname.hash = full_name_hash(name, len);
20328 + child = d_lookup(dir, &qname);
20330 + struct dentry *new;
20331 + new = d_alloc(dir, &qname);
20333 + child = instantiate(dir->d_inode, new, id, ptr);
20340 + if (!child || IS_ERR(child) || !child->d_inode)
20341 + goto end_instantiate;
20342 + inode = child->d_inode;
20344 + ino = inode->i_ino;
20345 + type = inode->i_mode >> 12;
20351 + return !dir_emit(ctx, name, len, ino, type);
20356 +/* get and revalidate vx_info/xid */
20359 +struct vx_info *get_proc_vx_info(struct inode *inode)
20361 + return lookup_vx_info(PROC_I(inode)->fd);
20364 +static int proc_xid_revalidate(struct dentry *dentry, unsigned int flags)
20366 + struct inode *inode = dentry->d_inode;
20367 + vxid_t xid = PROC_I(inode)->fd;
20369 + if (flags & LOOKUP_RCU) /* FIXME: can be dropped? */
20372 + if (!xid || xid_is_hashed(xid))
20379 +/* get and revalidate nx_info/nid */
20381 +static int proc_nid_revalidate(struct dentry *dentry, unsigned int flags)
20383 + struct inode *inode = dentry->d_inode;
20384 + vnid_t nid = PROC_I(inode)->fd;
20386 + if (flags & LOOKUP_RCU) /* FIXME: can be dropped? */
20389 + if (!nid || nid_is_hashed(nid))
20397 +#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
20399 +static ssize_t proc_vs_info_read(struct file *file, char __user *buf,
20400 + size_t count, loff_t *ppos)
20402 + struct inode *inode = file->f_path.dentry->d_inode;
20403 + unsigned long page;
20404 + ssize_t length = 0;
20406 + if (count > PROC_BLOCK_SIZE)
20407 + count = PROC_BLOCK_SIZE;
20409 + /* sanity check; phase this out once the code is stable */
20410 + WARN_ON(PROC_I(inode)->fd);
20412 + if (!(page = __get_free_page(GFP_KERNEL)))
20415 + BUG_ON(!PROC_I(inode)->op.proc_vs_read);
20416 + length = PROC_I(inode)->op.proc_vs_read((char *)page);
20419 + length = simple_read_from_buffer(buf, count, ppos,
20420 + (char *)page, length);
20426 +static ssize_t proc_vx_info_read(struct file *file, char __user *buf,
20427 + size_t count, loff_t *ppos)
20429 + struct inode *inode = file->f_path.dentry->d_inode;
20430 + struct vx_info *vxi = NULL;
20431 + vxid_t xid = PROC_I(inode)->fd;
20432 + unsigned long page;
20433 + ssize_t length = 0;
20435 + if (count > PROC_BLOCK_SIZE)
20436 + count = PROC_BLOCK_SIZE;
20438 + /* fade that out as soon as stable */
20440 + vxi = lookup_vx_info(xid);
20444 + length = -ENOMEM;
20445 + if (!(page = __get_free_page(GFP_KERNEL)))
20448 + BUG_ON(!PROC_I(inode)->op.proc_vxi_read);
20449 + length = PROC_I(inode)->op.proc_vxi_read(vxi, (char *)page);
20452 + length = simple_read_from_buffer(buf, count, ppos,
20453 + (char *)page, length);
20457 + put_vx_info(vxi);
20462 +static ssize_t proc_nx_info_read(struct file *file, char __user *buf,
20463 + size_t count, loff_t *ppos)
20465 + struct inode *inode = file->f_path.dentry->d_inode;
20466 + struct nx_info *nxi = NULL;
20467 + vnid_t nid = PROC_I(inode)->fd;
20468 + unsigned long page;
20469 + ssize_t length = 0;
20471 + if (count > PROC_BLOCK_SIZE)
20472 + count = PROC_BLOCK_SIZE;
20474 + /* fade that out as soon as stable */
20476 + nxi = lookup_nx_info(nid);
20480 + length = -ENOMEM;
20481 + if (!(page = __get_free_page(GFP_KERNEL)))
20484 + BUG_ON(!PROC_I(inode)->op.proc_nxi_read);
20485 + length = PROC_I(inode)->op.proc_nxi_read(nxi, (char *)page);
20488 + length = simple_read_from_buffer(buf, count, ppos,
20489 + (char *)page, length);
20493 + put_nx_info(nxi);
20500 +/* here comes the lower level */
20503 +#define NOD(NAME, MODE, IOP, FOP, OP) { \
20504 + .len = sizeof(NAME) - 1, \
20505 + .name = (NAME), \
20513 +#define DIR(NAME, MODE, OTYPE) \
20514 + NOD(NAME, (S_IFDIR | (MODE)), \
20515 + &proc_ ## OTYPE ## _inode_operations, \
20516 + &proc_ ## OTYPE ## _file_operations, { } )
20518 +#define INF(NAME, MODE, OTYPE) \
20519 + NOD(NAME, (S_IFREG | (MODE)), NULL, \
20520 + &proc_vs_info_file_operations, \
20521 + { .proc_vs_read = &proc_##OTYPE } )
20523 +#define VINF(NAME, MODE, OTYPE) \
20524 + NOD(NAME, (S_IFREG | (MODE)), NULL, \
20525 + &proc_vx_info_file_operations, \
20526 + { .proc_vxi_read = &proc_##OTYPE } )
20528 +#define NINF(NAME, MODE, OTYPE) \
20529 + NOD(NAME, (S_IFREG | (MODE)), NULL, \
20530 + &proc_nx_info_file_operations, \
20531 + { .proc_nxi_read = &proc_##OTYPE } )
20534 +static struct file_operations proc_vs_info_file_operations = {
20535 + .read = proc_vs_info_read,
20538 +static struct file_operations proc_vx_info_file_operations = {
20539 + .read = proc_vx_info_read,
20542 +static struct dentry_operations proc_xid_dentry_operations = {
20543 + .d_revalidate = proc_xid_revalidate,
20546 +static struct vs_entry vx_base_stuff[] = {
20547 + VINF("info", S_IRUGO, vxi_info),
20548 + VINF("status", S_IRUGO, vxi_status),
20549 + VINF("limit", S_IRUGO, vxi_limit),
20550 + VINF("sched", S_IRUGO, vxi_sched),
20551 + VINF("nsproxy", S_IRUGO, vxi_nsproxy0),
20552 + VINF("nsproxy1",S_IRUGO, vxi_nsproxy1),
20553 + VINF("cvirt", S_IRUGO, vxi_cvirt),
20554 + VINF("cacct", S_IRUGO, vxi_cacct),
20561 +static struct dentry *proc_xid_instantiate(struct inode *dir,
20562 + struct dentry *dentry, int id, void *ptr)
20564 + dentry->d_op = &proc_xid_dentry_operations;
20565 + return vs_proc_instantiate(dir, dentry, id, ptr);
20568 +static struct dentry *proc_xid_lookup(struct inode *dir,
20569 + struct dentry *dentry, unsigned int flags)
20571 + struct vs_entry *p = vx_base_stuff;
20572 + struct dentry *error = ERR_PTR(-ENOENT);
20574 + for (; p->name; p++) {
20575 + if (p->len != dentry->d_name.len)
20577 + if (!memcmp(dentry->d_name.name, p->name, p->len))
20583 + error = proc_xid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
20588 +static int proc_xid_iterate(struct file *filp, struct dir_context *ctx)
20590 + struct dentry *dentry = filp->f_path.dentry;
20591 + struct inode *inode = dentry->d_inode;
20592 + struct vs_entry *p = vx_base_stuff;
20593 + int size = sizeof(vx_base_stuff) / sizeof(struct vs_entry);
20596 + if (!dir_emit_dots(filp, ctx))
20599 + index = ctx->pos - 2;
20600 + if (index < size) {
20601 + for (p += index; p->name; p++) {
20602 + if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
20603 + vs_proc_instantiate, PROC_I(inode)->fd, p))
20613 +static struct file_operations proc_nx_info_file_operations = {
20614 + .read = proc_nx_info_read,
20617 +static struct dentry_operations proc_nid_dentry_operations = {
20618 + .d_revalidate = proc_nid_revalidate,
20621 +static struct vs_entry nx_base_stuff[] = {
20622 + NINF("info", S_IRUGO, nxi_info),
20623 + NINF("status", S_IRUGO, nxi_status),
20628 +static struct dentry *proc_nid_instantiate(struct inode *dir,
20629 + struct dentry *dentry, int id, void *ptr)
20631 + dentry->d_op = &proc_nid_dentry_operations;
20632 + return vs_proc_instantiate(dir, dentry, id, ptr);
20635 +static struct dentry *proc_nid_lookup(struct inode *dir,
20636 + struct dentry *dentry, unsigned int flags)
20638 + struct vs_entry *p = nx_base_stuff;
20639 + struct dentry *error = ERR_PTR(-ENOENT);
20641 + for (; p->name; p++) {
20642 + if (p->len != dentry->d_name.len)
20644 + if (!memcmp(dentry->d_name.name, p->name, p->len))
20650 + error = proc_nid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
20655 +static int proc_nid_iterate(struct file *filp, struct dir_context *ctx)
20657 + struct dentry *dentry = filp->f_path.dentry;
20658 + struct inode *inode = dentry->d_inode;
20659 + struct vs_entry *p = nx_base_stuff;
20660 + int size = sizeof(nx_base_stuff) / sizeof(struct vs_entry);
20663 + if (!dir_emit_dots(filp, ctx))
20666 + index = ctx->pos - 2;
20667 + if (index < size) {
20668 + for (p += index; p->name; p++) {
20669 + if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
20670 + vs_proc_instantiate, PROC_I(inode)->fd, p))
20679 +#define MAX_MULBY10 ((~0U - 9) / 10)
20681 +static inline int atovid(const char *str, int len)
20686 + while (len-- > 0) {
20691 + if (vid >= MAX_MULBY10)
20701 +/* now the upper level (virtual) */
20704 +static struct file_operations proc_xid_file_operations = {
20705 + .read = generic_read_dir,
20706 + .iterate = proc_xid_iterate,
20709 +static struct inode_operations proc_xid_inode_operations = {
20710 + .lookup = proc_xid_lookup,
20713 +static struct vs_entry vx_virtual_stuff[] = {
20714 + INF("info", S_IRUGO, virtual_info),
20715 + INF("status", S_IRUGO, virtual_status),
20716 + DIR(NULL, S_IRUGO | S_IXUGO, xid),
20720 +static struct dentry *proc_virtual_lookup(struct inode *dir,
20721 + struct dentry *dentry, unsigned int flags)
20723 + struct vs_entry *p = vx_virtual_stuff;
20724 + struct dentry *error = ERR_PTR(-ENOENT);
20727 + for (; p->name; p++) {
20728 + if (p->len != dentry->d_name.len)
20730 + if (!memcmp(dentry->d_name.name, p->name, p->len))
20734 + goto instantiate;
20736 + id = atovid(dentry->d_name.name, dentry->d_name.len);
20737 + if ((id < 0) || !xid_is_hashed(id))
20741 + error = proc_xid_instantiate(dir, dentry, id, p);
20746 +static struct file_operations proc_nid_file_operations = {
20747 + .read = generic_read_dir,
20748 + .iterate = proc_nid_iterate,
20751 +static struct inode_operations proc_nid_inode_operations = {
20752 + .lookup = proc_nid_lookup,
20755 +static struct vs_entry nx_virtnet_stuff[] = {
20756 + INF("info", S_IRUGO, virtnet_info),
20757 + INF("status", S_IRUGO, virtnet_status),
20758 + DIR(NULL, S_IRUGO | S_IXUGO, nid),
20762 +static struct dentry *proc_virtnet_lookup(struct inode *dir,
20763 + struct dentry *dentry, unsigned int flags)
20765 + struct vs_entry *p = nx_virtnet_stuff;
20766 + struct dentry *error = ERR_PTR(-ENOENT);
20769 + for (; p->name; p++) {
20770 + if (p->len != dentry->d_name.len)
20772 + if (!memcmp(dentry->d_name.name, p->name, p->len))
20776 + goto instantiate;
20778 + id = atovid(dentry->d_name.name, dentry->d_name.len);
20779 + if ((id < 0) || !nid_is_hashed(id))
20783 + error = proc_nid_instantiate(dir, dentry, id, p);
20789 +#define PROC_MAXVIDS 32
20791 +int proc_virtual_iterate(struct file *filp, struct dir_context *ctx)
20793 + struct vs_entry *p = vx_virtual_stuff;
20794 + int size = sizeof(vx_virtual_stuff) / sizeof(struct vs_entry);
20796 + unsigned int xid_array[PROC_MAXVIDS];
20797 + char buf[PROC_NUMBUF];
20798 + unsigned int nr_xids, i;
20800 + if (!dir_emit_dots(filp, ctx))
20803 + index = ctx->pos - 2;
20804 + if (index < size) {
20805 + for (p += index; p->name; p++) {
20806 + if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
20807 + vs_proc_instantiate, 0, p))
20813 + index = ctx->pos - size;
20814 + p = &vx_virtual_stuff[size - 1];
20815 + nr_xids = get_xid_list(index, xid_array, PROC_MAXVIDS);
20816 + for (i = 0; i < nr_xids; i++) {
20817 + int n, xid = xid_array[i];
20818 + unsigned int j = PROC_NUMBUF;
20822 + buf[--j] = '0' + (n % 10);
20825 + if (vx_proc_fill_cache(filp, ctx,
20826 + buf + j, PROC_NUMBUF - j,
20827 + vs_proc_instantiate, xid, p))
20834 +static int proc_virtual_getattr(struct vfsmount *mnt,
20835 + struct dentry *dentry, struct kstat *stat)
20837 + struct inode *inode = dentry->d_inode;
20839 + generic_fillattr(inode, stat);
20840 + stat->nlink = 2 + atomic_read(&vx_global_cactive);
20844 +static struct file_operations proc_virtual_dir_operations = {
20845 + .read = generic_read_dir,
20846 + .iterate = proc_virtual_iterate,
20849 +static struct inode_operations proc_virtual_dir_inode_operations = {
20850 + .getattr = proc_virtual_getattr,
20851 + .lookup = proc_virtual_lookup,
20856 +int proc_virtnet_iterate(struct file *filp, struct dir_context *ctx)
20858 + struct vs_entry *p = nx_virtnet_stuff;
20859 + int size = sizeof(nx_virtnet_stuff) / sizeof(struct vs_entry);
20861 + unsigned int nid_array[PROC_MAXVIDS];
20862 + char buf[PROC_NUMBUF];
20863 + unsigned int nr_nids, i;
20865 + if (!dir_emit_dots(filp, ctx))
20868 + index = ctx->pos - 2;
20869 + if (index < size) {
20870 + for (p += index; p->name; p++) {
20871 + if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
20872 + vs_proc_instantiate, 0, p))
20878 + index = ctx->pos - size;
20879 + p = &nx_virtnet_stuff[size - 1];
20880 + nr_nids = get_nid_list(index, nid_array, PROC_MAXVIDS);
20881 + for (i = 0; i < nr_nids; i++) {
20882 + int n, nid = nid_array[i];
20883 + unsigned int j = PROC_NUMBUF;
20887 + buf[--j] = '0' + (n % 10);
20890 + if (vx_proc_fill_cache(filp, ctx,
20891 + buf + j, PROC_NUMBUF - j,
20892 + vs_proc_instantiate, nid, p))
20899 +static int proc_virtnet_getattr(struct vfsmount *mnt,
20900 + struct dentry *dentry, struct kstat *stat)
20902 + struct inode *inode = dentry->d_inode;
20904 + generic_fillattr(inode, stat);
20905 + stat->nlink = 2 + atomic_read(&nx_global_cactive);
20909 +static struct file_operations proc_virtnet_dir_operations = {
20910 + .read = generic_read_dir,
20911 + .iterate = proc_virtnet_iterate,
20914 +static struct inode_operations proc_virtnet_dir_inode_operations = {
20915 + .getattr = proc_virtnet_getattr,
20916 + .lookup = proc_virtnet_lookup,
20921 +void proc_vx_init(void)
20923 + struct proc_dir_entry *ent;
20925 + ent = proc_mkdir("virtual", 0);
20927 + ent->proc_fops = &proc_virtual_dir_operations;
20928 + ent->proc_iops = &proc_virtual_dir_inode_operations;
20930 + proc_virtual = ent;
20932 + ent = proc_mkdir("virtnet", 0);
20934 + ent->proc_fops = &proc_virtnet_dir_operations;
20935 + ent->proc_iops = &proc_virtnet_dir_inode_operations;
20937 + proc_virtnet = ent;
20943 +/* per pid info */
20945 +void render_cap_t(struct seq_file *, const char *,
20946 + struct vx_info *, kernel_cap_t *);
20949 +int proc_pid_vx_info(
20950 + struct seq_file *m,
20951 + struct pid_namespace *ns,
20953 + struct task_struct *p)
20955 + struct vx_info *vxi;
20957 + seq_printf(m, "XID:\t%d\n", vx_task_xid(p));
20959 + vxi = task_get_vx_info(p);
20963 + render_cap_t(m, "BCaps:\t", vxi, &vxi->vx_bcaps);
20964 + seq_printf(m, "CCaps:\t%016llx\n",
20965 + (unsigned long long)vxi->vx_ccaps);
20966 + seq_printf(m, "CFlags:\t%016llx\n",
20967 + (unsigned long long)vxi->vx_flags);
20968 + seq_printf(m, "CIPid:\t%d\n", vxi->vx_initpid);
20970 + put_vx_info(vxi);
20975 +int proc_pid_nx_info(
20976 + struct seq_file *m,
20977 + struct pid_namespace *ns,
20979 + struct task_struct *p)
20981 + struct nx_info *nxi;
20982 + struct nx_addr_v4 *v4a;
20983 +#ifdef CONFIG_IPV6
20984 + struct nx_addr_v6 *v6a;
20988 + seq_printf(m, "NID:\t%d\n", nx_task_nid(p));
20990 + nxi = task_get_nx_info(p);
20994 + seq_printf(m, "NCaps:\t%016llx\n",
20995 + (unsigned long long)nxi->nx_ncaps);
20996 + seq_printf(m, "NFlags:\t%016llx\n",
20997 + (unsigned long long)nxi->nx_flags);
20999 + seq_printf(m, "V4Root[bcast]:\t" NIPQUAD_FMT "\n",
21000 + NIPQUAD(nxi->v4_bcast.s_addr));
21001 + seq_printf(m, "V4Root[lback]:\t" NIPQUAD_FMT "\n",
21002 + NIPQUAD(nxi->v4_lback.s_addr));
21003 + if (!NX_IPV4(nxi))
21005 + for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
21006 + seq_printf(m, "V4Root[%d]:\t" NXAV4_FMT "\n",
21009 +#ifdef CONFIG_IPV6
21010 + if (!NX_IPV6(nxi))
21012 + for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
21013 + seq_printf(m, "V6Root[%d]:\t" NXAV6_FMT "\n",
21017 + put_nx_info(nxi);
21021 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/sched.c linux-4.4/kernel/vserver/sched.c
21022 --- linux-4.4/kernel/vserver/sched.c 1970-01-01 01:00:00.000000000 +0100
21023 +++ linux-4.4/kernel/vserver/sched.c 2021-02-24 16:56:24.612823601 +0100
21026 + * linux/kernel/vserver/sched.c
21028 + * Virtual Server: Scheduler Support
21030 + * Copyright (C) 2004-2010 Herbert Pötzl
21032 + * V0.01 adapted Sam Vilain's version to 2.6.3
21033 + * V0.02 removed legacy interface
21034 + * V0.03 changed vcmds to vxi arg
21035 + * V0.04 removed older and legacy interfaces
21036 + * V0.05 removed scheduler code/commands
21040 +#include <linux/vs_context.h>
21041 +#include <linux/vs_sched.h>
21042 +#include <linux/cpumask.h>
21043 +#include <linux/vserver/sched_cmd.h>
21045 +#include <asm/uaccess.h>
21048 +void vx_update_sched_param(struct _vx_sched *sched,
21049 + struct _vx_sched_pc *sched_pc)
21051 + sched_pc->prio_bias = sched->prio_bias;
21054 +static int do_set_prio_bias(struct vx_info *vxi, struct vcmd_prio_bias *data) /* apply a requested priority bias to context vxi; note that data->prio_bias is clamped in place */
21058 + if (data->prio_bias > MAX_PRIO_BIAS) /* clamp the request into [MIN_PRIO_BIAS, MAX_PRIO_BIAS] */
21059 + data->prio_bias = MAX_PRIO_BIAS;
21060 + if (data->prio_bias < MIN_PRIO_BIAS)
21061 + data->prio_bias = MIN_PRIO_BIAS;
21063 + if (data->cpu_id != ~0) { /* a specific CPU was named: update mask = that CPU, restricted to the online CPUs */
21064 + vxi->sched.update = *get_cpu_mask(data->cpu_id);
21065 + cpumask_and(&vxi->sched.update, &vxi->sched.update,
21066 + cpu_online_mask);
21068 + cpumask_copy(&vxi->sched.update, cpu_online_mask); /* presumably the cpu_id == ~0 branch: target every online CPU */
21070 + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)vxi->sched.update) /* NOTE(review): as written this visits only cpu 0 and merely evaluates the update mask; looks like a pre-expanded uniprocessor CPU iterator - confirm */
21071 + vx_update_sched_param(&vxi->sched, /* push the context-wide value into the per-CPU record */
21072 + &vx_per_cpu(vxi, sched_pc, cpu));
21076 +int vc_set_prio_bias(struct vx_info *vxi, void __user *data) /* vcmd handler: read a struct vcmd_prio_bias from user space and apply it to vxi */
21078 + struct vcmd_prio_bias vc_data;
21080 + if (copy_from_user(&vc_data, data, sizeof(vc_data))) /* non-zero means the user buffer could not be read completely */
21083 + return do_set_prio_bias(vxi, &vc_data); /* result of the worker is returned unchanged */
21086 +int vc_get_prio_bias(struct vx_info *vxi, void __user *data) /* vcmd handler: report the per-CPU priority bias of vxi for the CPU named in the caller's structure */
21088 + struct vcmd_prio_bias vc_data;
21089 + struct _vx_sched_pc *pcd;
21092 + if (copy_from_user(&vc_data, data, sizeof(vc_data))) /* the request is read first because it carries the cpu_id selector */
21095 + cpu = vc_data.cpu_id;
21097 + if (!cpu_possible(cpu)) /* validate the CPU number before it is used to index per-CPU data */
21100 + pcd = &vx_per_cpu(vxi, sched_pc, cpu);
21101 + vc_data.prio_bias = pcd->prio_bias;
21103 + if (copy_to_user(data, &vc_data, sizeof(vc_data))) /* write the filled-in structure back to the same user buffer */
21108 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/sched_init.h linux-4.4/kernel/vserver/sched_init.h
21109 --- linux-4.4/kernel/vserver/sched_init.h 1970-01-01 01:00:00.000000000 +0100
21110 +++ linux-4.4/kernel/vserver/sched_init.h 2021-02-24 16:56:24.612823601 +0100
21113 +static inline void vx_info_init_sched(struct _vx_sched *sched) /* give the context-wide scheduler state its starting values */
21115 + /* scheduling; hard code starting values as constants */
21116 + sched->prio_bias = 0; /* a new context starts without any priority bias */
21120 +void vx_info_init_sched_pc(struct _vx_sched_pc *sched_pc, int cpu) /* reset one per-CPU scheduler record; cpu is not referenced by the statements shown */
21122 + sched_pc->prio_bias = 0;
21124 + sched_pc->user_ticks = 0; /* the three tick counters start from zero */
21125 + sched_pc->sys_ticks = 0;
21126 + sched_pc->hold_ticks = 0;
21129 +static inline void vx_info_exit_sched(struct _vx_sched *sched) /* NOTE(review): presumably the teardown hook matching vx_info_init_sched(); no body statements are shown here - confirm */
21135 +void vx_info_exit_sched_pc(struct _vx_sched_pc *sched_pc, int cpu) /* NOTE(review): presumably the teardown hook matching vx_info_init_sched_pc(); no body statements are shown here - confirm */
21139 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/sched_proc.h linux-4.4/kernel/vserver/sched_proc.h
21140 --- linux-4.4/kernel/vserver/sched_proc.h 1970-01-01 01:00:00.000000000 +0100
21141 +++ linux-4.4/kernel/vserver/sched_proc.h 2021-02-24 16:56:24.612823601 +0100
21143 +#ifndef _VX_SCHED_PROC_H
21144 +#define _VX_SCHED_PROC_H
21148 +int vx_info_proc_sched(struct _vx_sched *sched, char *buffer) /* format the context-wide priority bias as a "PrioBias:" line at the start of buffer */
21152 + length += sprintf(buffer, /* sprintf() is unbounded: the caller has to provide a large enough buffer */
21153 + "PrioBias:\t%8d\n",
21154 + sched->prio_bias);
21159 +int vx_info_proc_sched_pc(struct _vx_sched_pc *sched_pc, /* format one "cpu N: user sys hold bias" line for a per-CPU scheduler record into buffer */
21160 + char *buffer, int cpu)
21164 + length += sprintf(buffer + length,
21165 + "cpu %d: %lld %lld %lld", cpu, /* NOTE(review): %lld is paired with unsigned long long arguments; %llu would match the casts */
21166 + (unsigned long long)sched_pc->user_ticks,
21167 + (unsigned long long)sched_pc->sys_ticks,
21168 + (unsigned long long)sched_pc->hold_ticks);
21169 + length += sprintf(buffer + length, /* second write continues where the first one stopped and ends the line */
21170 + " %d\n", sched_pc->prio_bias);
21174 +#endif /* _VX_SCHED_PROC_H */
21175 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/signal.c linux-4.4/kernel/vserver/signal.c
21176 --- linux-4.4/kernel/vserver/signal.c 1970-01-01 01:00:00.000000000 +0100
21177 +++ linux-4.4/kernel/vserver/signal.c 2021-02-24 16:56:24.612823601 +0100
21180 + * linux/kernel/vserver/signal.c
21182 + * Virtual Server: Signal Support
21184 + * Copyright (C) 2003-2007 Herbert Pötzl
21186 + * V0.01 broken out from vcontext V0.05
21187 + * V0.02 changed vcmds to vxi arg
21188 + * V0.03 adjusted siginfo for kill
21192 +#include <asm/uaccess.h>
21194 +#include <linux/vs_context.h>
21195 +#include <linux/vs_pid.h>
21196 +#include <linux/vserver/signal_cmd.h>
21199 +int vx_info_kill(struct vx_info *vxi, int pid, int sig) /* send signal sig to tasks of context vxi: a sweep over its processes and/or one task selected by pid (the selecting switch is not fully shown here) */
21201 + int retval, count = 0;
21202 + struct task_struct *p;
21203 + struct siginfo *sip = SEND_SIG_PRIV; /* the same siginfo value is used for every signal sent below */
21206 + vxdprintk(VXD_CBIT(misc, 4),
21207 + "vx_info_kill(%p[#%d],%d,%d)*",
21208 + vxi, vxi->vx_id, pid, sig);
21209 + read_lock(&tasklist_lock); /* released by the read_unlock() near the end of the function */
21213 + for_each_process(p) {
21216 + if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 || /* filter: tasks of another context, pids <= 1, and - when a pid was given - the context's own init */
21217 + (pid && vxi->vx_initpid == p->pid))
21220 + err = group_send_sig_info(sig, sip, p);
21222 + if (err != -EPERM) /* -EPERM from a single task is treated differently from other results */
21228 + if (vxi->vx_initpid) {
21229 + pid = vxi->vx_initpid; /* retarget the request at the context's init process */
21230 + /* for now, only SIGINT to private init ... */
21231 + if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
21232 + /* ... as long as there are tasks left */
21233 + (atomic_read(&vxi->vx_tasks) > 1))
21236 + /* fallthrough */
21239 + p = find_task_by_real_pid(pid);
21240 + rcu_read_unlock();
21242 + if (vx_task_xid(p) == vxi->vx_id) /* signal the task only if it belongs to this context */
21243 + retval = group_send_sig_info(sig, sip, p);
21247 + read_unlock(&tasklist_lock);
21248 + vxdprintk(VXD_CBIT(misc, 4),
21249 + "vx_info_kill(%p[#%d],%d,%d,%ld) = %d",
21250 + vxi, vxi->vx_id, pid, sig, (long)sip, retval);
21254 +int vc_ctx_kill(struct vx_info *vxi, void __user *data) /* vcmd handler: read a struct vcmd_ctx_kill_v0 (pid, sig) from user space, vet it, and pass it to vx_info_kill() */
21256 + struct vcmd_ctx_kill_v0 vc_data;
21258 + if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21261 + /* special check to allow guest shutdown */
21262 + if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) && /* the restriction below applies only when VXF_STATE_ADMIN is not set on vxi */
21263 + /* forbid killall pid=0 when init is present */
21264 + (((vc_data.pid < 1) && vxi->vx_initpid) ||
21265 + (vc_data.pid > 1))) /* ... and any explicit pid above 1; pid == 1 passes this check */
21268 + return vx_info_kill(vxi, vc_data.pid, vc_data.sig);
21272 +static int __wait_exit(struct vx_info *vxi) /* wait on vxi->vx_wait for the context's shutdown state; a pending signal ends the wait with -ERESTARTSYS */
21274 + DECLARE_WAITQUEUE(wait, current);
21277 + add_wait_queue(&vxi->vx_wait, &wait); /* queue the current task on the context's wait queue ... */
21278 + set_current_state(TASK_INTERRUPTIBLE); /* ... and mark it interruptible before the condition is tested */
21281 + if (vx_info_state(vxi, /* exit condition: of the three state bits tested, the result must equal VXS_SHUTDOWN alone */
21282 + VXS_SHUTDOWN | VXS_HASHED | VXS_HELPER) == VXS_SHUTDOWN)
21284 + if (signal_pending(current)) {
21285 + ret = -ERESTARTSYS;
21292 + set_current_state(TASK_RUNNING); /* undo the sleep setup: state first, then leave the wait queue */
21293 + remove_wait_queue(&vxi->vx_wait, &wait);
21299 +int vc_wait_exit(struct vx_info *vxi, void __user *data) /* vcmd handler: wait through __wait_exit(), then report the context's reboot_cmd and exit_code to user space */
21301 + struct vcmd_wait_exit_v0 vc_data;
21304 + ret = __wait_exit(vxi);
21305 + vc_data.reboot_cmd = vxi->reboot_cmd; /* both result fields are taken from vxi after the wait returned */
21306 + vc_data.exit_code = vxi->exit_code;
21308 + if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21313 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/space.c linux-4.4/kernel/vserver/space.c
21314 --- linux-4.4/kernel/vserver/space.c 1970-01-01 01:00:00.000000000 +0100
21315 +++ linux-4.4/kernel/vserver/space.c 2021-02-24 16:56:24.612823601 +0100
21318 + * linux/kernel/vserver/space.c
21320 + * Virtual Server: Context Space Support
21322 + * Copyright (C) 2003-2010 Herbert Pötzl
21324 + * V0.01 broken out from context.c 0.07
21325 + * V0.02 added task locking for namespace
21326 + * V0.03 broken out vx_enter_namespace
21327 + * V0.04 added *space support and commands
21328 + * V0.05 added credential support
21332 +#include <linux/utsname.h>
21333 +#include <linux/nsproxy.h>
21334 +#include <linux/err.h>
21335 +#include <linux/fs_struct.h>
21336 +#include <linux/cred.h>
21337 +#include <asm/uaccess.h>
21339 +#include <linux/vs_context.h>
21340 +#include <linux/vserver/space.h>
21341 +#include <linux/vserver/space_cmd.h>
21343 +atomic_t vs_global_nsproxy = ATOMIC_INIT(0); /* file-scope atomic counters, one per object kind named in the suffix, all starting at 0; the code that updates them is not shown here */
21344 +atomic_t vs_global_fs = ATOMIC_INIT(0);
21345 +atomic_t vs_global_mnt_ns = ATOMIC_INIT(0);
21346 +atomic_t vs_global_uts_ns = ATOMIC_INIT(0);
21347 +atomic_t vs_global_user_ns = ATOMIC_INIT(0);
21348 +atomic_t vs_global_pid_ns = ATOMIC_INIT(0);
21351 +/* namespace functions */
21353 +#include <linux/mnt_namespace.h>
21354 +#include <linux/user_namespace.h>
21355 +#include <linux/pid_namespace.h>
21356 +#include <linux/ipc_namespace.h>
21357 +#include <net/net_namespace.h>
21358 +#include "../fs/mount.h"
21361 +static const struct vcmd_space_mask_v1 space_mask_v0 = { /* one of the three masks vc_get_space_mask() can hand out; NOTE(review): presumably the one for type 0 - confirm */
21362 + .mask = CLONE_FS | /* CLONE_FS is unconditional; further bits are contributed under the config guards below */
21364 +#ifdef CONFIG_UTS_NS
21367 +#ifdef CONFIG_IPC_NS
21370 +#ifdef CONFIG_USER_NS
21376 +static const struct vcmd_space_mask_v1 space_mask = { /* full set of space bits: vx_set_space() rejects masks outside it, and vc_get_space_mask() returns it for type 1 */
21377 + .mask = CLONE_FS | /* CLONE_FS is unconditional; further bits are contributed under the config guards below */
21379 +#ifdef CONFIG_UTS_NS
21382 +#ifdef CONFIG_IPC_NS
21385 +#ifdef CONFIG_USER_NS
21388 +#ifdef CONFIG_PID_NS /* this mask additionally has PID and NET guards, which space_mask_v0 lacks */
21391 +#ifdef CONFIG_NET_NS
21397 +static const struct vcmd_space_mask_v1 default_space_mask = { /* mask returned by vc_get_space_mask() when type is neither of the two explicitly tested values */
21398 + .mask = CLONE_FS | /* CLONE_FS is unconditional; further bits are contributed under the config guards below */
21400 +#ifdef CONFIG_UTS_NS
21403 +#ifdef CONFIG_IPC_NS
21406 +#ifdef CONFIG_USER_NS
21407 +// CLONE_NEWUSER | (left commented out, so the user namespace bit is not part of the default mask)
21409 +#ifdef CONFIG_PID_NS
21416 + * build a new nsproxy mix
21417 + * assumes that both proxies are 'const'
21418 + * does not touch nsproxy refcounts
21419 + * will hold a reference on the result.
21422 +struct nsproxy *vs_mix_nsproxy(struct nsproxy *old_nsproxy, /* result = copy of old_nsproxy in which every namespace selected by mask is replaced by the one from new_nsproxy */
21423 + struct nsproxy *new_nsproxy, unsigned long mask)
21425 + struct mnt_namespace *old_ns; /* old_* remember the namespaces that get replaced so they can be released afterwards */
21426 + struct uts_namespace *old_uts;
21427 + struct ipc_namespace *old_ipc;
21428 +#ifdef CONFIG_PID_NS
21429 + struct pid_namespace *old_pid;
21431 +#ifdef CONFIG_NET_NS
21432 + struct net *old_net;
21434 + struct nsproxy *nsproxy;
21436 + nsproxy = copy_nsproxy(old_nsproxy); /* start from a copy of the old proxy */
21440 + if (mask & CLONE_NEWNS) { /* same pattern for each kind: save old pointer, install the new one, take a reference if it is non-NULL */
21441 + old_ns = nsproxy->mnt_ns;
21442 + nsproxy->mnt_ns = new_nsproxy->mnt_ns;
21443 + if (nsproxy->mnt_ns)
21444 + get_mnt_ns(nsproxy->mnt_ns);
21448 + if (mask & CLONE_NEWUTS) {
21449 + old_uts = nsproxy->uts_ns;
21450 + nsproxy->uts_ns = new_nsproxy->uts_ns;
21451 + if (nsproxy->uts_ns)
21452 + get_uts_ns(nsproxy->uts_ns);
21456 + if (mask & CLONE_NEWIPC) {
21457 + old_ipc = nsproxy->ipc_ns;
21458 + nsproxy->ipc_ns = new_nsproxy->ipc_ns;
21459 + if (nsproxy->ipc_ns)
21460 + get_ipc_ns(nsproxy->ipc_ns);
21464 +#ifdef CONFIG_PID_NS
21465 + if (mask & CLONE_NEWPID) { /* for PID the member that is swapped is pid_ns_for_children */
21466 + old_pid = nsproxy->pid_ns_for_children;
21467 + nsproxy->pid_ns_for_children = new_nsproxy->pid_ns_for_children;
21468 + if (nsproxy->pid_ns_for_children)
21469 + get_pid_ns(nsproxy->pid_ns_for_children);
21473 +#ifdef CONFIG_NET_NS
21474 + if (mask & CLONE_NEWNET) {
21475 + old_net = nsproxy->net_ns;
21476 + nsproxy->net_ns = new_nsproxy->net_ns;
21477 + if (nsproxy->net_ns)
21478 + get_net(nsproxy->net_ns);
21483 + put_mnt_ns(old_ns); /* release the replaced namespaces; NOTE(review): the conditions guarding these puts are not shown here */
21485 + put_uts_ns(old_uts);
21487 + put_ipc_ns(old_ipc);
21488 +#ifdef CONFIG_PID_NS
21490 + put_pid_ns(old_pid);
21492 +#ifdef CONFIG_NET_NS
21494 + put_net(old_net);
21502 + * merge two nsproxy structs into a new one.
21503 + * will hold a reference on the result.
21507 +struct nsproxy *__vs_merge_nsproxy(struct nsproxy *old, /* combine old and proxy under mask; old may be NULL */
21508 + struct nsproxy *proxy, unsigned long mask)
21510 + struct nsproxy null_proxy = { .mnt_ns = NULL }; /* stand-in for the base proxy when old is NULL */
21516 + /* vs_mix_nsproxy returns with reference */
21517 + return vs_mix_nsproxy(old ? old : &null_proxy,
21520 + get_nsproxy(proxy); /* NOTE(review): presumably the path that returns proxy itself, so a reference is taken here - confirm */
21525 +int vx_enter_space(struct vx_info *vxi, unsigned long mask, unsigned index) /* move the calling task into the fs/namespaces (selected by mask) stored in slot index of context vxi */
21527 + struct nsproxy *proxy, *proxy_cur, *proxy_new;
21528 + struct fs_struct *fs_cur, *fs = NULL;
21529 + struct _vx_space *space;
21530 + int ret, kill = 0;
21532 + vxdprintk(VXD_CBIT(space, 8), "vx_enter_space(%p[#%u],0x%08lx,%d)",
21533 + vxi, vxi->vx_id, mask, index);
21535 + if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0)) /* contexts flagged VXF_INFO_PRIVATE are handled specially before anything is touched */
21538 + if (index >= VX_SPACES) /* index selects an entry of vxi->space[] and must be in range */
21541 + space = &vxi->space[index];
21544 + mask = space->vx_nsmask; /* NOTE(review): presumably the default when the caller passed mask == 0 - confirm */
21546 + if ((mask & space->vx_nsmask) != mask) /* only spaces that were actually stored in this slot may be entered */
21549 + if (mask & CLONE_FS) {
21550 + fs = copy_fs_struct(space->vx_fs); /* the task gets its own copy of the stored fs_struct */
21554 + proxy = space->vx_nsproxy;
21556 + vxdprintk(VXD_CBIT(space, 9),
21557 + "vx_enter_space(%p[#%u],0x%08lx,%d) -> (%p,%p)",
21558 + vxi, vxi->vx_id, mask, index, proxy, fs);
21560 + task_lock(current);
21561 + fs_cur = current->fs;
21563 + if (mask & CLONE_FS) {
21564 + spin_lock(&fs_cur->lock);
21565 + current->fs = fs; /* switch to the copy and drop this task's use of the previous fs_struct */
21566 + kill = !--fs_cur->users; /* kill is set when this task was the last user */
21567 + spin_unlock(&fs_cur->lock);
21570 + proxy_cur = current->nsproxy;
21571 + get_nsproxy(proxy_cur); /* keep the current proxy alive while it is merged below */
21572 + task_unlock(current);
21575 + free_fs_struct(fs_cur);
21577 + proxy_new = __vs_merge_nsproxy(proxy_cur, proxy, mask); /* current namespaces, overridden by the stored ones for the bits in mask */
21578 + if (IS_ERR(proxy_new)) {
21579 + ret = PTR_ERR(proxy_new);
21583 + proxy_new = xchg(&current->nsproxy, proxy_new); /* install the merged proxy; proxy_new now holds the one that was replaced (fixed: the address-of on current had been garbled) */
21585 + if (mask & CLONE_NEWUSER) {
21586 + struct cred *cred;
21588 + vxdprintk(VXD_CBIT(space, 10),
21589 + "vx_enter_space(%p[#%u],%p) cred (%p,%p)",
21590 + vxi, vxi->vx_id, space->vx_cred,
21591 + current->real_cred, current->cred);
21593 + if (space->vx_cred) { /* adopt credentials derived from the ones stored in the slot, if any */
21594 + cred = __prepare_creds(space->vx_cred);
21596 + commit_creds(cred);
21603 + put_nsproxy(proxy_new); /* drop the replaced proxy ... */
21606 + put_nsproxy(proxy_cur); /* ... and the extra reference taken on the current one above */
21611 +int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index) /* store the calling task's fs/namespaces (selected by mask) into slot index of context vxi */
21613 + struct nsproxy *proxy_vxi, *proxy_cur, *proxy_new;
21614 + struct fs_struct *fs_vxi, *fs = NULL;
21615 + struct _vx_space *space;
21616 + int ret, kill = 0;
21618 + vxdprintk(VXD_CBIT(space, 8), "vx_set_space(%p[#%u],0x%08lx,%d)",
21619 + vxi, vxi->vx_id, mask, index);
21621 + if ((mask & space_mask.mask) != mask) /* every requested bit must be part of space_mask */
21624 + if (index >= VX_SPACES) /* index selects an entry of vxi->space[] and must be in range */
21627 + space = &vxi->space[index];
21629 + proxy_vxi = space->vx_nsproxy; /* what the slot currently holds */
21630 + fs_vxi = space->vx_fs;
21632 + if (mask & CLONE_FS) {
21633 + fs = copy_fs_struct(current->fs); /* the slot gets its own copy of the caller's fs_struct */
21638 + task_lock(current);
21640 + if (mask & CLONE_FS) {
21641 + spin_lock(&fs_vxi->lock);
21642 + space->vx_fs = fs; /* install the copy and drop the slot's use of the previous fs_struct */
21643 + kill = !--fs_vxi->users; /* kill is set when the slot was the last user */
21644 + spin_unlock(&fs_vxi->lock);
21647 + proxy_cur = current->nsproxy;
21648 + get_nsproxy(proxy_cur); /* keep the caller's proxy alive while it is merged below */
21649 + task_unlock(current);
21652 + free_fs_struct(fs_vxi);
21654 + proxy_new = __vs_merge_nsproxy(proxy_vxi, proxy_cur, mask); /* stored namespaces, overridden by the caller's for the bits in mask */
21655 + if (IS_ERR(proxy_new)) {
21656 + ret = PTR_ERR(proxy_new);
21660 + proxy_new = xchg(&space->vx_nsproxy, proxy_new); /* publish the merged proxy; proxy_new now holds the one that was replaced */
21661 + space->vx_nsmask |= mask; /* record which spaces this slot now provides */
21663 + if (mask & CLONE_NEWUSER) {
21664 + struct cred *cred;
21666 + vxdprintk(VXD_CBIT(space, 10),
21667 + "vx_set_space(%p[#%u],%p) cred (%p,%p)",
21668 + vxi, vxi->vx_id, space->vx_cred,
21669 + current->real_cred, current->cred);
21671 + cred = prepare_creds(); /* snapshot of the caller's credentials ... */
21672 + cred = (struct cred *)xchg(&space->vx_cred, cred); /* ... swapped into the slot; cred now holds the previous entry */
21674 + abort_creds(cred); /* discard the previous entry; NOTE(review): the guard around this call is not shown here */
21680 + put_nsproxy(proxy_new); /* drop the replaced proxy ... */
21683 + put_nsproxy(proxy_cur); /* ... and the extra reference taken on the caller's proxy above */
21688 +int vc_enter_space_v1(struct vx_info *vxi, void __user *data) /* version-1 vcmd handler: optional mask from user space, always space index 0 */
21690 + struct vcmd_space_mask_v1 vc_data = { .mask = 0 }; /* mask stays 0 when no data pointer is supplied */
21692 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) /* a NULL data pointer is accepted and skips the copy */
21695 + return vx_enter_space(vxi, vc_data.mask, 0);
21698 +int vc_enter_space(struct vx_info *vxi, void __user *data) /* vcmd handler taking mask and space index from a struct vcmd_space_mask_v2 */
21700 + struct vcmd_space_mask_v2 vc_data = { .mask = 0 }; /* fields not named in the initializer are zeroed, so index defaults to 0 */
21702 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) /* a NULL data pointer is accepted and skips the copy */
21705 + if (vc_data.index >= VX_SPACES) /* reject an out-of-range slot before calling the worker */
21708 + return vx_enter_space(vxi, vc_data.mask, vc_data.index);
21711 +int vc_set_space_v1(struct vx_info *vxi, void __user *data) /* version-1 vcmd handler: optional mask from user space, always space index 0 */
21713 + struct vcmd_space_mask_v1 vc_data = { .mask = 0 }; /* mask stays 0 when no data pointer is supplied */
21715 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) /* a NULL data pointer is accepted and skips the copy */
21718 + return vx_set_space(vxi, vc_data.mask, 0);
21721 +int vc_set_space(struct vx_info *vxi, void __user *data) /* vcmd handler taking mask and space index from a struct vcmd_space_mask_v2 */
21723 + struct vcmd_space_mask_v2 vc_data = { .mask = 0 }; /* fields not named in the initializer are zeroed, so index defaults to 0 */
21725 + if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) /* a NULL data pointer is accepted and skips the copy */
21728 + if (vc_data.index >= VX_SPACES) /* reject an out-of-range slot before calling the worker */
21731 + return vx_set_space(vxi, vc_data.mask, vc_data.index);
21734 +int vc_get_space_mask(void __user *data, int type) /* copy one of the three static space masks, chosen by type, to user space */
21736 + const struct vcmd_space_mask_v1 *mask;
21739 + mask = &space_mask_v0; /* NOTE(review): presumably selected for type == 0; the test itself is not shown here */
21740 + else if (type == 1)
21741 + mask = &space_mask;
21743 + mask = &default_space_mask; /* any other type value */
21745 + vxdprintk(VXD_CBIT(space, 10),
21746 + "vc_get_space_mask(%d) = %08llx", type, mask->mask);
21748 + if (copy_to_user(data, mask, sizeof(*mask)))
21753 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/switch.c linux-4.4/kernel/vserver/switch.c
21754 --- linux-4.4/kernel/vserver/switch.c 1970-01-01 01:00:00.000000000 +0100
21755 +++ linux-4.4/kernel/vserver/switch.c 2021-02-24 16:56:24.612823601 +0100
21758 + * linux/kernel/vserver/switch.c
21760 + * Virtual Server: Syscall Switch
21762 + * Copyright (C) 2003-2011 Herbert Pötzl
21764 + * V0.01 syscall switch
21765 + * V0.02 added signal to context
21766 + * V0.03 added rlimit functions
21767 + * V0.04 added iattr, task/xid functions
21768 + * V0.05 added debug/history stuff
21769 + * V0.06 added compat32 layer
21770 + * V0.07 vcmd args and perms
21771 + * V0.08 added status commands
21772 + * V0.09 added tag commands
21773 + * V0.10 added oom bias
21774 + * V0.11 added device commands
21775 + * V0.12 added warn mask
21779 +#include <linux/vs_context.h>
21780 +#include <linux/vs_network.h>
21781 +#include <linux/vserver/switch.h>
21783 +#include "vci_config.h"
21787 +int vc_get_version(uint32_t id) /* vcmd handler: id is ignored */
21789 + return VCI_VERSION; /* the compile-time interface version constant */
21793 +int vc_get_vci(uint32_t id) /* vcmd handler: id is ignored */
21795 + return vci_kernel_config(); /* value comes straight from the helper pulled in via "vci_config.h" */
21798 +#include <linux/vserver/context_cmd.h>
21799 +#include <linux/vserver/cvirt_cmd.h>
21800 +#include <linux/vserver/cacct_cmd.h>
21801 +#include <linux/vserver/limit_cmd.h>
21802 +#include <linux/vserver/network_cmd.h>
21803 +#include <linux/vserver/sched_cmd.h>
21804 +#include <linux/vserver/debug_cmd.h>
21805 +#include <linux/vserver/inode_cmd.h>
21806 +#include <linux/vserver/dlimit_cmd.h>
21807 +#include <linux/vserver/signal_cmd.h>
21808 +#include <linux/vserver/space_cmd.h>
21809 +#include <linux/vserver/tag_cmd.h>
21810 +#include <linux/vserver/device_cmd.h>
21812 +#include <linux/vserver/inode.h>
21813 +#include <linux/vserver/dlimit.h>
21816 +#ifdef CONFIG_COMPAT /* with compat support, a handler's name_x32 variant is chosen when the compat argument is non-zero */
21817 +#define __COMPAT(name, id, data, compat) \
21818 + (compat) ? name ## _x32(id, data) : name(id, data) /* NOTE(review): the expansion is not parenthesised as a whole; the uses shown are plain return statements */
21819 +#define __COMPAT_NO_ID(name, data, compat) \
21820 + (compat) ? name ## _x32(data) : name(data) /* same selection for handlers that take no id */
21822 +#define __COMPAT(name, id, data, compat) /* fallback definitions follow; NOTE(review): their bodies are not shown here */ \
21824 +#define __COMPAT_NO_ID(name, data, compat) \
21830 +long do_vcmd(uint32_t cmd, uint32_t id, /* dispatch table: route one vserver command to its vc_*() handler and return the handler's result */
21831 + struct vx_info *vxi, struct nx_info *nxi, /* vxi / nxi are passed through to the handlers that operate on a context or network context */
21832 + void __user *data, int compat) /* compat selects the _x32 handler variants through __COMPAT() */
21836 + case VCMD_get_version: /* commands that need neither vxi nor nxi receive just the raw id */
21837 + return vc_get_version(id);
21838 + case VCMD_get_vci:
21839 + return vc_get_vci(id);
21841 + case VCMD_task_xid:
21842 + return vc_task_xid(id);
21843 + case VCMD_vx_info:
21844 + return vc_vx_info(vxi, data);
21846 + case VCMD_task_nid:
21847 + return vc_task_nid(id);
21848 + case VCMD_nx_info:
21849 + return vc_nx_info(nxi, data);
21851 + case VCMD_task_tag:
21852 + return vc_task_tag(id);
21854 + case VCMD_set_space_v1:
21855 + return vc_set_space_v1(vxi, data);
21856 + /* this is version 2 */
21857 + case VCMD_set_space:
21858 + return vc_set_space(vxi, data);
21860 + case VCMD_get_space_mask_v0: /* the three space-mask queries share one handler and differ only in the type argument */
21861 + return vc_get_space_mask(data, 0);
21862 + /* this is version 1 */
21863 + case VCMD_get_space_mask:
21864 + return vc_get_space_mask(data, 1);
21866 + case VCMD_get_space_default:
21867 + return vc_get_space_mask(data, -1);
21869 + case VCMD_set_umask:
21870 + return vc_set_umask(vxi, data);
21872 + case VCMD_get_umask:
21873 + return vc_get_umask(vxi, data);
21875 + case VCMD_set_wmask:
21876 + return vc_set_wmask(vxi, data);
21878 + case VCMD_get_wmask:
21879 + return vc_get_wmask(vxi, data);
21880 +#ifdef CONFIG_IA32_EMULATION /* NOTE(review): this guard is CONFIG_IA32_EMULATION while __COMPAT itself is defined under CONFIG_COMPAT - confirm the two agree */
21881 + case VCMD_get_rlimit:
21882 + return __COMPAT(vc_get_rlimit, vxi, data, compat);
21883 + case VCMD_set_rlimit:
21884 + return __COMPAT(vc_set_rlimit, vxi, data, compat);
21886 + case VCMD_get_rlimit: /* same two commands again without compat selection (the preprocessor line separating the variants is not shown here) */
21887 + return vc_get_rlimit(vxi, data);
21888 + case VCMD_set_rlimit:
21889 + return vc_set_rlimit(vxi, data);
21891 + case VCMD_get_rlimit_mask:
21892 + return vc_get_rlimit_mask(id, data);
21893 + case VCMD_reset_hits:
21894 + return vc_reset_hits(vxi, data);
21895 + case VCMD_reset_minmax:
21896 + return vc_reset_minmax(vxi, data);
21898 + case VCMD_get_vhi_name:
21899 + return vc_get_vhi_name(vxi, data);
21900 + case VCMD_set_vhi_name:
21901 + return vc_set_vhi_name(vxi, data);
21903 + case VCMD_ctx_stat:
21904 + return vc_ctx_stat(vxi, data);
21905 + case VCMD_virt_stat:
21906 + return vc_virt_stat(vxi, data);
21907 + case VCMD_sock_stat:
21908 + return vc_sock_stat(vxi, data);
21909 + case VCMD_rlimit_stat:
21910 + return vc_rlimit_stat(vxi, data);
21912 + case VCMD_set_cflags:
21913 + return vc_set_cflags(vxi, data);
21914 + case VCMD_get_cflags:
21915 + return vc_get_cflags(vxi, data);
21917 + /* this is version 1 */
21918 + case VCMD_set_ccaps:
21919 + return vc_set_ccaps(vxi, data);
21920 + /* this is version 1 */
21921 + case VCMD_get_ccaps:
21922 + return vc_get_ccaps(vxi, data);
21923 + case VCMD_set_bcaps:
21924 + return vc_set_bcaps(vxi, data);
21925 + case VCMD_get_bcaps:
21926 + return vc_get_bcaps(vxi, data);
21928 + case VCMD_set_badness:
21929 + return vc_set_badness(vxi, data);
21930 + case VCMD_get_badness:
21931 + return vc_get_badness(vxi, data);
21933 + case VCMD_set_nflags: /* network-context commands receive nxi instead of vxi */
21934 + return vc_set_nflags(nxi, data);
21935 + case VCMD_get_nflags:
21936 + return vc_get_nflags(nxi, data);
21938 + case VCMD_set_ncaps:
21939 + return vc_set_ncaps(nxi, data);
21940 + case VCMD_get_ncaps:
21941 + return vc_get_ncaps(nxi, data);
21943 + case VCMD_set_prio_bias:
21944 + return vc_set_prio_bias(vxi, data);
21945 + case VCMD_get_prio_bias:
21946 + return vc_get_prio_bias(vxi, data);
21947 + case VCMD_add_dlimit: /* disk-limit commands take the raw id and go through __COMPAT() */
21948 + return __COMPAT(vc_add_dlimit, id, data, compat);
21949 + case VCMD_rem_dlimit:
21950 + return __COMPAT(vc_rem_dlimit, id, data, compat);
21951 + case VCMD_set_dlimit:
21952 + return __COMPAT(vc_set_dlimit, id, data, compat);
21953 + case VCMD_get_dlimit:
21954 + return __COMPAT(vc_get_dlimit, id, data, compat);
21956 + case VCMD_ctx_kill:
21957 + return vc_ctx_kill(vxi, data);
21959 + case VCMD_wait_exit:
21960 + return vc_wait_exit(vxi, data);
21962 + case VCMD_get_iattr:
21963 + return __COMPAT_NO_ID(vc_get_iattr, data, compat);
21964 + case VCMD_set_iattr:
21965 + return __COMPAT_NO_ID(vc_set_iattr, data, compat);
21967 + case VCMD_fget_iattr:
21968 + return vc_fget_iattr(id, data);
21969 + case VCMD_fset_iattr:
21970 + return vc_fset_iattr(id, data);
21972 + case VCMD_enter_space_v0: /* _v0 variants reuse the newer handler with a NULL data pointer */
21973 + return vc_enter_space_v1(vxi, NULL);
21974 + case VCMD_enter_space_v1:
21975 + return vc_enter_space_v1(vxi, data);
21976 + /* this is version 2 */
21977 + case VCMD_enter_space:
21978 + return vc_enter_space(vxi, data);
21980 + case VCMD_ctx_create_v0:
21981 + return vc_ctx_create(id, NULL);
21982 + case VCMD_ctx_create:
21983 + return vc_ctx_create(id, data);
21984 + case VCMD_ctx_migrate_v0:
21985 + return vc_ctx_migrate(vxi, NULL);
21986 + case VCMD_ctx_migrate:
21987 + return vc_ctx_migrate(vxi, data);
21989 + case VCMD_net_create_v0:
21990 + return vc_net_create(id, NULL);
21991 + case VCMD_net_create:
21992 + return vc_net_create(id, data);
21993 + case VCMD_net_migrate:
21994 + return vc_net_migrate(nxi, data);
21996 + case VCMD_tag_migrate:
21997 + return vc_tag_migrate(id);
21999 + case VCMD_net_add:
22000 + return vc_net_add(nxi, data);
22001 + case VCMD_net_remove:
22002 + return vc_net_remove(nxi, data);
22004 + case VCMD_net_add_ipv4_v1:
22005 + return vc_net_add_ipv4_v1(nxi, data);
22006 + /* this is version 2 */
22007 + case VCMD_net_add_ipv4:
22008 + return vc_net_add_ipv4(nxi, data);
22010 + case VCMD_net_rem_ipv4_v1:
22011 + return vc_net_rem_ipv4_v1(nxi, data);
22012 + /* this is version 2 */
22013 + case VCMD_net_rem_ipv4:
22014 + return vc_net_rem_ipv4(nxi, data);
22015 +#ifdef CONFIG_IPV6 /* IPv6 address commands exist only when IPv6 is configured */
22016 + case VCMD_net_add_ipv6:
22017 + return vc_net_add_ipv6(nxi, data);
22018 + case VCMD_net_remove_ipv6:
22019 + return vc_net_remove_ipv6(nxi, data);
22021 +/* case VCMD_add_match_ipv4:
22022 + return vc_add_match_ipv4(nxi, data);
22023 + case VCMD_get_match_ipv4:
22024 + return vc_get_match_ipv4(nxi, data);
22025 +#ifdef CONFIG_IPV6
22026 + case VCMD_add_match_ipv6:
22027 + return vc_add_match_ipv6(nxi, data);
22028 + case VCMD_get_match_ipv6:
22029 + return vc_get_match_ipv6(nxi, data);
22032 +#ifdef CONFIG_VSERVER_DEVICE
22033 + case VCMD_set_mapping:
22034 + return __COMPAT(vc_set_mapping, vxi, data, compat);
22035 + case VCMD_unset_mapping:
22036 + return __COMPAT(vc_unset_mapping, vxi, data, compat);
22038 +#ifdef CONFIG_VSERVER_HISTORY
22039 + case VCMD_dump_history:
22040 + return vc_dump_history(id);
22041 + case VCMD_read_history:
22042 + return __COMPAT(vc_read_history, id, data, compat);
22045 + vxwprintk_task(1, "unimplemented VCMD_%02d_%d[%d]",
22046 + VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd));
22052 +#define __VCMD(vcmd, _perm, _args, _flags) \
22053 + case VCMD_ ## vcmd: perm = _perm; \
22054 + args = _args; flags = _flags; break
22057 +#define VCA_NONE 0x00
22058 +#define VCA_VXI 0x01
22059 +#define VCA_NXI 0x02
22061 +#define VCF_NONE 0x00
22062 +#define VCF_INFO 0x01
22063 +#define VCF_ADMIN 0x02
22064 +#define VCF_ARES 0x06 /* includes admin */
22065 +#define VCF_SETUP 0x08 /* setup-type command: do_vserver() consults VXF_STATE_SETUP for it when the admin check did not permit the call */
22067 +#define VCF_ZIDOK 0x10 /* zero id okay */
22071 +long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat) /* common syscall body: classify cmd, check capabilities and context state, look up the objects named by id, then call do_vcmd() */
22074 + int permit = -1, state = 0; /* permit and state are only reported in the closing debug message as far as the lines shown go */
22075 + int perm = -1, args = 0, flags = 0; /* filled in by the matching __VCMD() row; perm stays -1 for a command without a row */
22076 + struct vx_info *vxi = NULL;
22077 + struct nx_info *nxi = NULL;
22080 + /* unprivileged commands */
22081 + __VCMD(get_version, 0, VCA_NONE, 0); /* row layout: command, perm level, VCA_* objects to look up, VCF_* flags */
22082 + __VCMD(get_vci, 0, VCA_NONE, 0);
22083 + __VCMD(get_rlimit_mask, 0, VCA_NONE, 0);
22084 + __VCMD(get_space_mask_v0,0, VCA_NONE, 0);
22085 + __VCMD(get_space_mask, 0, VCA_NONE, 0);
22086 + __VCMD(get_space_default,0, VCA_NONE, 0);
22088 + /* info commands */
22089 + __VCMD(task_xid, 2, VCA_NONE, 0);
22090 + __VCMD(reset_hits, 2, VCA_VXI, 0);
22091 + __VCMD(reset_minmax, 2, VCA_VXI, 0);
22092 + __VCMD(vx_info, 3, VCA_VXI, VCF_INFO);
22093 + __VCMD(get_bcaps, 3, VCA_VXI, VCF_INFO);
22094 + __VCMD(get_ccaps, 3, VCA_VXI, VCF_INFO);
22095 + __VCMD(get_cflags, 3, VCA_VXI, VCF_INFO);
22096 + __VCMD(get_umask, 3, VCA_VXI, VCF_INFO);
22097 + __VCMD(get_wmask, 3, VCA_VXI, VCF_INFO);
22098 + __VCMD(get_badness, 3, VCA_VXI, VCF_INFO);
22099 + __VCMD(get_vhi_name, 3, VCA_VXI, VCF_INFO);
22100 + __VCMD(get_rlimit, 3, VCA_VXI, VCF_INFO);
22102 + __VCMD(ctx_stat, 3, VCA_VXI, VCF_INFO);
22103 + __VCMD(virt_stat, 3, VCA_VXI, VCF_INFO);
22104 + __VCMD(sock_stat, 3, VCA_VXI, VCF_INFO);
22105 + __VCMD(rlimit_stat, 3, VCA_VXI, VCF_INFO);
22107 + __VCMD(task_nid, 2, VCA_NONE, 0);
22108 + __VCMD(nx_info, 3, VCA_NXI, VCF_INFO);
22109 + __VCMD(get_ncaps, 3, VCA_NXI, VCF_INFO);
22110 + __VCMD(get_nflags, 3, VCA_NXI, VCF_INFO);
22112 + __VCMD(task_tag, 2, VCA_NONE, 0);
22114 + __VCMD(get_iattr, 2, VCA_NONE, 0);
22115 + __VCMD(fget_iattr, 2, VCA_NONE, 0);
22116 + __VCMD(get_dlimit, 3, VCA_NONE, VCF_INFO);
22117 + __VCMD(get_prio_bias, 3, VCA_VXI, VCF_INFO);
22119 + /* lower admin commands */
22120 + __VCMD(wait_exit, 4, VCA_VXI, VCF_INFO);
22121 + __VCMD(ctx_create_v0, 5, VCA_NONE, 0);
22122 + __VCMD(ctx_create, 5, VCA_NONE, 0);
22123 + __VCMD(ctx_migrate_v0, 5, VCA_VXI, VCF_ADMIN);
22124 + __VCMD(ctx_migrate, 5, VCA_VXI, VCF_ADMIN);
22125 + __VCMD(enter_space_v0, 5, VCA_VXI, VCF_ADMIN);
22126 + __VCMD(enter_space_v1, 5, VCA_VXI, VCF_ADMIN);
22127 + __VCMD(enter_space, 5, VCA_VXI, VCF_ADMIN);
22129 + __VCMD(net_create_v0, 5, VCA_NONE, 0);
22130 + __VCMD(net_create, 5, VCA_NONE, 0);
22131 + __VCMD(net_migrate, 5, VCA_NXI, VCF_ADMIN);
22133 + __VCMD(tag_migrate, 5, VCA_NONE, VCF_ADMIN);
22135 + /* higher admin commands */
22136 + __VCMD(ctx_kill, 6, VCA_VXI, VCF_ARES);
22137 + __VCMD(set_space_v1, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22138 + __VCMD(set_space, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22140 + __VCMD(set_ccaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22141 + __VCMD(set_bcaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22142 + __VCMD(set_cflags, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22143 + __VCMD(set_umask, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22144 + __VCMD(set_wmask, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22145 + __VCMD(set_badness, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22147 + __VCMD(set_vhi_name, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22148 + __VCMD(set_rlimit, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22149 + __VCMD(set_prio_bias, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22151 + __VCMD(set_ncaps, 7, VCA_NXI, VCF_ARES | VCF_SETUP);
22152 + __VCMD(set_nflags, 7, VCA_NXI, VCF_ARES | VCF_SETUP);
22153 + __VCMD(net_add, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22154 + __VCMD(net_remove, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22155 + __VCMD(net_add_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22156 + __VCMD(net_rem_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22157 + __VCMD(net_add_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22158 + __VCMD(net_rem_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22159 +#ifdef CONFIG_IPV6
22160 + __VCMD(net_add_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22161 + __VCMD(net_remove_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22163 + __VCMD(set_iattr, 7, VCA_NONE, 0);
22164 + __VCMD(fset_iattr, 7, VCA_NONE, 0);
22165 + __VCMD(set_dlimit, 7, VCA_NONE, VCF_ARES);
22166 + __VCMD(add_dlimit, 8, VCA_NONE, VCF_ARES);
22167 + __VCMD(rem_dlimit, 8, VCA_NONE, VCF_ARES);
22169 +#ifdef CONFIG_VSERVER_DEVICE
22170 + __VCMD(set_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK);
22171 + __VCMD(unset_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK);
22173 + /* debug level admin commands */
22174 +#ifdef CONFIG_VSERVER_HISTORY
22175 + __VCMD(dump_history, 9, VCA_NONE, 0);
22176 + __VCMD(read_history, 9, VCA_NONE, 0);
22183 + vxdprintk(VXD_CBIT(switch, 0),
22184 + "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]",
22185 + VC_CATEGORY(cmd), VC_COMMAND(cmd),
22186 + VC_VERSION(cmd), id, data, compat,
22187 + perm, args, flags);
22194 + if (!capable(CAP_CONTEXT)) /* capability gate; NOTE(review): the condition selecting which commands reach this test is not shown here */
22198 + /* moved here from the individual commands */
22200 + if ((perm > 1) && !capable(CAP_SYS_ADMIN)) /* every command with a perm level above 1 needs CAP_SYS_ADMIN */
22204 + /* vcmd involves resource management */
22206 + if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE)) /* VCF_ARES is 0x06 and thus overlaps VCF_ADMIN (0x02), so this test also fires for plain VCF_ADMIN commands */
22210 + /* various legacy exceptions */
22212 + /* will go away when spectator is a cap */
22213 + case VCMD_ctx_migrate_v0:
22214 + case VCMD_ctx_migrate:
22216 + current->xid = 1; /* legacy special case: the caller is put into context id 1 directly */
22222 + /* will go away when spectator is a cap */
22223 + case VCMD_net_migrate:
22225 + current->nid = 1; /* same legacy special case for the network context id */
22232 + /* vcmds are fine by default */
22235 + /* admin type vcmds require admin ... */
22236 + if (flags & VCF_ADMIN)
22237 + permit = vx_check(0, VS_ADMIN) ? 1 : 0; /* permit: 1 = allowed as admin, 0 = refused so far */
22239 + /* ... but setup type vcmds override that */
22240 + if (!permit && (flags & VCF_SETUP))
22241 + permit = vx_flags(VXF_STATE_SETUP, 0) ? 2 : 0; /* permit: 2 = allowed because the caller's context is in setup state */
22249 + if (!id && (flags & VCF_ZIDOK)) /* id 0 is acceptable for commands flagged VCF_ZIDOK */
22253 + if (args & VCA_VXI) { /* the command operates on a context: resolve id to its vx_info */
22254 + vxi = lookup_vx_info(id);
22258 + if ((flags & VCF_ADMIN) &&
22259 + /* special case kill for shutdown */
22260 + (cmd != VCMD_ctx_kill) &&
22261 + /* can context be administrated? */
22262 + !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) {
22268 + if (args & VCA_NXI) { /* the command operates on a network context: resolve id to its nx_info */
22269 + nxi = lookup_nx_info(id);
22273 + if ((flags & VCF_ADMIN) &&
22274 + /* can context be administrated? */
22275 + !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) {
22282 + ret = do_vcmd(cmd, id, vxi, nxi, data, compat); /* all checks passed: run the command */
22285 + if ((args & VCA_NXI) && nxi) /* release whatever the lookups above returned */
22286 + put_nx_info(nxi);
22288 + if ((args & VCA_VXI) && vxi)
22289 + put_vx_info(vxi);
22291 + vxdprintk(VXD_CBIT(switch, 1),
22292 + "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]",
22293 + VC_CATEGORY(cmd), VC_COMMAND(cmd),
22294 + VC_VERSION(cmd), ret, ret, state, permit);
22299 +sys_vserver(uint32_t cmd, uint32_t id, void __user *data) /* syscall entry point that runs do_vserver() with compat = 0 */
22301 + return do_vserver(cmd, id, data, 0);
22304 +#ifdef CONFIG_COMPAT
22307 +sys32_vserver(uint32_t cmd, uint32_t id, void __user *data) /* CONFIG_COMPAT-only entry point that runs do_vserver() with compat = 1 */
22309 + return do_vserver(cmd, id, data, 1);
22312 +#endif /* CONFIG_COMPAT */
22313 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/sysctl.c linux-4.4/kernel/vserver/sysctl.c
22314 --- linux-4.4/kernel/vserver/sysctl.c 1970-01-01 01:00:00.000000000 +0100
22315 +++ linux-4.4/kernel/vserver/sysctl.c 2021-02-24 16:56:24.612823601 +0100
22318 + * kernel/vserver/sysctl.c
22320 + * Virtual Context Support
22322 + * Copyright (C) 2004-2007 Herbert Pötzl
22324 + * V0.01 basic structure
22328 +#include <linux/module.h>
22329 +#include <linux/ctype.h>
22330 +#include <linux/sysctl.h>
22331 +#include <linux/parser.h>
22332 +#include <asm/uaccess.h>
22335 + CTL_DEBUG_ERROR = 0, /* identifiers of this family are passed as the first argument of CTL_ENTRY(); only the first two enumerators are shown here */
22336 + CTL_DEBUG_SWITCH = 1,
22352 +unsigned int vs_debug_switch = 0; /* one debug value per subsystem, all 0 at start; each is exposed as a sysctl entry through CTL_ENTRY(), which takes the address of vs_<name> */
22353 +unsigned int vs_debug_xid = 0;
22354 +unsigned int vs_debug_nid = 0;
22355 +unsigned int vs_debug_tag = 0;
22356 +unsigned int vs_debug_net = 0;
22357 +unsigned int vs_debug_limit = 0;
22358 +unsigned int vs_debug_cres = 0;
22359 +unsigned int vs_debug_dlim = 0;
22360 +unsigned int vs_debug_quota = 0;
22361 +unsigned int vs_debug_cvirt = 0;
22362 +unsigned int vs_debug_space = 0;
22363 +unsigned int vs_debug_perm = 0;
22364 +unsigned int vs_debug_misc = 0;
22367 +static struct ctl_table_header *vserver_table_header; /* handle returned by register_sysctl_table(); NULL while nothing is registered */
22368 +static struct ctl_table vserver_root_table[]; /* forward declaration: the table is defined further down but used by vserver_register_sysctl() */
22371 +void vserver_register_sysctl(void) /* register the vserver sysctl tree once; further calls are no-ops while a header is held */
22373 + if (!vserver_table_header) { /* only register when not already registered */
22374 + vserver_table_header = register_sysctl_table(vserver_root_table);
22379 +void vserver_unregister_sysctl(void) /* undo vserver_register_sysctl(); does nothing when no table is registered */
22381 + if (vserver_table_header) {
22382 + unregister_sysctl_table(vserver_table_header);
22383 + vserver_table_header = NULL; /* cleared so that a later register call works again */
22388 +static int proc_dodebug(struct ctl_table *table, int write,
22389 + void __user *buffer, size_t *lenp, loff_t *ppos)
22391 + char tmpbuf[20], *p, c;
22392 + unsigned int value;
22393 + size_t left, len;
22395 + if ((*ppos && !write) || !*lenp) {
22403 + if (!access_ok(VERIFY_READ, buffer, left))
22405 + p = (char *)buffer;
22406 + while (left && __get_user(c, p) >= 0 && isspace(c))
22411 + if (left > sizeof(tmpbuf) - 1)
22413 + if (copy_from_user(tmpbuf, p, left))
22415 + tmpbuf[left] = '\0';
22417 + for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--)
22418 + value = 10 * value + (*p - '0');
22419 + if (*p && !isspace(*p))
22421 + while (left && isspace(*p))
22423 + *(unsigned int *)table->data = value;
22425 + if (!access_ok(VERIFY_WRITE, buffer, left))
22427 + len = sprintf(tmpbuf, "%u", *(unsigned int *)table->data);
22430 + if (__copy_to_user(buffer, tmpbuf, len))
22432 + if ((left -= len) > 0) {
22433 + if (put_user('\n', (char *)buffer + len))
22447 +#define CTL_ENTRY(ctl, name) \
22449 + .procname = #name, \
22450 + .data = &vs_ ## name, \
22451 + .maxlen = sizeof(int), \
22453 + .proc_handler = &proc_dodebug, \
22454 + .extra1 = &zero, \
22455 + .extra2 = &zero, \
22458 +static struct ctl_table vserver_debug_table[] = {
22459 + CTL_ENTRY(CTL_DEBUG_SWITCH, debug_switch),
22460 + CTL_ENTRY(CTL_DEBUG_XID, debug_xid),
22461 + CTL_ENTRY(CTL_DEBUG_NID, debug_nid),
22462 + CTL_ENTRY(CTL_DEBUG_TAG, debug_tag),
22463 + CTL_ENTRY(CTL_DEBUG_NET, debug_net),
22464 + CTL_ENTRY(CTL_DEBUG_LIMIT, debug_limit),
22465 + CTL_ENTRY(CTL_DEBUG_CRES, debug_cres),
22466 + CTL_ENTRY(CTL_DEBUG_DLIM, debug_dlim),
22467 + CTL_ENTRY(CTL_DEBUG_QUOTA, debug_quota),
22468 + CTL_ENTRY(CTL_DEBUG_CVIRT, debug_cvirt),
22469 + CTL_ENTRY(CTL_DEBUG_SPACE, debug_space),
22470 + CTL_ENTRY(CTL_DEBUG_PERM, debug_perm),
22471 + CTL_ENTRY(CTL_DEBUG_MISC, debug_misc),
22475 +static struct ctl_table vserver_root_table[] = {
22477 + .procname = "vserver",
22479 + .child = vserver_debug_table
22485 +static match_table_t tokens = {
22486 + { CTL_DEBUG_SWITCH, "switch=%x" },
22487 + { CTL_DEBUG_XID, "xid=%x" },
22488 + { CTL_DEBUG_NID, "nid=%x" },
22489 + { CTL_DEBUG_TAG, "tag=%x" },
22490 + { CTL_DEBUG_NET, "net=%x" },
22491 + { CTL_DEBUG_LIMIT, "limit=%x" },
22492 + { CTL_DEBUG_CRES, "cres=%x" },
22493 + { CTL_DEBUG_DLIM, "dlim=%x" },
22494 + { CTL_DEBUG_QUOTA, "quota=%x" },
22495 + { CTL_DEBUG_CVIRT, "cvirt=%x" },
22496 + { CTL_DEBUG_SPACE, "space=%x" },
22497 + { CTL_DEBUG_PERM, "perm=%x" },
22498 + { CTL_DEBUG_MISC, "misc=%x" },
22499 + { CTL_DEBUG_ERROR, NULL }
22502 +#define HANDLE_CASE(id, name, val) \
22503 + case CTL_DEBUG_ ## id: \
22504 + vs_debug_ ## name = val; \
22505 + printk("vs_debug_" #name "=0x%x\n", val); \
22509 +static int __init vs_debug_setup(char *str)
22514 + printk("vs_debug_setup(%s)\n", str);
22515 + while ((p = strsep(&str, ",")) != NULL) {
22516 + substring_t args[MAX_OPT_ARGS];
22517 + unsigned int value;
22522 + token = match_token(p, tokens, args);
22523 + value = (token > 0) ? simple_strtoul(args[0].from, NULL, 0) : 0;
22526 + HANDLE_CASE(SWITCH, switch, value);
22527 + HANDLE_CASE(XID, xid, value);
22528 + HANDLE_CASE(NID, nid, value);
22529 + HANDLE_CASE(TAG, tag, value);
22530 + HANDLE_CASE(NET, net, value);
22531 + HANDLE_CASE(LIMIT, limit, value);
22532 + HANDLE_CASE(CRES, cres, value);
22533 + HANDLE_CASE(DLIM, dlim, value);
22534 + HANDLE_CASE(QUOTA, quota, value);
22535 + HANDLE_CASE(CVIRT, cvirt, value);
22536 + HANDLE_CASE(SPACE, space, value);
22537 + HANDLE_CASE(PERM, perm, value);
22538 + HANDLE_CASE(MISC, misc, value);
22547 +__setup("vsdebug=", vs_debug_setup);
22551 +EXPORT_SYMBOL_GPL(vs_debug_switch);
22552 +EXPORT_SYMBOL_GPL(vs_debug_xid);
22553 +EXPORT_SYMBOL_GPL(vs_debug_nid);
22554 +EXPORT_SYMBOL_GPL(vs_debug_net);
22555 +EXPORT_SYMBOL_GPL(vs_debug_limit);
22556 +EXPORT_SYMBOL_GPL(vs_debug_cres);
22557 +EXPORT_SYMBOL_GPL(vs_debug_dlim);
22558 +EXPORT_SYMBOL_GPL(vs_debug_quota);
22559 +EXPORT_SYMBOL_GPL(vs_debug_cvirt);
22560 +EXPORT_SYMBOL_GPL(vs_debug_space);
22561 +EXPORT_SYMBOL_GPL(vs_debug_perm);
22562 +EXPORT_SYMBOL_GPL(vs_debug_misc);
22564 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/tag.c linux-4.4/kernel/vserver/tag.c
22565 --- linux-4.4/kernel/vserver/tag.c 1970-01-01 01:00:00.000000000 +0100
22566 +++ linux-4.4/kernel/vserver/tag.c 2021-02-24 16:56:24.612823601 +0100
22569 + * linux/kernel/vserver/tag.c
22571 + * Virtual Server: Shallow Tag Space
22573 + * Copyright (C) 2007 Herbert Pötzl
22575 + * V0.01 basic implementation
22579 +#include <linux/sched.h>
22580 +#include <linux/vserver/debug.h>
22581 +#include <linux/vs_pid.h>
22582 +#include <linux/vs_tag.h>
22584 +#include <linux/vserver/tag_cmd.h>
22587 +int dx_migrate_task(struct task_struct *p, vtag_t tag)
22592 + vxdprintk(VXD_CBIT(tag, 5),
22593 + "dx_migrate_task(%p[#%d],#%d)", p, p->tag, tag);
22599 + vxdprintk(VXD_CBIT(tag, 5),
22600 + "moved task %p into [#%d]", p, tag);
22604 +/* vserver syscall commands below here */
22606 +/* task xid and vx_info functions */
22609 +int vc_task_tag(uint32_t id)
22614 + struct task_struct *tsk;
22616 + tsk = find_task_by_real_pid(id);
22617 + tag = (tsk) ? tsk->tag : -ESRCH;
22618 + rcu_read_unlock();
22620 + tag = dx_current_tag();
22625 +int vc_tag_migrate(uint32_t tag)
22627 + return dx_migrate_task(current, tag & 0xFFFF);
22631 diff -urNp -x '*.orig' linux-4.4/kernel/vserver/vci_config.h linux-4.4/kernel/vserver/vci_config.h
22632 --- linux-4.4/kernel/vserver/vci_config.h 1970-01-01 01:00:00.000000000 +0100
22633 +++ linux-4.4/kernel/vserver/vci_config.h 2021-02-24 16:56:24.612823601 +0100
22636 +/* interface version */
22638 +#define VCI_VERSION 0x00020308
22642 + VCI_KCBIT_NO_DYNAMIC = 0,
22644 + VCI_KCBIT_PROC_SECURE = 4,
22645 + /* VCI_KCBIT_HARDCPU = 5, */
22646 + /* VCI_KCBIT_IDLELIMIT = 6, */
22647 + /* VCI_KCBIT_IDLETIME = 7, */
22649 + VCI_KCBIT_COWBL = 8,
22650 + VCI_KCBIT_FULLCOWBL = 9,
22651 + VCI_KCBIT_SPACES = 10,
22652 + VCI_KCBIT_NETV2 = 11,
22653 + VCI_KCBIT_MEMCG = 12,
22654 + VCI_KCBIT_MEMCG_SWAP = 13,
22656 + VCI_KCBIT_DEBUG = 16,
22657 + VCI_KCBIT_HISTORY = 20,
22658 + VCI_KCBIT_TAGGED = 24,
22659 + VCI_KCBIT_PPTAG = 28,
22661 + VCI_KCBIT_MORE = 31,
22665 +static inline uint32_t vci_kernel_config(void)
22668 + (1 << VCI_KCBIT_NO_DYNAMIC) |
22670 + /* configured features */
22671 +#ifdef CONFIG_VSERVER_PROC_SECURE
22672 + (1 << VCI_KCBIT_PROC_SECURE) |
22674 +#ifdef CONFIG_VSERVER_COWBL
22675 + (1 << VCI_KCBIT_COWBL) |
22676 + (1 << VCI_KCBIT_FULLCOWBL) |
22678 + (1 << VCI_KCBIT_SPACES) |
22679 + (1 << VCI_KCBIT_NETV2) |
22680 +#ifdef CONFIG_MEMCG
22681 + (1 << VCI_KCBIT_MEMCG) |
22683 +#ifdef CONFIG_MEMCG_SWAP
22684 + (1 << VCI_KCBIT_MEMCG_SWAP) |
22687 + /* debug options */
22688 +#ifdef CONFIG_VSERVER_DEBUG
22689 + (1 << VCI_KCBIT_DEBUG) |
22691 +#ifdef CONFIG_VSERVER_HISTORY
22692 + (1 << VCI_KCBIT_HISTORY) |
22695 + /* inode context tagging */
22696 +#if defined(CONFIG_TAGGING_NONE)
22697 + (0 << VCI_KCBIT_TAGGED) |
22698 +#elif defined(CONFIG_TAGGING_UID16)
22699 + (1 << VCI_KCBIT_TAGGED) |
22700 +#elif defined(CONFIG_TAGGING_GID16)
22701 + (2 << VCI_KCBIT_TAGGED) |
22702 +#elif defined(CONFIG_TAGGING_ID24)
22703 + (3 << VCI_KCBIT_TAGGED) |
22704 +#elif defined(CONFIG_TAGGING_INTERN)
22705 + (4 << VCI_KCBIT_TAGGED) |
22706 +#elif defined(CONFIG_TAGGING_RUNTIME)
22707 + (5 << VCI_KCBIT_TAGGED) |
22709 + (7 << VCI_KCBIT_TAGGED) |
22711 + (1 << VCI_KCBIT_PPTAG) |
22715 diff -urNp -x '*.orig' linux-4.4/mm/memcontrol.c linux-4.4/mm/memcontrol.c
22716 --- linux-4.4/mm/memcontrol.c 2021-02-24 16:56:12.009093341 +0100
22717 +++ linux-4.4/mm/memcontrol.c 2021-02-24 16:56:24.616157039 +0100
22718 @@ -2907,6 +2907,42 @@ static u64 mem_cgroup_read_u64(struct cg
22722 +unsigned long mem_cgroup_mem_usage_pages(struct mem_cgroup *memcg)
22724 + return mem_cgroup_usage(memcg, false);
22727 +unsigned long mem_cgroup_mem_limit_pages(struct mem_cgroup *memcg)
22729 + return (u64)memcg->memory.limit;
22732 +unsigned long mem_cgroup_memsw_usage_pages(struct mem_cgroup *memcg)
22734 + return mem_cgroup_usage(memcg, true);
22737 +unsigned long mem_cgroup_memsw_limit_pages(struct mem_cgroup *memcg)
22739 + return (u64)memcg->memsw.limit;
22742 +void dump_mem_cgroup(struct mem_cgroup *memcg)
22744 + printk(KERN_INFO "memcg: %p/%d:\n"
22745 + "\tmemory:\t%lu/%lu %lu/%lu\n"
22746 + "\tmemsw:\t%lu/%lu %lu/%lu\n"
22747 + "\tkmem:\t%lu/%lu %lu/%lu\n",
22748 + memcg, memcg->id.id,
22749 + page_counter_read(&memcg->memory), memcg->memory.limit,
22750 + memcg->memory.watermark, memcg->memory.failcnt,
22751 + page_counter_read(&memcg->memsw), memcg->memsw.limit,
22752 + memcg->memsw.watermark, memcg->memsw.failcnt,
22753 + page_counter_read(&memcg->kmem), memcg->kmem.limit,
22754 + memcg->kmem.watermark, memcg->kmem.failcnt);
22758 #ifdef CONFIG_MEMCG_KMEM
22759 static int memcg_activate_kmem(struct mem_cgroup *memcg,
22760 unsigned long nr_pages)
22761 diff -urNp -x '*.orig' linux-4.4/mm/oom_kill.c linux-4.4/mm/oom_kill.c
22762 --- linux-4.4/mm/oom_kill.c 2021-02-24 16:56:12.012426780 +0100
22763 +++ linux-4.4/mm/oom_kill.c 2021-02-24 16:56:24.616157039 +0100
22765 #include <linux/freezer.h>
22766 #include <linux/ftrace.h>
22767 #include <linux/ratelimit.h>
22768 +#include <linux/reboot.h>
22769 +#include <linux/vs_context.h>
22771 #define CREATE_TRACE_POINTS
22772 #include <trace/events/oom.h>
22773 @@ -131,11 +133,18 @@ static inline bool is_sysrq_oom(struct o
22774 static bool oom_unkillable_task(struct task_struct *p,
22775 struct mem_cgroup *memcg, const nodemask_t *nodemask)
22777 - if (is_global_init(p))
22778 + unsigned xid = vx_current_xid();
22780 + /* skip the init task, global and per guest */
22781 + if (task_is_init(p))
22783 if (p->flags & PF_KTHREAD)
22786 + /* skip other guest and host processes if oom in guest */
22787 + if (xid && vx_task_xid(p) != xid)
22790 /* When mem_cgroup_out_of_memory() and p is not member of the group */
22791 if (memcg && !task_in_mem_cgroup(p, memcg))
22793 @@ -534,8 +543,8 @@ void oom_kill_process(struct oom_control
22794 if (__ratelimit(&oom_rs))
22795 dump_header(oc, p, memcg);
22797 - pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
22798 - message, task_pid_nr(p), p->comm, points);
22799 + pr_err("%s: Kill process %d:#%u (%s) score %u or sacrifice child\n",
22800 + message, task_pid_nr(p), p->xid, p->comm, points);
22803 * If any of p's children has a different mm and is eligible for kill,
22804 @@ -593,8 +602,8 @@ void oom_kill_process(struct oom_control
22806 do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
22807 mark_oom_victim(victim);
22808 - pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
22809 - task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
22810 + pr_err("Killed process %d:%u (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
22811 + task_pid_nr(victim), victim->xid, victim->comm, K(victim->mm->total_vm),
22812 K(get_mm_counter(victim->mm, MM_ANONPAGES)),
22813 K(get_mm_counter(victim->mm, MM_FILEPAGES)));
22814 task_unlock(victim);
22815 @@ -630,6 +639,8 @@ void oom_kill_process(struct oom_control
22819 +long vs_oom_action(unsigned int);
22822 * Determines whether the kernel must panic because of the panic_on_oom sysctl.
22824 @@ -730,7 +741,12 @@ bool out_of_memory(struct oom_control *o
22825 /* Found nothing?!?! Either we hang forever, or we panic. */
22826 if (!p && !is_sysrq_oom(oc)) {
22827 dump_header(oc, NULL, NULL);
22828 - panic("Out of memory and no killable processes...\n");
22830 + /* avoid panic for guest OOM */
22831 + if (vx_current_xid())
22832 + vs_oom_action(LINUX_REBOOT_CMD_OOM);
22834 + panic("Out of memory and no killable processes...\n");
22836 if (p && p != (void *)-1UL) {
22837 oom_kill_process(oc, p, points, totalpages, NULL,
22838 diff -urNp -x '*.orig' linux-4.4/mm/page_alloc.c linux-4.4/mm/page_alloc.c
22839 --- linux-4.4/mm/page_alloc.c 2021-02-24 16:56:12.015760218 +0100
22840 +++ linux-4.4/mm/page_alloc.c 2021-02-24 16:56:24.616157039 +0100
22842 #include <linux/sched/rt.h>
22843 #include <linux/page_owner.h>
22844 #include <linux/kthread.h>
22845 +#include <linux/vs_base.h>
22846 +#include <linux/vs_limit.h>
22848 #include <asm/sections.h>
22849 #include <asm/tlbflush.h>
22850 @@ -3682,14 +3684,17 @@ long si_mem_available(void)
22852 pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
22853 pagecache -= min(pagecache / 2, wmark_low);
22854 - available += pagecache;
22855 + if (!vx_flags(VXF_VIRT_MEM, 0))
22856 + available += pagecache;
22859 * Part of the reclaimable slab consists of items that are in use,
22860 * and cannot be freed. Cap this estimate at the low watermark.
22862 - available += global_page_state(NR_SLAB_RECLAIMABLE) -
22863 - min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
22864 + if (!vx_flags(VXF_VIRT_MEM, 0))
22865 + available += global_page_state(NR_SLAB_RECLAIMABLE) -
22866 + min(global_page_state(NR_SLAB_RECLAIMABLE) / 2,
22871 @@ -3706,6 +3711,9 @@ void si_meminfo(struct sysinfo *val)
22872 val->totalhigh = totalhigh_pages;
22873 val->freehigh = nr_free_highpages();
22874 val->mem_unit = PAGE_SIZE;
22876 + if (vx_flags(VXF_VIRT_MEM, 0))
22877 + vx_vsi_meminfo(val);
22880 EXPORT_SYMBOL(si_meminfo);
22881 @@ -3731,6 +3739,9 @@ void si_meminfo_node(struct sysinfo *val
22884 val->mem_unit = PAGE_SIZE;
22886 + if (vx_flags(VXF_VIRT_MEM, 0))
22887 + vx_vsi_meminfo(val);
22891 diff -urNp -x '*.orig' linux-4.4/mm/pgtable-generic.c linux-4.4/mm/pgtable-generic.c
22892 --- linux-4.4/mm/pgtable-generic.c 2021-02-24 16:56:12.015760218 +0100
22893 +++ linux-4.4/mm/pgtable-generic.c 2021-02-24 16:56:24.616157039 +0100
22895 * Copyright (C) 2010 Linus Torvalds
22898 +#include <linux/mm.h>
22900 #include <linux/pagemap.h>
22901 #include <asm/tlb.h>
22902 #include <asm-generic/pgtable.h>
22903 diff -urNp -x '*.orig' linux-4.4/mm/shmem.c linux-4.4/mm/shmem.c
22904 --- linux-4.4/mm/shmem.c 2021-02-24 16:56:12.019093656 +0100
22905 +++ linux-4.4/mm/shmem.c 2021-02-24 16:56:24.616157039 +0100
22906 @@ -2200,7 +2200,7 @@ static int shmem_statfs(struct dentry *d
22908 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
22910 - buf->f_type = TMPFS_MAGIC;
22911 + buf->f_type = TMPFS_SUPER_MAGIC;
22912 buf->f_bsize = PAGE_CACHE_SIZE;
22913 buf->f_namelen = NAME_MAX;
22914 if (sbinfo->max_blocks) {
22915 @@ -3050,7 +3050,7 @@ int shmem_fill_super(struct super_block
22916 sb->s_maxbytes = MAX_LFS_FILESIZE;
22917 sb->s_blocksize = PAGE_CACHE_SIZE;
22918 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
22919 - sb->s_magic = TMPFS_MAGIC;
22920 + sb->s_magic = TMPFS_SUPER_MAGIC;
22921 sb->s_op = &shmem_ops;
22922 sb->s_time_gran = 1;
22923 #ifdef CONFIG_TMPFS_XATTR
22924 diff -urNp -x '*.orig' linux-4.4/mm/slab.c linux-4.4/mm/slab.c
22925 --- linux-4.4/mm/slab.c 2021-02-24 16:56:12.019093656 +0100
22926 +++ linux-4.4/mm/slab.c 2021-02-24 16:56:24.616157039 +0100
22927 @@ -337,6 +337,8 @@ static void kmem_cache_node_init(struct
22928 #define STATS_INC_FREEMISS(x) do { } while (0)
22931 +#include "slab_vs.h"
22936 @@ -3186,6 +3188,7 @@ slab_alloc_node(struct kmem_cache *cache
22937 /* ___cache_alloc_node can fall back to other nodes */
22938 ptr = ____cache_alloc_node(cachep, flags, nodeid);
22940 + vx_slab_alloc(cachep, flags);
22941 local_irq_restore(save_flags);
22942 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
22943 kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
22944 @@ -3374,6 +3377,7 @@ static inline void __cache_free(struct k
22946 kmemleak_free_recursive(objp, cachep->flags);
22947 objp = cache_free_debugcheck(cachep, objp, caller);
22948 + vx_slab_free(cachep);
22950 kmemcheck_slab_free(cachep, objp, cachep->object_size);
22952 diff -urNp -x '*.orig' linux-4.4/mm/slab_vs.h linux-4.4/mm/slab_vs.h
22953 --- linux-4.4/mm/slab_vs.h 1970-01-01 01:00:00.000000000 +0100
22954 +++ linux-4.4/mm/slab_vs.h 2021-02-24 16:56:24.619490478 +0100
22957 +#include <linux/vserver/context.h>
22959 +#include <linux/vs_context.h>
22962 +void vx_slab_alloc(struct kmem_cache *cachep, gfp_t flags)
22964 + int what = gfp_zone(cachep->allocflags);
22965 + struct vx_info *vxi = current_vx_info();
22970 + atomic_add(cachep->size, &vxi->cacct.slab[what]);
22974 +void vx_slab_free(struct kmem_cache *cachep)
22976 + int what = gfp_zone(cachep->allocflags);
22977 + struct vx_info *vxi = current_vx_info();
22982 + atomic_sub(cachep->size, &vxi->cacct.slab[what]);
22985 diff -urNp -x '*.orig' linux-4.4/mm/swapfile.c linux-4.4/mm/swapfile.c
22986 --- linux-4.4/mm/swapfile.c 2021-02-24 16:56:12.019093656 +0100
22987 +++ linux-4.4/mm/swapfile.c 2021-02-24 16:56:24.619490478 +0100
22989 #include <asm/tlbflush.h>
22990 #include <linux/swapops.h>
22991 #include <linux/swap_cgroup.h>
22992 +#include <linux/vs_base.h>
22994 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
22996 @@ -2070,6 +2071,16 @@ static int swap_show(struct seq_file *sw
22998 if (si == SEQ_START_TOKEN) {
22999 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
23000 + if (vx_flags(VXF_VIRT_MEM, 0)) {
23001 + struct sysinfo si = { 0 };
23003 + vx_vsi_swapinfo(&si);
23004 + if (si.totalswap < (1 << 10))
23006 + seq_printf(swap, "%s\t\t\t\t\t%s\t%lu\t%lu\t%d\n",
23007 + "hdv0", "partition", si.totalswap >> 10,
23008 + (si.totalswap - si.freeswap) >> 10, -1);
23013 @@ -2627,6 +2638,8 @@ void si_swapinfo(struct sysinfo *val)
23014 val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused;
23015 val->totalswap = total_swap_pages + nr_to_be_unused;
23016 spin_unlock(&swap_lock);
23017 + if (vx_flags(VXF_VIRT_MEM, 0))
23018 + vx_vsi_swapinfo(val);
23022 diff -urNp -x '*.orig' linux-4.4/net/bridge/br_multicast.c linux-4.4/net/bridge/br_multicast.c
23023 --- linux-4.4/net/bridge/br_multicast.c 2021-02-24 16:56:12.045761163 +0100
23024 +++ linux-4.4/net/bridge/br_multicast.c 2021-02-24 16:56:24.619490478 +0100
23025 @@ -462,7 +462,7 @@ static struct sk_buff *br_ip6_multicast_
23026 ip6h->hop_limit = 1;
23027 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
23028 if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
23030 + &ip6h->saddr, NULL)) {
23032 br->has_ipv6_addr = 0;
23034 diff -urNp -x '*.orig' linux-4.4/net/core/dev.c linux-4.4/net/core/dev.c
23035 --- linux-4.4/net/core/dev.c 2021-02-24 16:56:12.545776909 +0100
23036 +++ linux-4.4/net/core/dev.c 2021-02-24 16:56:24.619490478 +0100
23037 @@ -125,6 +125,7 @@
23038 #include <linux/in.h>
23039 #include <linux/jhash.h>
23040 #include <linux/random.h>
23041 +#include <linux/vs_inet.h>
23042 #include <trace/events/napi.h>
23043 #include <trace/events/net.h>
23044 #include <trace/events/skb.h>
23045 @@ -730,7 +731,8 @@ struct net_device *__dev_get_by_name(str
23046 struct hlist_head *head = dev_name_hash(net, name);
23048 hlist_for_each_entry(dev, head, name_hlist)
23049 - if (!strncmp(dev->name, name, IFNAMSIZ))
23050 + if (!strncmp(dev->name, name, IFNAMSIZ) &&
23051 + nx_dev_visible(current_nx_info(), dev))
23055 @@ -755,7 +757,8 @@ struct net_device *dev_get_by_name_rcu(s
23056 struct hlist_head *head = dev_name_hash(net, name);
23058 hlist_for_each_entry_rcu(dev, head, name_hlist)
23059 - if (!strncmp(dev->name, name, IFNAMSIZ))
23060 + if (!strncmp(dev->name, name, IFNAMSIZ) &&
23061 + nx_dev_visible(current_nx_info(), dev))
23065 @@ -805,7 +808,8 @@ struct net_device *__dev_get_by_index(st
23066 struct hlist_head *head = dev_index_hash(net, ifindex);
23068 hlist_for_each_entry(dev, head, index_hlist)
23069 - if (dev->ifindex == ifindex)
23070 + if ((dev->ifindex == ifindex) &&
23071 + nx_dev_visible(current_nx_info(), dev))
23075 @@ -823,7 +827,7 @@ EXPORT_SYMBOL(__dev_get_by_index);
23076 * about locking. The caller must hold RCU lock.
23079 -struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
23080 +struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex)
23082 struct net_device *dev;
23083 struct hlist_head *head = dev_index_hash(net, ifindex);
23084 @@ -834,6 +838,16 @@ struct net_device *dev_get_by_index_rcu(
23088 +EXPORT_SYMBOL(dev_get_by_index_real_rcu);
23090 +struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
23092 + struct net_device *dev = dev_get_by_index_real_rcu(net, ifindex);
23094 + if (nx_dev_visible(current_nx_info(), dev))
23098 EXPORT_SYMBOL(dev_get_by_index_rcu);
23101 @@ -911,7 +925,8 @@ struct net_device *dev_getbyhwaddr_rcu(s
23103 for_each_netdev_rcu(net, dev)
23104 if (dev->type == type &&
23105 - !memcmp(dev->dev_addr, ha, dev->addr_len))
23106 + !memcmp(dev->dev_addr, ha, dev->addr_len) &&
23107 + nx_dev_visible(current_nx_info(), dev))
23111 @@ -923,9 +938,11 @@ struct net_device *__dev_getfirstbyhwtyp
23112 struct net_device *dev;
23115 - for_each_netdev(net, dev)
23116 - if (dev->type == type)
23117 + for_each_netdev(net, dev) {
23118 + if ((dev->type == type) &&
23119 + nx_dev_visible(current_nx_info(), dev))
23125 @@ -937,7 +954,8 @@ struct net_device *dev_getfirstbyhwtype(
23128 for_each_netdev_rcu(net, dev)
23129 - if (dev->type == type) {
23130 + if ((dev->type == type) &&
23131 + nx_dev_visible(current_nx_info(), dev)) {
23135 @@ -967,7 +985,8 @@ struct net_device *__dev_get_by_flags(st
23138 for_each_netdev(net, dev) {
23139 - if (((dev->flags ^ if_flags) & mask) == 0) {
23140 + if ((((dev->flags ^ if_flags) & mask) == 0) &&
23141 + nx_dev_visible(current_nx_info(), dev)) {
23145 @@ -1045,6 +1064,8 @@ static int __dev_alloc_name(struct net *
23147 if (i < 0 || i >= max_netdevices)
23149 + if (!nx_dev_visible(current_nx_info(), d))
23152 /* avoid cases where sscanf is not exact inverse of printf */
23153 snprintf(buf, IFNAMSIZ, name, i);
23154 diff -urNp -x '*.orig' linux-4.4/net/core/net-procfs.c linux-4.4/net/core/net-procfs.c
23155 --- linux-4.4/net/core/net-procfs.c 2016-01-11 00:01:32.000000000 +0100
23156 +++ linux-4.4/net/core/net-procfs.c 2021-02-24 16:56:24.619490478 +0100
23158 #include <linux/netdevice.h>
23159 #include <linux/proc_fs.h>
23160 #include <linux/seq_file.h>
23161 +#include <linux/vs_inet.h>
23162 #include <net/wext.h>
23164 #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
23165 @@ -77,8 +78,13 @@ static void dev_seq_stop(struct seq_file
23166 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
23168 struct rtnl_link_stats64 temp;
23169 - const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
23170 + const struct rtnl_link_stats64 *stats;
23172 + /* device visible inside network context? */
23173 + if (!nx_dev_visible(current_nx_info(), dev))
23176 + stats = dev_get_stats(dev, &temp);
23177 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
23178 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
23179 dev->name, stats->rx_bytes, stats->rx_packets,
23180 diff -urNp -x '*.orig' linux-4.4/net/core/rtnetlink.c linux-4.4/net/core/rtnetlink.c
23181 --- linux-4.4/net/core/rtnetlink.c 2021-02-24 16:56:12.059094916 +0100
23182 +++ linux-4.4/net/core/rtnetlink.c 2021-02-24 16:56:24.619490478 +0100
23183 @@ -1457,6 +1457,8 @@ static int rtnl_dump_ifinfo(struct sk_bu
23184 hlist_for_each_entry(dev, head, index_hlist) {
23187 + if (!nx_dev_visible(skb->sk->sk_nx_info, dev))
23189 err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
23190 NETLINK_CB(cb->skb).portid,
23191 cb->nlh->nlmsg_seq, 0,
23192 @@ -2585,6 +2587,9 @@ void rtmsg_ifinfo(int type, struct net_d
23194 struct sk_buff *skb;
23196 + if (!nx_dev_visible(current_nx_info(), dev))
23199 if (dev->reg_state != NETREG_REGISTERED)
23202 diff -urNp -x '*.orig' linux-4.4/net/core/sock.c linux-4.4/net/core/sock.c
23203 --- linux-4.4/net/core/sock.c 2021-02-24 16:56:12.059094916 +0100
23204 +++ linux-4.4/net/core/sock.c 2021-02-24 16:56:24.619490478 +0100
23205 @@ -134,6 +134,10 @@
23206 #include <linux/sock_diag.h>
23208 #include <linux/filter.h>
23209 +#include <linux/vs_socket.h>
23210 +#include <linux/vs_limit.h>
23211 +#include <linux/vs_context.h>
23212 +#include <linux/vs_network.h>
23214 #include <trace/events/sock.h>
23216 @@ -1365,6 +1369,8 @@ static struct sock *sk_prot_alloc(struct
23218 sk_tx_queue_clear(sk);
23220 + sock_vx_init(sk);
23221 + sock_nx_init(sk);
23225 @@ -1472,6 +1478,11 @@ void sk_destruct(struct sock *sk)
23226 put_pid(sk->sk_peer_pid);
23227 if (likely(sk->sk_net_refcnt))
23228 put_net(sock_net(sk));
23230 + clr_vx_info(&sk->sk_vx_info);
23232 + clr_nx_info(&sk->sk_nx_info);
23234 sk_prot_free(sk->sk_prot_creator, sk);
23237 @@ -1524,6 +1535,8 @@ struct sock *sk_clone_lock(const struct
23239 if (likely(newsk->sk_net_refcnt))
23240 get_net(sock_net(newsk));
23241 + sock_vx_init(newsk);
23242 + sock_nx_init(newsk);
23243 sk_node_init(&newsk->sk_node);
23244 sock_lock_init(newsk);
23245 bh_lock_sock(newsk);
23246 @@ -1589,6 +1602,12 @@ struct sock *sk_clone_lock(const struct
23248 atomic_set(&newsk->sk_refcnt, 2);
23250 + set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info);
23251 + newsk->sk_xid = sk->sk_xid;
23252 + vx_sock_inc(newsk);
23253 + set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info);
23254 + newsk->sk_nid = sk->sk_nid;
23257 * Increment the counter in the same struct proto as the master
23258 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
23259 @@ -2452,6 +2471,12 @@ void sock_init_data(struct socket *sock,
23260 seqlock_init(&sk->sk_stamp_seq);
23263 + set_vx_info(&sk->sk_vx_info, current_vx_info());
23264 + sk->sk_xid = vx_current_xid();
23266 + set_nx_info(&sk->sk_nx_info, current_nx_info());
23267 + sk->sk_nid = nx_current_nid();
23269 #ifdef CONFIG_NET_RX_BUSY_POLL
23270 sk->sk_napi_id = 0;
23271 sk->sk_ll_usec = sysctl_net_busy_read;
23272 diff -urNp -x '*.orig' linux-4.4/net/ipv4/af_inet.c linux-4.4/net/ipv4/af_inet.c
23273 --- linux-4.4/net/ipv4/af_inet.c 2021-02-24 16:56:12.069095231 +0100
23274 +++ linux-4.4/net/ipv4/af_inet.c 2021-02-24 16:56:24.622823916 +0100
23275 @@ -308,10 +308,15 @@ lookup_protocol:
23279 + if ((protocol == IPPROTO_ICMP) &&
23280 + nx_capable(CAP_NET_RAW, NXC_RAW_ICMP))
23283 if (sock->type == SOCK_RAW && !kern &&
23284 !ns_capable(net->user_ns, CAP_NET_RAW))
23285 goto out_rcu_unlock;
23288 sock->ops = answer->ops;
23289 answer_prot = answer->prot;
23290 answer_flags = answer->flags;
23291 @@ -425,6 +430,7 @@ int inet_bind(struct socket *sock, struc
23292 struct sock *sk = sock->sk;
23293 struct inet_sock *inet = inet_sk(sk);
23294 struct net *net = sock_net(sk);
23295 + struct nx_v4_sock_addr nsa;
23296 unsigned short snum;
23298 u32 tb_id = RT_TABLE_LOCAL;
23299 @@ -450,7 +456,11 @@ int inet_bind(struct socket *sock, struc
23302 tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
23303 - chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
23304 + err = v4_map_sock_addr(inet, addr, &nsa);
23308 + chk_addr_ret = inet_addr_type_table(net, nsa.saddr, tb_id);
23310 /* Not specified by any standard per-se, however it breaks too
23311 * many applications when removed. It is unfortunate since
23312 @@ -462,7 +472,7 @@ int inet_bind(struct socket *sock, struc
23313 err = -EADDRNOTAVAIL;
23314 if (!net->ipv4.sysctl_ip_nonlocal_bind &&
23315 !(inet->freebind || inet->transparent) &&
23316 - addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
23317 + nsa.saddr != htonl(INADDR_ANY) &&
23318 chk_addr_ret != RTN_LOCAL &&
23319 chk_addr_ret != RTN_MULTICAST &&
23320 chk_addr_ret != RTN_BROADCAST)
23321 @@ -488,7 +498,7 @@ int inet_bind(struct socket *sock, struc
23322 if (sk->sk_state != TCP_CLOSE || inet->inet_num)
23323 goto out_release_sock;
23325 - inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
23326 + v4_set_sock_addr(inet, &nsa);
23327 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
23328 inet->inet_saddr = 0; /* Use device */
23330 @@ -708,11 +718,13 @@ int inet_getname(struct socket *sock, st
23333 sin->sin_port = inet->inet_dport;
23334 - sin->sin_addr.s_addr = inet->inet_daddr;
23335 + sin->sin_addr.s_addr =
23336 + nx_map_sock_lback(sk->sk_nx_info, inet->inet_daddr);
23338 __be32 addr = inet->inet_rcv_saddr;
23340 addr = inet->inet_saddr;
23341 + addr = nx_map_sock_lback(sk->sk_nx_info, addr);
23342 sin->sin_port = inet->inet_sport;
23343 sin->sin_addr.s_addr = addr;
23345 @@ -896,6 +908,7 @@ static int inet_compat_ioctl(struct sock
23349 +#include <linux/vs_limit.h>
23351 const struct proto_ops inet_stream_ops = {
23353 diff -urNp -x '*.orig' linux-4.4/net/ipv4/arp.c linux-4.4/net/ipv4/arp.c
23354 --- linux-4.4/net/ipv4/arp.c 2021-02-24 16:56:12.069095231 +0100
23355 +++ linux-4.4/net/ipv4/arp.c 2021-02-24 16:56:24.622823916 +0100
23356 @@ -1307,6 +1307,7 @@ static void arp_format_neigh_entry(struc
23357 struct net_device *dev = n->dev;
23358 int hatype = dev->type;
23360 + /* FIXME: check for network context */
23361 read_lock(&n->lock);
23362 /* Convert hardware address to XX:XX:XX:XX ... form. */
23363 #if IS_ENABLED(CONFIG_AX25)
23364 @@ -1338,6 +1339,7 @@ static void arp_format_pneigh_entry(stru
23365 int hatype = dev ? dev->type : 0;
23368 + /* FIXME: check for network context */
23369 sprintf(tbuf, "%pI4", n->key);
23370 seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
23371 tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00",
23372 diff -urNp -x '*.orig' linux-4.4/net/ipv4/devinet.c linux-4.4/net/ipv4/devinet.c
23373 --- linux-4.4/net/ipv4/devinet.c 2021-02-24 16:56:12.069095231 +0100
23374 +++ linux-4.4/net/ipv4/devinet.c 2021-02-24 16:56:24.622823916 +0100
23375 @@ -547,6 +547,7 @@ struct in_device *inetdev_by_index(struc
23377 EXPORT_SYMBOL(inetdev_by_index);
23380 /* Called only from RTNL semaphored context. No locks. */
23382 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
23383 @@ -1006,6 +1007,8 @@ int devinet_ioctl(struct net *net, unsig
23385 in_dev = __in_dev_get_rtnl(dev);
23387 + struct nx_info *nxi = current_nx_info();
23389 if (tryaddrmatch) {
23390 /* Matthias Andree */
23391 /* compare label and address (4.4BSD style) */
23392 @@ -1014,6 +1017,8 @@ int devinet_ioctl(struct net *net, unsig
23393 This is checked above. */
23394 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
23395 ifap = &ifa->ifa_next) {
23396 + if (!nx_v4_ifa_visible(nxi, ifa))
23398 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
23399 sin_orig.sin_addr.s_addr ==
23401 @@ -1026,9 +1031,12 @@ int devinet_ioctl(struct net *net, unsig
23402 comparing just the label */
23404 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
23405 - ifap = &ifa->ifa_next)
23406 + ifap = &ifa->ifa_next) {
23407 + if (!nx_v4_ifa_visible(nxi, ifa))
23409 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
23415 @@ -1182,6 +1190,8 @@ static int inet_gifconf(struct net_devic
23418 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
23419 + if (!nx_v4_ifa_visible(current_nx_info(), ifa))
23422 done += sizeof(ifr);
23424 @@ -1582,6 +1592,7 @@ static int inet_dump_ifaddr(struct sk_bu
23425 struct net_device *dev;
23426 struct in_device *in_dev;
23427 struct in_ifaddr *ifa;
23428 + struct sock *sk = skb->sk;
23429 struct hlist_head *head;
23432 @@ -1605,6 +1616,8 @@ static int inet_dump_ifaddr(struct sk_bu
23434 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
23435 ifa = ifa->ifa_next, ip_idx++) {
23436 + if (sk && !nx_v4_ifa_visible(sk->sk_nx_info, ifa))
23438 if (ip_idx < s_ip_idx)
23440 if (inet_fill_ifaddr(skb, ifa,
23441 diff -urNp -x '*.orig' linux-4.4/net/ipv4/fib_trie.c linux-4.4/net/ipv4/fib_trie.c
23442 --- linux-4.4/net/ipv4/fib_trie.c 2021-02-24 16:56:12.072428669 +0100
23443 +++ linux-4.4/net/ipv4/fib_trie.c 2021-02-24 16:56:24.622823916 +0100
23444 @@ -2606,6 +2606,7 @@ static int fib_route_seq_show(struct seq
23446 seq_setwidth(seq, 127);
23448 + /* FIXME: check for network context? */
23451 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
23452 diff -urNp -x '*.orig' linux-4.4/net/ipv4/inet_connection_sock.c linux-4.4/net/ipv4/inet_connection_sock.c
23453 --- linux-4.4/net/ipv4/inet_connection_sock.c 2021-02-24 16:56:12.072428669 +0100
23454 +++ linux-4.4/net/ipv4/inet_connection_sock.c 2021-02-24 16:56:24.622823916 +0100
23455 @@ -43,6 +43,37 @@ void inet_get_local_port_range(struct ne
23457 EXPORT_SYMBOL(inet_get_local_port_range);
23459 +int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
23461 + __be32 sk1_rcv_saddr = sk1->sk_rcv_saddr,
23462 + sk2_rcv_saddr = sk2->sk_rcv_saddr;
23464 + if (inet_v6_ipv6only(sk2))
23467 + if (sk1_rcv_saddr &&
23469 + sk1_rcv_saddr == sk2_rcv_saddr)
23472 + if (sk1_rcv_saddr &&
23473 + !sk2_rcv_saddr &&
23474 + v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, NXA_MASK_BIND))
23477 + if (sk2_rcv_saddr &&
23478 + !sk1_rcv_saddr &&
23479 + v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, NXA_MASK_BIND))
23482 + if (!sk1_rcv_saddr &&
23483 + !sk2_rcv_saddr &&
23484 + nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info))
23490 int inet_csk_bind_conflict(const struct sock *sk,
23491 const struct inet_bind_bucket *tb, bool relax)
23493 @@ -70,15 +101,13 @@ int inet_csk_bind_conflict(const struct
23494 (sk2->sk_state != TCP_TIME_WAIT &&
23495 !uid_eq(uid, sock_i_uid(sk2))))) {
23497 - if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
23498 - sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
23499 + if (ipv4_rcv_saddr_equal(sk, sk2))
23502 if (!relax && reuse && sk2->sk_reuse &&
23503 sk2->sk_state != TCP_LISTEN) {
23505 - if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
23506 - sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
23507 + if (ipv4_rcv_saddr_equal(sk, sk2))
23511 diff -urNp -x '*.orig' linux-4.4/net/ipv4/inet_diag.c linux-4.4/net/ipv4/inet_diag.c
23512 --- linux-4.4/net/ipv4/inet_diag.c 2021-02-24 16:56:12.072428669 +0100
23513 +++ linux-4.4/net/ipv4/inet_diag.c 2021-02-24 16:56:24.622823916 +0100
23516 #include <linux/inet.h>
23517 #include <linux/stddef.h>
23518 +#include <linux/vs_network.h>
23519 +#include <linux/vs_inet.h>
23521 #include <linux/inet_diag.h>
23522 #include <linux/sock_diag.h>
23523 @@ -85,8 +87,8 @@ static void inet_diag_msg_common_fill(st
23524 memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
23525 memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
23527 - r->id.idiag_src[0] = sk->sk_rcv_saddr;
23528 - r->id.idiag_dst[0] = sk->sk_daddr;
23529 + r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, sk->sk_rcv_saddr);
23530 + r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, sk->sk_daddr);
23534 @@ -768,6 +770,9 @@ void inet_diag_dump_icsk(struct inet_has
23535 if (!net_eq(sock_net(sk), net))
23538 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23544 @@ -829,6 +834,8 @@ skip_listen_ht:
23546 if (!net_eq(sock_net(sk), net))
23548 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23552 state = (sk->sk_state == TCP_TIME_WAIT) ?
23553 diff -urNp -x '*.orig' linux-4.4/net/ipv4/inet_hashtables.c linux-4.4/net/ipv4/inet_hashtables.c
23554 --- linux-4.4/net/ipv4/inet_hashtables.c 2021-02-24 16:56:12.072428669 +0100
23555 +++ linux-4.4/net/ipv4/inet_hashtables.c 2021-02-24 16:56:24.622823916 +0100
23557 #include <net/inet_connection_sock.h>
23558 #include <net/inet_hashtables.h>
23559 #include <net/secure_seq.h>
23560 +#include <net/route.h>
23561 #include <net/ip.h>
23563 static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
23564 @@ -184,6 +185,11 @@ static inline int compute_score(struct s
23565 if (rcv_saddr != daddr)
23569 + /* block non nx_info ips */
23570 + if (!v4_addr_in_nx_info(sk->sk_nx_info,
23571 + daddr, NXA_MASK_BIND))
23574 if (sk->sk_bound_dev_if) {
23575 if (sk->sk_bound_dev_if != dif)
23576 @@ -203,7 +209,6 @@ static inline int compute_score(struct s
23577 * wildcarded during the search since they can never be otherwise.
23581 struct sock *__inet_lookup_listener(struct net *net,
23582 struct inet_hashinfo *hashinfo,
23583 const __be32 saddr, __be16 sport,
23584 @@ -239,6 +244,7 @@ begin:
23585 phash = next_pseudo_random32(phash);
23590 * if the nulls value we got at the end of this lookup is
23591 * not the expected one, we must restart lookup.
23592 diff -urNp -x '*.orig' linux-4.4/net/ipv4/netfilter.c linux-4.4/net/ipv4/netfilter.c
23593 --- linux-4.4/net/ipv4/netfilter.c 2021-02-24 16:56:12.079095546 +0100
23594 +++ linux-4.4/net/ipv4/netfilter.c 2021-02-24 16:56:24.622823916 +0100
23596 #include <linux/skbuff.h>
23597 #include <linux/gfp.h>
23598 #include <linux/export.h>
23599 -#include <net/route.h>
23600 +// #include <net/route.h>
23601 #include <net/xfrm.h>
23602 #include <net/ip.h>
23603 #include <net/netfilter/nf_queue.h>
23604 diff -urNp -x '*.orig' linux-4.4/net/ipv4/raw.c linux-4.4/net/ipv4/raw.c
23605 --- linux-4.4/net/ipv4/raw.c 2021-02-24 16:56:12.082428984 +0100
23606 +++ linux-4.4/net/ipv4/raw.c 2021-02-24 16:56:24.622823916 +0100
23607 @@ -126,7 +126,7 @@ static struct sock *__raw_v4_lookup(stru
23609 if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
23610 !(inet->inet_daddr && inet->inet_daddr != raddr) &&
23611 - !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
23612 + v4_sock_addr_match(sk->sk_nx_info, inet, laddr) &&
23613 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
23614 goto found; /* gotcha */
23616 @@ -416,6 +416,12 @@ static int raw_send_hdrinc(struct sock *
23617 skb_transport_header(skb))->type);
23621 + if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) &&
23622 + sk->sk_nx_info &&
23623 + !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND))
23626 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
23627 net, sk, skb, NULL, rt->dst.dev,
23629 @@ -628,6 +634,16 @@ static int raw_sendmsg(struct sock *sk,
23633 + if (sk->sk_nx_info) {
23634 + rt = ip_v4_find_src(sock_net(sk), sk->sk_nx_info, &fl4);
23635 + if (IS_ERR(rt)) {
23636 + err = PTR_ERR(rt);
23643 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
23644 rt = ip_route_output_flow(net, &fl4, sk);
23646 @@ -706,17 +722,19 @@ static int raw_bind(struct sock *sk, str
23648 struct inet_sock *inet = inet_sk(sk);
23649 struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
23650 + struct nx_v4_sock_addr nsa = { 0 };
23654 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
23656 - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
23657 + v4_map_sock_addr(inet, addr, &nsa);
23658 + chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr);
23659 ret = -EADDRNOTAVAIL;
23660 - if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
23661 + if (nsa.saddr && chk_addr_ret != RTN_LOCAL &&
23662 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
23664 - inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
23665 + v4_set_sock_addr(inet, &nsa);
23666 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
23667 inet->inet_saddr = 0; /* Use device */
23669 @@ -765,7 +783,8 @@ static int raw_recvmsg(struct sock *sk,
23670 /* Copy the address. */
23672 sin->sin_family = AF_INET;
23673 - sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
23674 + sin->sin_addr.s_addr =
23675 + nx_map_sock_lback(sk->sk_nx_info, ip_hdr(skb)->saddr);
23677 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
23678 *addr_len = sizeof(*sin);
23679 @@ -961,7 +980,8 @@ static struct sock *raw_get_first(struct
23680 for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
23682 sk_for_each(sk, &state->h->ht[state->bucket])
23683 - if (sock_net(sk) == seq_file_net(seq))
23684 + if ((sock_net(sk) == seq_file_net(seq)) &&
23685 + nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23689 @@ -977,7 +997,8 @@ static struct sock *raw_get_next(struct
23693 - } while (sk && sock_net(sk) != seq_file_net(seq));
23694 + } while (sk && ((sock_net(sk) != seq_file_net(seq)) ||
23695 + !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
23697 if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
23698 sk = sk_head(&state->h->ht[state->bucket]);
23699 diff -urNp -x '*.orig' linux-4.4/net/ipv4/route.c linux-4.4/net/ipv4/route.c
23700 --- linux-4.4/net/ipv4/route.c 2021-02-24 16:56:12.082428984 +0100
23701 +++ linux-4.4/net/ipv4/route.c 2021-02-24 16:56:24.626157354 +0100
23702 @@ -2282,7 +2282,7 @@ struct rtable *__ip_route_output_key_has
23705 if (fl4->flowi4_oif) {
23706 - dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
23707 + dev_out = dev_get_by_index_real_rcu(net, fl4->flowi4_oif);
23708 rth = ERR_PTR(-ENODEV);
23711 diff -urNp -x '*.orig' linux-4.4/net/ipv4/tcp.c linux-4.4/net/ipv4/tcp.c
23712 --- linux-4.4/net/ipv4/tcp.c 2021-02-24 16:56:12.082428984 +0100
23713 +++ linux-4.4/net/ipv4/tcp.c 2021-02-24 16:56:24.626157354 +0100
23714 @@ -269,6 +269,7 @@
23715 #include <linux/crypto.h>
23716 #include <linux/time.h>
23717 #include <linux/slab.h>
23718 +#include <linux/in.h>
23720 #include <net/icmp.h>
23721 #include <net/inet_common.h>
23722 diff -urNp -x '*.orig' linux-4.4/net/ipv4/tcp_ipv4.c linux-4.4/net/ipv4/tcp_ipv4.c
23723 --- linux-4.4/net/ipv4/tcp_ipv4.c 2021-02-24 16:56:12.085762422 +0100
23724 +++ linux-4.4/net/ipv4/tcp_ipv4.c 2021-02-24 16:56:24.626157354 +0100
23725 @@ -1900,6 +1900,10 @@ static void *listening_get_next(struct s
23726 sk = sk_nulls_next(sk);
23728 sk_nulls_for_each_from(sk, node) {
23729 + vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
23730 + sk, sk->sk_nid, nx_current_nid());
23731 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23733 if (!net_eq(sock_net(sk), net))
23735 if (sk->sk_family == st->family) {
23736 @@ -1964,6 +1968,11 @@ static void *established_get_first(struc
23738 spin_lock_bh(lock);
23739 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
23740 + vxdprintk(VXD_CBIT(net, 6),
23741 + "sk,egf: %p [#%d] (from %d)",
23742 + sk, sk->sk_nid, nx_current_nid());
23743 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23745 if (sk->sk_family != st->family ||
23746 !net_eq(sock_net(sk), net)) {
23748 @@ -1990,6 +1999,11 @@ static void *established_get_next(struct
23749 sk = sk_nulls_next(sk);
23751 sk_nulls_for_each_from(sk, node) {
23752 + vxdprintk(VXD_CBIT(net, 6),
23753 + "sk,egn: %p [#%d] (from %d)",
23754 + sk, sk->sk_nid, nx_current_nid());
23755 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23757 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
23760 @@ -2181,9 +2195,9 @@ static void get_openreq4(const struct re
23761 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
23762 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
23764 - ireq->ir_loc_addr,
23765 + nx_map_sock_lback(current_nx_info(), ireq->ir_loc_addr),
23767 - ireq->ir_rmt_addr,
23768 + nx_map_sock_lback(current_nx_info(), ireq->ir_rmt_addr),
23769 ntohs(ireq->ir_rmt_port),
23771 0, 0, /* could print option size, but that is af dependent. */
23772 @@ -2206,8 +2220,8 @@ static void get_tcp4_sock(struct sock *s
23773 const struct inet_connection_sock *icsk = inet_csk(sk);
23774 const struct inet_sock *inet = inet_sk(sk);
23775 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
23776 - __be32 dest = inet->inet_daddr;
23777 - __be32 src = inet->inet_rcv_saddr;
23778 + __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr);
23779 + __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr);
23780 __u16 destp = ntohs(inet->inet_dport);
23781 __u16 srcp = ntohs(inet->inet_sport);
23783 @@ -2266,8 +2280,8 @@ static void get_timewait4_sock(const str
23787 - dest = tw->tw_daddr;
23788 - src = tw->tw_rcv_saddr;
23789 + dest = nx_map_sock_lback(current_nx_info(), tw->tw_daddr);
23790 + src = nx_map_sock_lback(current_nx_info(), tw->tw_rcv_saddr);
23791 destp = ntohs(tw->tw_dport);
23792 srcp = ntohs(tw->tw_sport);
23794 diff -urNp -x '*.orig' linux-4.4/net/ipv4/tcp_minisocks.c linux-4.4/net/ipv4/tcp_minisocks.c
23795 --- linux-4.4/net/ipv4/tcp_minisocks.c 2021-02-24 16:56:12.089095860 +0100
23796 +++ linux-4.4/net/ipv4/tcp_minisocks.c 2021-02-24 16:56:24.626157354 +0100
23798 #include <linux/slab.h>
23799 #include <linux/sysctl.h>
23800 #include <linux/workqueue.h>
23801 +#include <linux/vs_limit.h>
23802 +#include <linux/vs_socket.h>
23803 +#include <linux/vs_context.h>
23804 #include <net/tcp.h>
23805 #include <net/inet_common.h>
23806 #include <net/xfrm.h>
23807 @@ -292,6 +295,11 @@ void tcp_time_wait(struct sock *sk, int
23808 tcptw->tw_ts_offset = tp->tsoffset;
23809 tcptw->tw_last_oow_ack_time = 0;
23811 + tw->tw_xid = sk->sk_xid;
23812 + tw->tw_vx_info = NULL;
23813 + tw->tw_nid = sk->sk_nid;
23814 + tw->tw_nx_info = NULL;
23816 #if IS_ENABLED(CONFIG_IPV6)
23817 if (tw->tw_family == PF_INET6) {
23818 struct ipv6_pinfo *np = inet6_sk(sk);
23819 diff -urNp -x '*.orig' linux-4.4/net/ipv4/udp.c linux-4.4/net/ipv4/udp.c
23820 --- linux-4.4/net/ipv4/udp.c 2021-02-24 16:56:12.089095860 +0100
23821 +++ linux-4.4/net/ipv4/udp.c 2021-02-24 16:56:24.626157354 +0100
23822 @@ -309,14 +309,7 @@ fail:
23824 EXPORT_SYMBOL(udp_lib_get_port);
23826 -static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
23828 - struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
23830 - return (!ipv6_only_sock(sk2) &&
23831 - (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
23832 - inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
23834 +extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
23836 static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
23838 @@ -355,6 +348,11 @@ static inline int compute_score(struct s
23839 if (inet->inet_rcv_saddr != daddr)
23843 + /* block non nx_info ips */
23844 + if (!v4_addr_in_nx_info(sk->sk_nx_info,
23845 + daddr, NXA_MASK_BIND))
23849 if (inet->inet_daddr) {
23850 @@ -489,6 +487,7 @@ begin:
23855 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
23856 * harder than this. -DaveM
23858 @@ -535,6 +534,11 @@ begin:
23859 sk_nulls_for_each_rcu(sk, node, &hslot->head) {
23860 score = compute_score(sk, net, saddr, hnum, sport,
23861 daddr, dport, dif);
23862 + /* FIXME: disabled?
23863 + if (score == 9) {
23867 if (score > badness) {
23870 @@ -559,6 +563,7 @@ begin:
23871 if (get_nulls_value(node) != slot)
23876 if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
23878 @@ -568,6 +573,7 @@ begin:
23886 @@ -602,7 +608,7 @@ static inline bool __udp_is_mcast_sock(s
23887 udp_sk(sk)->udp_port_hash != hnum ||
23888 (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
23889 (inet->inet_dport != rmt_port && inet->inet_dport) ||
23890 - (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
23891 + !v4_sock_addr_match(sk->sk_nx_info, inet, loc_addr) ||
23892 ipv6_only_sock(sk) ||
23893 (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
23895 @@ -1035,6 +1041,16 @@ int udp_sendmsg(struct sock *sk, struct
23899 + if (sk->sk_nx_info) {
23900 + rt = ip_v4_find_src(net, sk->sk_nx_info, fl4);
23901 + if (IS_ERR(rt)) {
23902 + err = PTR_ERR(rt);
23909 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
23910 rt = ip_route_output_flow(net, fl4, sk);
23912 @@ -1340,7 +1356,8 @@ try_again:
23914 sin->sin_family = AF_INET;
23915 sin->sin_port = udp_hdr(skb)->source;
23916 - sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
23917 + sin->sin_addr.s_addr = nx_map_sock_lback(
23918 + skb->sk->sk_nx_info, ip_hdr(skb)->saddr);
23919 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
23920 *addr_len = sizeof(*sin);
23922 @@ -2329,6 +2346,8 @@ static struct sock *udp_get_first(struct
23923 sk_nulls_for_each(sk, node, &hslot->head) {
23924 if (!net_eq(sock_net(sk), net))
23926 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23928 if (sk->sk_family == state->family)
23931 @@ -2346,7 +2365,9 @@ static struct sock *udp_get_next(struct
23934 sk = sk_nulls_next(sk);
23935 - } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
23936 + } while (sk && (!net_eq(sock_net(sk), net) ||
23937 + sk->sk_family != state->family ||
23938 + !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
23941 if (state->bucket <= state->udp_table->mask)
23942 @@ -2442,8 +2463,8 @@ static void udp4_format_sock(struct sock
23945 struct inet_sock *inet = inet_sk(sp);
23946 - __be32 dest = inet->inet_daddr;
23947 - __be32 src = inet->inet_rcv_saddr;
23948 + __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr);
23949 + __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr);
23950 __u16 destp = ntohs(inet->inet_dport);
23951 __u16 srcp = ntohs(inet->inet_sport);
23953 diff -urNp -x '*.orig' linux-4.4/net/ipv4/udp_diag.c linux-4.4/net/ipv4/udp_diag.c
23954 --- linux-4.4/net/ipv4/udp_diag.c 2016-01-11 00:01:32.000000000 +0100
23955 +++ linux-4.4/net/ipv4/udp_diag.c 2021-02-24 16:56:24.626157354 +0100
23956 @@ -118,6 +118,8 @@ static void udp_dump(struct udp_table *t
23958 if (!net_eq(sock_net(sk), net))
23960 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23964 if (!(r->idiag_states & (1 << sk->sk_state)))
23965 diff -urNp -x '*.orig' linux-4.4/net/ipv6/addrconf.c linux-4.4/net/ipv6/addrconf.c
23966 --- linux-4.4/net/ipv6/addrconf.c 2021-02-24 16:56:12.092429299 +0100
23967 +++ linux-4.4/net/ipv6/addrconf.c 2021-02-24 16:56:24.626157354 +0100
23969 #include <linux/proc_fs.h>
23970 #include <linux/seq_file.h>
23971 #include <linux/export.h>
23972 +#include <linux/vs_network.h>
23973 +#include <linux/vs_inet6.h>
23975 /* Set to 3 to get tracing... */
23976 #define ACONF_DEBUG 2
23977 @@ -1453,7 +1455,8 @@ static int __ipv6_dev_get_saddr(struct n
23978 struct ipv6_saddr_dst *dst,
23979 struct inet6_dev *idev,
23980 struct ipv6_saddr_score *scores,
23983 + struct nx_info *nxi)
23985 struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
23987 @@ -1483,6 +1486,8 @@ static int __ipv6_dev_get_saddr(struct n
23991 + if (!v6_addr_in_nx_info(nxi, &score->ifa->addr, -1))
23995 bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
23996 @@ -1530,7 +1535,7 @@ out:
23998 int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
23999 const struct in6_addr *daddr, unsigned int prefs,
24000 - struct in6_addr *saddr)
24001 + struct in6_addr *saddr, struct nx_info *nxi)
24003 struct ipv6_saddr_score scores[2], *hiscore;
24004 struct ipv6_saddr_dst dst;
24005 @@ -1579,13 +1584,15 @@ int ipv6_dev_get_saddr(struct net *net,
24007 if (use_oif_addr) {
24009 - hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
24010 + hiscore_idx = __ipv6_dev_get_saddr(net, &dst,
24011 + idev, scores, hiscore_idx, nxi);
24013 for_each_netdev_rcu(net, dev) {
24014 idev = __in6_dev_get(dev);
24017 - hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
24018 + hiscore_idx = __ipv6_dev_get_saddr(net, &dst,
24019 + idev, scores, hiscore_idx, nxi);
24023 @@ -3866,7 +3873,10 @@ static void if6_seq_stop(struct seq_file
24024 static int if6_seq_show(struct seq_file *seq, void *v)
24026 struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
24027 - seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
24029 + if (nx_check(0, VS_ADMIN|VS_WATCH) ||
24030 + v6_addr_in_nx_info(current_nx_info(), &ifp->addr, -1))
24031 + seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
24033 ifp->idev->dev->ifindex,
24035 @@ -4450,6 +4460,11 @@ static int in6_dump_addrs(struct inet6_d
24036 struct ifacaddr6 *ifaca;
24038 int ip_idx = *p_ip_idx;
24039 + struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
24041 + /* disable ipv6 on non v6 guests */
24042 + if (nxi && !nx_info_has_v6(nxi))
24045 read_lock_bh(&idev->lock);
24047 @@ -4460,6 +4475,8 @@ static int in6_dump_addrs(struct inet6_d
24048 list_for_each_entry(ifa, &idev->addr_list, if_list) {
24049 if (ip_idx < s_ip_idx)
24051 + if (!v6_addr_in_nx_info(nxi, &ifa->addr, -1))
24053 err = inet6_fill_ifaddr(skb, ifa,
24054 NETLINK_CB(cb->skb).portid,
24055 cb->nlh->nlmsg_seq,
24056 @@ -4479,6 +4496,8 @@ next:
24057 ifmca = ifmca->next, ip_idx++) {
24058 if (ip_idx < s_ip_idx)
24060 + if (!v6_addr_in_nx_info(nxi, &ifmca->mca_addr, -1))
24062 err = inet6_fill_ifmcaddr(skb, ifmca,
24063 NETLINK_CB(cb->skb).portid,
24064 cb->nlh->nlmsg_seq,
24065 @@ -4494,6 +4513,8 @@ next:
24066 ifaca = ifaca->aca_next, ip_idx++) {
24067 if (ip_idx < s_ip_idx)
24069 + if (!v6_addr_in_nx_info(nxi, &ifaca->aca_addr, -1))
24071 err = inet6_fill_ifacaddr(skb, ifaca,
24072 NETLINK_CB(cb->skb).portid,
24073 cb->nlh->nlmsg_seq,
24074 @@ -4522,6 +4543,10 @@ static int inet6_dump_addr(struct sk_buf
24075 struct inet6_dev *idev;
24076 struct hlist_head *head;
24078 + /* FIXME: maybe disable ipv6 on non v6 guests?
24079 + if (skb->sk && skb->sk->sk_vx_info)
24080 + return skb->len; */
24083 s_idx = idx = cb->args[1];
24084 s_ip_idx = ip_idx = cb->args[2];
24085 @@ -5030,6 +5055,7 @@ static int inet6_dump_ifinfo(struct sk_b
24086 struct net_device *dev;
24087 struct inet6_dev *idev;
24088 struct hlist_head *head;
24089 + struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
24092 s_idx = cb->args[1];
24093 @@ -5041,6 +5067,8 @@ static int inet6_dump_ifinfo(struct sk_b
24094 hlist_for_each_entry_rcu(dev, head, index_hlist) {
24097 + if (!v6_dev_in_nx_info(dev, nxi))
24099 idev = __in6_dev_get(dev);
24102 diff -urNp -x '*.orig' linux-4.4/net/ipv6/af_inet6.c linux-4.4/net/ipv6/af_inet6.c
24103 --- linux-4.4/net/ipv6/af_inet6.c 2021-02-24 16:56:12.092429299 +0100
24104 +++ linux-4.4/net/ipv6/af_inet6.c 2021-02-24 16:56:24.626157354 +0100
24106 #include <linux/netdevice.h>
24107 #include <linux/icmpv6.h>
24108 #include <linux/netfilter_ipv6.h>
24109 +#include <linux/vs_inet.h>
24110 +#include <linux/vs_inet6.h>
24112 #include <net/ip.h>
24113 #include <net/ipv6.h>
24114 @@ -158,10 +160,13 @@ lookup_protocol:
24118 + if ((protocol == IPPROTO_ICMPV6) &&
24119 + nx_capable(CAP_NET_RAW, NXC_RAW_ICMP))
24121 if (sock->type == SOCK_RAW && !kern &&
24122 !ns_capable(net->user_ns, CAP_NET_RAW))
24123 goto out_rcu_unlock;
24126 sock->ops = answer->ops;
24127 answer_prot = answer->prot;
24128 answer_flags = answer->flags;
24129 @@ -259,6 +264,7 @@ int inet6_bind(struct socket *sock, stru
24130 struct inet_sock *inet = inet_sk(sk);
24131 struct ipv6_pinfo *np = inet6_sk(sk);
24132 struct net *net = sock_net(sk);
24133 + struct nx_v6_sock_addr nsa;
24135 unsigned short snum;
24137 @@ -274,6 +280,10 @@ int inet6_bind(struct socket *sock, stru
24138 if (addr->sin6_family != AF_INET6)
24139 return -EAFNOSUPPORT;
24141 + err = v6_map_sock_addr(inet, addr, &nsa);
24145 addr_type = ipv6_addr_type(&addr->sin6_addr);
24146 if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
24148 @@ -326,6 +336,10 @@ int inet6_bind(struct socket *sock, stru
24149 err = -EADDRNOTAVAIL;
24152 + if (!v4_addr_in_nx_info(sk->sk_nx_info, v4addr, NXA_MASK_BIND)) {
24153 + err = -EADDRNOTAVAIL;
24157 if (addr_type != IPV6_ADDR_ANY) {
24158 struct net_device *dev = NULL;
24159 @@ -355,6 +369,11 @@ int inet6_bind(struct socket *sock, stru
24163 + if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
24164 + err = -EADDRNOTAVAIL;
24168 /* ipv4 addr of the socket is invalid. Only the
24169 * unspecified and mapped address have a v4 equivalent.
24171 @@ -372,6 +391,9 @@ int inet6_bind(struct socket *sock, stru
24175 + /* what's that for? */
24176 + v6_set_sock_addr(inet, &nsa);
24178 inet->inet_rcv_saddr = v4addr;
24179 inet->inet_saddr = v4addr;
24181 @@ -476,9 +498,11 @@ int inet6_getname(struct socket *sock, s
24183 sin->sin6_port = inet->inet_dport;
24184 sin->sin6_addr = sk->sk_v6_daddr;
24185 + /* FIXME: remap lback? */
24187 sin->sin6_flowinfo = np->flow_label;
24189 + /* FIXME: remap lback? */
24190 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
24191 sin->sin6_addr = np->saddr;
24193 diff -urNp -x '*.orig' linux-4.4/net/ipv6/datagram.c linux-4.4/net/ipv6/datagram.c
24194 --- linux-4.4/net/ipv6/datagram.c 2021-02-24 16:56:12.092429299 +0100
24195 +++ linux-4.4/net/ipv6/datagram.c 2021-02-24 16:56:24.629490793 +0100
24196 @@ -737,7 +737,7 @@ int ip6_datagram_send_ctl(struct net *ne
24199 if (fl6->flowi6_oif) {
24200 - dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
24201 + dev = dev_get_by_index_real_rcu(net, fl6->flowi6_oif);
24205 diff -urNp -x '*.orig' linux-4.4/net/ipv6/fib6_rules.c linux-4.4/net/ipv6/fib6_rules.c
24206 --- linux-4.4/net/ipv6/fib6_rules.c 2021-02-24 16:56:12.092429299 +0100
24207 +++ linux-4.4/net/ipv6/fib6_rules.c 2021-02-24 16:56:24.629490793 +0100
24208 @@ -97,7 +97,7 @@ static int fib6_rule_action(struct fib_r
24209 ip6_dst_idev(&rt->dst)->dev,
24211 rt6_flags2srcprefs(flags),
24215 if (!ipv6_prefix_equal(&saddr, &r->src.addr,
24217 diff -urNp -x '*.orig' linux-4.4/net/ipv6/inet6_hashtables.c linux-4.4/net/ipv6/inet6_hashtables.c
24218 --- linux-4.4/net/ipv6/inet6_hashtables.c 2016-01-11 00:01:32.000000000 +0100
24219 +++ linux-4.4/net/ipv6/inet6_hashtables.c 2021-02-24 16:56:24.629490793 +0100
24222 #include <linux/module.h>
24223 #include <linux/random.h>
24224 +#include <linux/vs_inet6.h>
24226 #include <net/inet_connection_sock.h>
24227 #include <net/inet_hashtables.h>
24228 @@ -66,7 +67,6 @@ struct sock *__inet6_lookup_established(
24229 unsigned int slot = hash & hashinfo->ehash_mask;
24230 struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
24235 sk_nulls_for_each_rcu(sk, node, &head->chain) {
24236 @@ -108,6 +108,9 @@ static inline int compute_score(struct s
24237 if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
24241 + if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
24244 if (sk->sk_bound_dev_if) {
24245 if (sk->sk_bound_dev_if != dif)
24246 diff -urNp -x '*.orig' linux-4.4/net/ipv6/ip6_fib.c linux-4.4/net/ipv6/ip6_fib.c
24247 --- linux-4.4/net/ipv6/ip6_fib.c 2021-02-24 16:56:12.095762737 +0100
24248 +++ linux-4.4/net/ipv6/ip6_fib.c 2021-02-24 16:56:24.629490793 +0100
24249 @@ -1969,6 +1969,7 @@ static int ipv6_route_seq_show(struct se
24250 struct rt6_info *rt = v;
24251 struct ipv6_route_iter *iter = seq->private;
24253 + /* FIXME: check for network context? */
24254 seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
24256 #ifdef CONFIG_IPV6_SUBTREES
24257 diff -urNp -x '*.orig' linux-4.4/net/ipv6/ip6_output.c linux-4.4/net/ipv6/ip6_output.c
24258 --- linux-4.4/net/ipv6/ip6_output.c 2021-02-24 16:56:12.549110347 +0100
24259 +++ linux-4.4/net/ipv6/ip6_output.c 2021-02-24 16:56:24.629490793 +0100
24260 @@ -949,7 +949,8 @@ static int ip6_dst_lookup_tail(struct ne
24261 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
24262 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
24263 sk ? inet6_sk(sk)->srcprefs : 0,
24266 + sk ? sk->sk_nx_info : NULL);
24268 goto out_err_release;
24270 diff -urNp -x '*.orig' linux-4.4/net/ipv6/ndisc.c linux-4.4/net/ipv6/ndisc.c
24271 --- linux-4.4/net/ipv6/ndisc.c 2021-02-24 16:56:12.099096175 +0100
24272 +++ linux-4.4/net/ipv6/ndisc.c 2021-02-24 16:56:24.629490793 +0100
24273 @@ -501,7 +501,7 @@ void ndisc_send_na(struct net_device *de
24275 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
24276 inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
24280 src_addr = &tmpaddr;
24282 diff -urNp -x '*.orig' linux-4.4/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c linux-4.4/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
24283 --- linux-4.4/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 2016-01-11 00:01:32.000000000 +0100
24284 +++ linux-4.4/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 2021-02-24 16:56:24.629490793 +0100
24285 @@ -35,7 +35,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *s
24286 ctinfo == IP_CT_RELATED_REPLY));
24288 if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
24289 - &ipv6_hdr(skb)->daddr, 0, &src) < 0)
24290 + &ipv6_hdr(skb)->daddr, 0, &src, NULL) < 0)
24293 nfct_nat(ct)->masq_index = out->ifindex;
24294 diff -urNp -x '*.orig' linux-4.4/net/ipv6/raw.c linux-4.4/net/ipv6/raw.c
24295 --- linux-4.4/net/ipv6/raw.c 2021-02-24 16:56:12.102429614 +0100
24296 +++ linux-4.4/net/ipv6/raw.c 2021-02-24 16:56:24.629490793 +0100
24298 #include <linux/icmpv6.h>
24299 #include <linux/netfilter.h>
24300 #include <linux/netfilter_ipv6.h>
24301 +#include <linux/vs_inet6.h>
24302 #include <linux/skbuff.h>
24303 #include <linux/compat.h>
24304 #include <linux/uaccess.h>
24305 @@ -293,6 +294,13 @@ static int rawv6_bind(struct sock *sk, s
24309 + if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
24310 + err = -EADDRNOTAVAIL;
24316 /* ipv4 addr of the socket is invalid. Only the
24317 * unspecified and mapped address have a v4 equivalent.
24319 diff -urNp -x '*.orig' linux-4.4/net/ipv6/route.c linux-4.4/net/ipv6/route.c
24320 --- linux-4.4/net/ipv6/route.c 2021-02-24 16:56:12.102429614 +0100
24321 +++ linux-4.4/net/ipv6/route.c 2021-02-24 16:56:24.629490793 +0100
24323 #include <net/lwtunnel.h>
24324 #include <net/ip_tunnels.h>
24325 #include <net/l3mdev.h>
24326 +#include <linux/vs_inet6.h>
24328 #include <asm/uaccess.h>
24330 @@ -2582,16 +2583,18 @@ int ip6_route_get_saddr(struct net *net,
24331 struct rt6_info *rt,
24332 const struct in6_addr *daddr,
24333 unsigned int prefs,
24334 - struct in6_addr *saddr)
24335 + struct in6_addr *saddr,
24336 + struct nx_info *nxi)
24338 struct inet6_dev *idev =
24339 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
24341 - if (rt && rt->rt6i_prefsrc.plen)
24342 + if (rt && rt->rt6i_prefsrc.plen && (!nxi ||
24343 + v6_addr_in_nx_info(nxi, &rt->rt6i_prefsrc.addr, NXA_TYPE_ADDR)))
24344 *saddr = rt->rt6i_prefsrc.addr;
24346 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
24347 - daddr, prefs, saddr);
24348 + daddr, prefs, saddr, nxi);
24352 @@ -3208,7 +3211,8 @@ static int rt6_fill_node(struct net *net
24353 goto nla_put_failure;
24355 struct in6_addr saddr_buf;
24356 - if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
24357 + if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf,
24358 + (skb->sk ? skb->sk->sk_nx_info : NULL)) == 0 &&
24359 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
24360 goto nla_put_failure;
24362 diff -urNp -x '*.orig' linux-4.4/net/ipv6/tcp_ipv6.c linux-4.4/net/ipv6/tcp_ipv6.c
24363 --- linux-4.4/net/ipv6/tcp_ipv6.c 2021-02-24 16:56:12.105763052 +0100
24364 +++ linux-4.4/net/ipv6/tcp_ipv6.c 2021-02-24 16:56:24.629490793 +0100
24367 #include <linux/crypto.h>
24368 #include <linux/scatterlist.h>
24369 +#include <linux/vs_inet6.h>
24371 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
24372 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
24373 @@ -150,11 +151,18 @@ static int tcp_v6_connect(struct sock *s
24376 if (ipv6_addr_any(&usin->sin6_addr)) {
24377 - if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
24378 - ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
24379 - &usin->sin6_addr);
24381 - usin->sin6_addr = in6addr_loopback;
24382 + struct nx_info *nxi = sk->sk_nx_info;
24384 + if (nxi && nx_info_has_v6(nxi))
24385 + /* FIXME: remap lback? */
24386 + usin->sin6_addr = nxi->v6.ip;
24388 + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
24389 + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
24390 + &usin->sin6_addr);
24392 + usin->sin6_addr = in6addr_loopback;
24396 addr_type = ipv6_addr_type(&usin->sin6_addr);
24397 diff -urNp -x '*.orig' linux-4.4/net/ipv6/udp.c linux-4.4/net/ipv6/udp.c
24398 --- linux-4.4/net/ipv6/udp.c 2021-02-24 16:56:12.105763052 +0100
24399 +++ linux-4.4/net/ipv6/udp.c 2021-02-24 16:56:24.629490793 +0100
24401 #include <net/xfrm.h>
24402 #include <net/inet6_hashtables.h>
24403 #include <net/busy_poll.h>
24404 +#include <linux/vs_inet6.h>
24406 #include <linux/proc_fs.h>
24407 #include <linux/seq_file.h>
24408 @@ -76,32 +77,60 @@ static u32 udp6_ehashfn(const struct net
24409 udp_ipv6_hash_secret + net_hash_mix(net));
24412 -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
24413 +int ipv6_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
24415 + const struct in6_addr *sk1_rcv_saddr6 = inet6_rcv_saddr(sk1);
24416 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
24417 + __be32 sk1_rcv_saddr = sk1->sk_rcv_saddr;
24418 + __be32 sk2_rcv_saddr = sk2->sk_rcv_saddr;
24419 int sk2_ipv6only = inet_v6_ipv6only(sk2);
24420 - int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
24421 + int addr_type1 = ipv6_addr_type(sk1_rcv_saddr6);
24422 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
24424 /* if both are mapped, treat as IPv4 */
24425 - if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
24426 - return (!sk2_ipv6only &&
24427 - (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
24428 - sk->sk_rcv_saddr == sk2->sk_rcv_saddr));
24429 + if (addr_type1 == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
24430 + if (!sk2_ipv6only &&
24431 + (!sk1->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
24432 + sk1->sk_rcv_saddr == sk2->sk_rcv_saddr))
24438 if (addr_type2 == IPV6_ADDR_ANY &&
24439 - !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
24441 + !(sk2_ipv6only && addr_type1 == IPV6_ADDR_MAPPED))
24444 - if (addr_type == IPV6_ADDR_ANY &&
24445 - !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
24447 + if (addr_type1 == IPV6_ADDR_ANY &&
24448 + !(ipv6_only_sock(sk1) && addr_type2 == IPV6_ADDR_MAPPED))
24451 if (sk2_rcv_saddr6 &&
24452 - ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
24454 + ipv6_addr_equal(&sk1->sk_v6_rcv_saddr, sk2_rcv_saddr6))
24460 + if (!sk1_rcv_saddr && !sk2_rcv_saddr)
24461 + return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
24462 + if (!sk2_rcv_saddr)
24463 + return v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, -1);
24464 + if (!sk1_rcv_saddr)
24465 + return v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, -1);
24468 + if (addr_type2 == IPV6_ADDR_ANY && addr_type1 == IPV6_ADDR_ANY)
24469 + return nx_v6_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
24470 + else if (addr_type2 == IPV6_ADDR_ANY)
24471 + return v6_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr6, -1);
24472 + else if (addr_type1 == IPV6_ADDR_ANY) {
24473 + if (addr_type2 == IPV6_ADDR_MAPPED)
24474 + return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
24476 + return v6_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr6, -1);
24481 static u32 udp6_portaddr_hash(const struct net *net,
24482 @@ -162,6 +191,10 @@ static inline int compute_score(struct s
24483 if (inet->inet_dport != sport)
24487 + /* block destination addresses that are not in the socket's nx_info */
24488 + if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
24492 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
24493 diff -urNp -x '*.orig' linux-4.4/net/ipv6/xfrm6_policy.c linux-4.4/net/ipv6/xfrm6_policy.c
24494 --- linux-4.4/net/ipv6/xfrm6_policy.c 2021-02-24 16:56:12.105763052 +0100
24495 +++ linux-4.4/net/ipv6/xfrm6_policy.c 2021-02-24 16:56:24.632824231 +0100
24496 @@ -64,7 +64,8 @@ static int xfrm6_get_saddr(struct net *n
24497 return -EHOSTUNREACH;
24499 dev = ip6_dst_idev(dst)->dev;
24500 - ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6);
24501 + ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6,
24502 + 0, &saddr->in6, NULL);
24506 diff -urNp -x '*.orig' linux-4.4/net/netfilter/ipvs/ip_vs_xmit.c linux-4.4/net/netfilter/ipvs/ip_vs_xmit.c
24507 --- linux-4.4/net/netfilter/ipvs/ip_vs_xmit.c 2016-01-11 00:01:32.000000000 +0100
24508 +++ linux-4.4/net/netfilter/ipvs/ip_vs_xmit.c 2021-02-24 16:56:24.632824231 +0100
24509 @@ -381,7 +381,7 @@ __ip_vs_route_output_v6(struct net *net,
24511 if (ipv6_addr_any(&fl6.saddr) &&
24512 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
24513 - &fl6.daddr, 0, &fl6.saddr) < 0)
24514 + &fl6.daddr, 0, &fl6.saddr, NULL) < 0)
24517 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
24518 diff -urNp -x '*.orig' linux-4.4/net/netlink/af_netlink.c linux-4.4/net/netlink/af_netlink.c
24519 --- linux-4.4/net/netlink/af_netlink.c 2021-02-24 16:56:12.139097435 +0100
24520 +++ linux-4.4/net/netlink/af_netlink.c 2021-02-24 16:56:24.632824231 +0100
24522 #include <linux/hash.h>
24523 #include <linux/genetlink.h>
24524 #include <linux/nospec.h>
24525 +#include <linux/vs_context.h>
24526 +#include <linux/vs_network.h>
24528 #include <net/net_namespace.h>
24529 #include <net/sock.h>
24530 @@ -2473,7 +2475,8 @@ static void *__netlink_seq_next(struct s
24532 return ERR_PTR(err);
24534 - } while (sock_net(&nlk->sk) != seq_file_net(seq));
24535 + } while ((sock_net(&nlk->sk) != seq_file_net(seq)) ||
24536 + !nx_check(nlk->sk.sk_nid, VS_WATCH_P | VS_IDENT));
24540 diff -urNp -x '*.orig' linux-4.4/net/packet/diag.c linux-4.4/net/packet/diag.c
24541 --- linux-4.4/net/packet/diag.c 2016-01-11 00:01:32.000000000 +0100
24542 +++ linux-4.4/net/packet/diag.c 2021-02-24 16:56:24.632824231 +0100
24544 #include <linux/netdevice.h>
24545 #include <linux/packet_diag.h>
24546 #include <linux/percpu.h>
24547 +#include <linux/vs_network.h>
24548 #include <net/net_namespace.h>
24549 #include <net/sock.h>
24551 @@ -201,6 +202,8 @@ static int packet_diag_dump(struct sk_bu
24552 sk_for_each(sk, &net->packet.sklist) {
24553 if (!net_eq(sock_net(sk), net))
24555 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24560 diff -urNp -x '*.orig' linux-4.4/net/socket.c linux-4.4/net/socket.c
24561 --- linux-4.4/net/socket.c 2021-02-24 16:56:12.169098380 +0100
24562 +++ linux-4.4/net/socket.c 2021-02-24 16:56:24.632824231 +0100
24563 @@ -100,10 +100,12 @@
24565 #include <net/sock.h>
24566 #include <linux/netfilter.h>
24567 +#include <linux/vs_socket.h>
24568 +#include <linux/vs_inet.h>
24569 +#include <linux/vs_inet6.h>
24571 #include <linux/if_tun.h>
24572 #include <linux/ipv6_route.h>
24573 -#include <linux/route.h>
24574 #include <linux/sockios.h>
24575 #include <linux/atalk.h>
24576 #include <net/busy_poll.h>
24577 @@ -597,8 +599,24 @@ EXPORT_SYMBOL(__sock_tx_timestamp);
24579 static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
24581 - int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
24582 - BUG_ON(ret == -EIOCBQUEUED);
24583 + size_t size = msg_data_left(msg);
24584 + int ret = sock->ops->sendmsg(sock, msg, size);
24588 + vx_sock_fail(sock->sk, size);
24590 + vx_sock_send(sock->sk, size);
24593 + vxdprintk(VXD_CBIT(net, 7),
24594 + "sock_sendmsg_nosec: %p[%p,%p,%p;%d/%d]:%zu/%zu",
24596 + (sock->sk)?sock->sk->sk_nx_info:0,
24597 + (sock->sk)?sock->sk->sk_vx_info:0,
24598 + (sock->sk)?sock->sk->sk_xid:0,
24599 + (sock->sk)?sock->sk->sk_nid:0,
24600 + size, msg_data_left(msg));
24604 @@ -1089,6 +1107,13 @@ int __sock_create(struct net *net, int f
24605 if (type < 0 || type >= SOCK_MAX)
24608 + if (!nx_check(0, VS_ADMIN)) {
24609 + if (family == PF_INET && !current_nx_info_has_v4())
24610 + return -EAFNOSUPPORT;
24611 + if (family == PF_INET6 && !current_nx_info_has_v6())
24612 + return -EAFNOSUPPORT;
24617 This uglymoron is moved from INET layer to here to avoid
24618 @@ -1223,6 +1248,7 @@ SYSCALL_DEFINE3(socket, int, family, int
24622 + set_bit(SOCK_USER_SOCKET, &sock->flags);
24623 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
24626 @@ -1264,10 +1290,12 @@ SYSCALL_DEFINE4(socketpair, int, family,
24627 err = sock_create(family, type, protocol, &sock1);
24630 + set_bit(SOCK_USER_SOCKET, &sock1->flags);
24632 err = sock_create(family, type, protocol, &sock2);
24634 goto out_release_1;
24635 + set_bit(SOCK_USER_SOCKET, &sock2->flags);
24637 err = sock1->ops->socketpair(sock1, sock2);
24639 diff -urNp -x '*.orig' linux-4.4/net/sunrpc/auth.c linux-4.4/net/sunrpc/auth.c
24640 --- linux-4.4/net/sunrpc/auth.c 2016-01-11 00:01:32.000000000 +0100
24641 +++ linux-4.4/net/sunrpc/auth.c 2021-02-24 16:56:24.632824231 +0100
24643 #include <linux/sunrpc/clnt.h>
24644 #include <linux/sunrpc/gss_api.h>
24645 #include <linux/spinlock.h>
24646 +#include <linux/vs_tag.h>
24648 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
24649 # define RPCDBG_FACILITY RPCDBG_AUTH
24650 @@ -630,6 +631,7 @@ rpcauth_lookupcred(struct rpc_auth *auth
24651 memset(&acred, 0, sizeof(acred));
24652 acred.uid = cred->fsuid;
24653 acred.gid = cred->fsgid;
24654 + acred.tag = make_ktag(&init_user_ns, dx_current_tag());
24655 acred.group_info = cred->group_info;
24656 ret = auth->au_ops->lookup_cred(auth, &acred, flags);
24658 @@ -669,6 +671,7 @@ rpcauth_bind_root_cred(struct rpc_task *
24659 struct auth_cred acred = {
24660 .uid = GLOBAL_ROOT_UID,
24661 .gid = GLOBAL_ROOT_GID,
24662 + .tag = KTAGT_INIT(dx_current_tag()),
24665 dprintk("RPC: %5u looking up %s cred\n",
24666 diff -urNp -x '*.orig' linux-4.4/net/sunrpc/auth_unix.c linux-4.4/net/sunrpc/auth_unix.c
24667 --- linux-4.4/net/sunrpc/auth_unix.c 2016-01-11 00:01:32.000000000 +0100
24668 +++ linux-4.4/net/sunrpc/auth_unix.c 2021-02-24 16:56:24.632824231 +0100
24669 @@ -13,11 +13,13 @@
24670 #include <linux/sunrpc/clnt.h>
24671 #include <linux/sunrpc/auth.h>
24672 #include <linux/user_namespace.h>
24673 +#include <linux/vs_tag.h>
24675 #define NFS_NGROUPS 16
24678 struct rpc_cred uc_base;
24681 kgid_t uc_gids[NFS_NGROUPS];
24683 @@ -80,6 +82,7 @@ unx_create_cred(struct rpc_auth *auth, s
24684 groups = NFS_NGROUPS;
24686 cred->uc_gid = acred->gid;
24687 + cred->uc_tag = acred->tag;
24688 for (i = 0; i < groups; i++)
24689 cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
24690 if (i < NFS_NGROUPS)
24691 @@ -121,7 +124,9 @@ unx_match(struct auth_cred *acred, struc
24695 - if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid))
24696 + if (!uid_eq(cred->uc_uid, acred->uid) ||
24697 + !gid_eq(cred->uc_gid, acred->gid) ||
24698 + !tag_eq(cred->uc_tag, acred->tag))
24701 if (acred->group_info != NULL)
24702 @@ -146,7 +151,7 @@ unx_marshal(struct rpc_task *task, __be3
24703 struct rpc_clnt *clnt = task->tk_client;
24704 struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
24705 __be32 *base, *hold;
24709 *p++ = htonl(RPC_AUTH_UNIX);
24711 @@ -157,8 +162,11 @@ unx_marshal(struct rpc_task *task, __be3
24713 p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
24715 - *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
24716 - *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
24717 + tag = task->tk_client->cl_tag;
24718 + *p++ = htonl((u32) from_kuid(&init_user_ns,
24719 + TAGINO_KUID(tag, cred->uc_uid, cred->uc_tag)));
24720 + *p++ = htonl((u32) from_kgid(&init_user_ns,
24721 + TAGINO_KGID(tag, cred->uc_gid, cred->uc_tag)));
24723 for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++)
24724 *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
24725 diff -urNp -x '*.orig' linux-4.4/net/sunrpc/clnt.c linux-4.4/net/sunrpc/clnt.c
24726 --- linux-4.4/net/sunrpc/clnt.c 2021-02-24 16:56:12.172431818 +0100
24727 +++ linux-4.4/net/sunrpc/clnt.c 2021-02-24 16:56:24.632824231 +0100
24729 #include <linux/in.h>
24730 #include <linux/in6.h>
24731 #include <linux/un.h>
24732 +#include <linux/vs_cvirt.h>
24734 #include <linux/sunrpc/clnt.h>
24735 #include <linux/sunrpc/addr.h>
24736 @@ -477,6 +478,9 @@ static struct rpc_clnt *rpc_create_xprt(
24737 if (!(args->flags & RPC_CLNT_CREATE_QUIET))
24738 clnt->cl_chatty = 1;
24740 + /* TODO: handle RPC_CLNT_CREATE_TAGGED
24741 + if (args->flags & RPC_CLNT_CREATE_TAGGED)
24742 + clnt->cl_tag = 1; */
24746 diff -urNp -x '*.orig' linux-4.4/net/unix/af_unix.c linux-4.4/net/unix/af_unix.c
24747 --- linux-4.4/net/unix/af_unix.c 2021-02-24 16:56:12.179098695 +0100
24748 +++ linux-4.4/net/unix/af_unix.c 2021-02-24 16:56:24.632824231 +0100
24749 @@ -117,6 +117,8 @@
24750 #include <net/checksum.h>
24751 #include <linux/security.h>
24752 #include <linux/freezer.h>
24753 +#include <linux/vs_context.h>
24754 +#include <linux/vs_limit.h>
24758 @@ -290,6 +292,8 @@ static struct sock *__unix_find_socket_b
24759 if (!net_eq(sock_net(s), net))
24762 + if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
24764 if (u->addr->len == len &&
24765 !memcmp(u->addr->name, sunname, len))
24767 @@ -2757,6 +2761,8 @@ static struct sock *unix_from_bucket(str
24768 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
24769 if (sock_net(sk) != seq_file_net(seq))
24771 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24773 if (++count == offset)
24776 @@ -2774,6 +2780,8 @@ static struct sock *unix_next_socket(str
24780 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24782 if (sock_net(sk) == seq_file_net(seq))
24785 diff -urNp -x '*.orig' linux-4.4/net/unix/diag.c linux-4.4/net/unix/diag.c
24786 --- linux-4.4/net/unix/diag.c 2021-02-24 16:56:12.179098695 +0100
24787 +++ linux-4.4/net/unix/diag.c 2021-02-24 16:56:24.632824231 +0100
24789 #include <linux/unix_diag.h>
24790 #include <linux/skbuff.h>
24791 #include <linux/module.h>
24792 +#include <linux/vs_network.h>
24793 #include <net/netlink.h>
24794 #include <net/af_unix.h>
24795 #include <net/tcp_states.h>
24796 @@ -200,6 +201,8 @@ static int unix_diag_dump(struct sk_buff
24797 sk_for_each(sk, &unix_socket_table[slot]) {
24798 if (!net_eq(sock_net(sk), net))
24800 + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24804 if (!(req->udiag_states & (1 << sk->sk_state)))
24805 diff -urNp -x '*.orig' linux-4.4/scripts/checksyscalls.sh linux-4.4/scripts/checksyscalls.sh
24806 --- linux-4.4/scripts/checksyscalls.sh 2016-01-11 00:01:32.000000000 +0100
24807 +++ linux-4.4/scripts/checksyscalls.sh 2021-02-24 16:56:24.636157669 +0100
24808 @@ -196,7 +196,6 @@ cat << EOF
24809 #define __IGNORE_afs_syscall
24810 #define __IGNORE_getpmsg
24811 #define __IGNORE_putpmsg
24812 -#define __IGNORE_vserver
24816 diff -urNp -x '*.orig' linux-4.4/security/commoncap.c linux-4.4/security/commoncap.c
24817 --- linux-4.4/security/commoncap.c 2021-02-24 16:56:12.205766201 +0100
24818 +++ linux-4.4/security/commoncap.c 2021-02-24 16:56:24.636157669 +0100
24819 @@ -71,6 +71,7 @@ static void warn_setuid_and_fcaps_mixed(
24820 int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
24821 int cap, int audit)
24823 + struct vx_info *vxi = current_vx_info(); /* FIXME: get vxi from cred? */
24824 struct user_namespace *ns = targ_ns;
24826 /* See if cred has the capability in the target user namespace
24827 @@ -79,8 +80,12 @@ int cap_capable(const struct cred *cred,
24830 /* Do we have the necessary capabilities? */
24831 - if (ns == cred->user_ns)
24832 - return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
24833 + if (ns == cred->user_ns) {
24834 + if (vx_info_flags(vxi, VXF_STATE_SETUP, 0) &&
24835 + cap_raised(cred->cap_effective, cap))
24837 + return vx_cap_raised(vxi, cred->cap_effective, cap) ? 0 : -EPERM;
24840 /* Have we tried all of the parent namespaces? */
24841 if (ns == &init_user_ns)
24842 @@ -665,7 +670,7 @@ int cap_inode_setxattr(struct dentry *de
24844 if (!strncmp(name, XATTR_SECURITY_PREFIX,
24845 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
24846 - !capable(CAP_SYS_ADMIN))
24847 + !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
24851 @@ -691,7 +696,7 @@ int cap_inode_removexattr(struct dentry
24853 if (!strncmp(name, XATTR_SECURITY_PREFIX,
24854 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
24855 - !capable(CAP_SYS_ADMIN))
24856 + !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
24860 diff -urNp -x '*.orig' linux-4.4/security/selinux/hooks.c linux-4.4/security/selinux/hooks.c
24861 --- linux-4.4/security/selinux/hooks.c 2021-02-24 16:56:12.212433078 +0100
24862 +++ linux-4.4/security/selinux/hooks.c 2021-02-24 16:56:24.636157669 +0100
24864 #include <linux/dccp.h>
24865 #include <linux/quota.h>
24866 #include <linux/un.h> /* for Unix socket types */
24867 -#include <net/af_unix.h> /* for Unix socket types */
24868 #include <linux/parser.h>
24869 #include <linux/nfs_mount.h>
24870 #include <net/ipv6.h>