]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-vserver-2.3.patch
Up to 4.4.302 (4.4 line is EOL now).
[packages/kernel.git] / kernel-vserver-2.3.patch
CommitLineData
c2806d43
AM
1diff -urNp -x '*.orig' linux-4.4/Documentation/vserver/debug.txt linux-4.4/Documentation/vserver/debug.txt
2--- linux-4.4/Documentation/vserver/debug.txt 1970-01-01 01:00:00.000000000 +0100
3+++ linux-4.4/Documentation/vserver/debug.txt 2021-02-24 16:56:24.542821396 +0100
4@@ -0,0 +1,154 @@
5+
6+debug_cvirt:
7+
8+ 2 4 "vx_map_tgid: %p/%llx: %d -> %d"
9+ "vx_rmap_tgid: %p/%llx: %d -> %d"
10+
11+debug_dlim:
12+
13+ 0 1 "ALLOC (%p,#%d)%c inode (%d)"
14+ "FREE (%p,#%d)%c inode"
15+ 1 2 "ALLOC (%p,#%d)%c %lld bytes (%d)"
16+ "FREE (%p,#%d)%c %lld bytes"
17+ 2 4 "ADJUST: %lld,%lld on %ld,%ld [mult=%d]"
18+ 3 8 "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d"
19+ "ext3_has_free_blocks(%p): free=%lu, root=%lu"
20+ "rcu_free_dl_info(%p)"
21+ 4 10 "alloc_dl_info(%p,%d) = %p"
22+ "dealloc_dl_info(%p)"
23+ "get_dl_info(%p[#%d.%d])"
24+ "put_dl_info(%p[#%d.%d])"
25+ 5 20 "alloc_dl_info(%p,%d)*"
26+ 6 40 "__hash_dl_info: %p[#%d]"
27+ "__unhash_dl_info: %p[#%d]"
28+ 7 80 "locate_dl_info(%p,#%d) = %p"
29+
30+debug_misc:
31+
32+ 0 1 "destroy_dqhash: %p [#0x%08x] c=%d"
33+ "new_dqhash: %p [#0x%08x]"
34+ "vroot[%d]_clr_dev: dev=%p[%lu,%d:%d]"
35+ "vroot[%d]_get_real_bdev: dev=%p[%lu,%d:%d]"
36+ "vroot[%d]_set_dev: dev=%p[%lu,%d:%d]"
37+ "vroot_get_real_bdev not set"
38+ 1 2 "cow_break_link(?%s?)"
39+ "temp copy ?%s?"
40+ 2 4 "dentry_open(new): %p"
41+ "dentry_open(old): %p"
42+ "lookup_create(new): %p"
43+ "old path ?%s?"
44+ "path_lookup(old): %d"
45+ "vfs_create(new): %d"
46+ "vfs_rename: %d"
47+ "vfs_sendfile: %d"
48+ 3 8 "fput(new_file=%p[#%d])"
49+ "fput(old_file=%p[#%d])"
50+ 4 10 "vx_info_kill(%p[#%d],%d,%d) = %d"
51+ "vx_info_kill(%p[#%d],%d,%d)*"
52+ 5 20 "vs_reboot(%p[#%d],%d)"
53+ 6 40 "dropping task %p[#%u,%u] for %p[#%u,%u]"
54+
55+debug_net:
56+
57+ 2 4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d"
58+ 3 8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d"
59+ "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d"
60+ 4 10 "ip_route_connect(%p) %p,%p;%lx"
61+ 5 20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx"
62+ 6 40 "sk,egf: %p [#%d] (from %d)"
63+ "sk,egn: %p [#%d] (from %d)"
64+ "sk,req: %p [#%d] (from %d)"
65+ "sk: %p [#%d] (from %d)"
66+ "tw: %p [#%d] (from %d)"
67+ 7 80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d"
68+ "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d"
69+
70+debug_nid:
71+
72+ 0 1 "__lookup_nx_info(#%u): %p[#%u]"
73+ "alloc_nx_info(%d) = %p"
74+ "create_nx_info(%d) (dynamic rejected)"
75+ "create_nx_info(%d) = %p (already there)"
76+ "create_nx_info(%d) = %p (new)"
77+ "dealloc_nx_info(%p)"
78+ 1 2 "alloc_nx_info(%d)*"
79+ "create_nx_info(%d)*"
80+ 2 4 "get_nx_info(%p[#%d.%d])"
81+ "put_nx_info(%p[#%d.%d])"
82+ 3 8 "claim_nx_info(%p[#%d.%d.%d]) %p"
83+ "clr_nx_info(%p[#%d.%d])"
84+ "init_nx_info(%p[#%d.%d])"
85+ "release_nx_info(%p[#%d.%d.%d]) %p"
86+ "set_nx_info(%p[#%d.%d])"
87+ 4 10 "__hash_nx_info: %p[#%d]"
88+ "__nx_dynamic_id: [#%d]"
89+ "__unhash_nx_info: %p[#%d.%d.%d]"
90+ 5 20 "moved task %p into nxi:%p[#%d]"
91+ "nx_migrate_task(%p,%p[#%d.%d.%d])"
92+ "task_get_nx_info(%p)"
93+ 6 40 "nx_clear_persistent(%p[#%d])"
94+
95+debug_quota:
96+
97+ 0 1 "quota_sync_dqh(%p,%d) discard inode %p"
98+ 1 2 "quota_sync_dqh(%p,%d)"
99+ "sync_dquots(%p,%d)"
100+ "sync_dquots_dqh(%p,%d)"
101+ 3 8 "do_quotactl(%p,%d,cmd=%d,id=%d,%p)"
102+
103+debug_switch:
104+
105+ 0 1 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]"
106+ 1 2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]"
107+ 4 10 "%s: (%s %s) returned %s with %d"
108+
109+debug_tag:
110+
111+ 7 80 "dx_parse_tag(?%s?): %d:#%d"
112+ "dx_propagate_tag(%p[#%lu.%d]): %d,%d"
113+
114+debug_xid:
115+
116+ 0 1 "__lookup_vx_info(#%u): %p[#%u]"
117+ "alloc_vx_info(%d) = %p"
118+ "alloc_vx_info(%d)*"
119+ "create_vx_info(%d) (dynamic rejected)"
120+ "create_vx_info(%d) = %p (already there)"
121+ "create_vx_info(%d) = %p (new)"
122+ "dealloc_vx_info(%p)"
123+ "loc_vx_info(%d) = %p (found)"
124+ "loc_vx_info(%d) = %p (new)"
125+ "loc_vx_info(%d) = %p (not available)"
126+ 1 2 "create_vx_info(%d)*"
127+ "loc_vx_info(%d)*"
128+ 2 4 "get_vx_info(%p[#%d.%d])"
129+ "put_vx_info(%p[#%d.%d])"
130+ 3 8 "claim_vx_info(%p[#%d.%d.%d]) %p"
131+ "clr_vx_info(%p[#%d.%d])"
132+ "init_vx_info(%p[#%d.%d])"
133+ "release_vx_info(%p[#%d.%d.%d]) %p"
134+ "set_vx_info(%p[#%d.%d])"
135+ 4 10 "__hash_vx_info: %p[#%d]"
136+ "__unhash_vx_info: %p[#%d.%d.%d]"
137+ "__vx_dynamic_id: [#%d]"
138+ 5 20 "enter_vx_info(%p[#%d],%p) %p[#%d,%p]"
139+ "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]"
140+ "moved task %p into vxi:%p[#%d]"
141+ "task_get_vx_info(%p)"
142+ "vx_migrate_task(%p,%p[#%d.%d])"
143+ 6 40 "vx_clear_persistent(%p[#%d])"
144+ "vx_exit_init(%p[#%d],%p[#%d,%d,%d])"
145+ "vx_set_init(%p[#%d],%p[#%d,%d,%d])"
146+ "vx_set_persistent(%p[#%d])"
147+ "vx_set_reaper(%p[#%d],%p[#%d,%d])"
148+ 7 80 "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]"
149+
150+
151+debug_limit:
152+
153+ n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s"
154+ "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
155+
156+ m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s"
157+ "vx_acc_pages[%5d,%s,%2d]: %5d += %5d"
158+ "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
159diff -urNp -x '*.orig' linux-4.4/arch/alpha/Kconfig linux-4.4/arch/alpha/Kconfig
160--- linux-4.4/arch/alpha/Kconfig 2016-01-11 00:01:32.000000000 +0100
161+++ linux-4.4/arch/alpha/Kconfig 2021-02-24 16:56:24.532821082 +0100
927ca606 162@@ -745,6 +745,8 @@ config DUMMY_CONSOLE
2380c486
JR
163 depends on VGA_HOSE
164 default y
d337f35e
JR
165
166+source "kernel/vserver/Kconfig"
167+
168 source "security/Kconfig"
169
170 source "crypto/Kconfig"
c2806d43
AM
171diff -urNp -x '*.orig' linux-4.4/arch/alpha/kernel/systbls.S linux-4.4/arch/alpha/kernel/systbls.S
172--- linux-4.4/arch/alpha/kernel/systbls.S 2016-01-11 00:01:32.000000000 +0100
173+++ linux-4.4/arch/alpha/kernel/systbls.S 2021-02-24 16:56:24.532821082 +0100
d337f35e
JR
174@@ -446,7 +446,7 @@ sys_call_table:
175 .quad sys_stat64 /* 425 */
176 .quad sys_lstat64
177 .quad sys_fstat64
178- .quad sys_ni_syscall /* sys_vserver */
179+ .quad sys_vserver /* sys_vserver */
180 .quad sys_ni_syscall /* sys_mbind */
181 .quad sys_ni_syscall /* sys_get_mempolicy */
182 .quad sys_ni_syscall /* sys_set_mempolicy */
c2806d43
AM
183diff -urNp -x '*.orig' linux-4.4/arch/alpha/kernel/traps.c linux-4.4/arch/alpha/kernel/traps.c
184--- linux-4.4/arch/alpha/kernel/traps.c 2016-01-11 00:01:32.000000000 +0100
185+++ linux-4.4/arch/alpha/kernel/traps.c 2021-02-24 16:56:24.532821082 +0100
927ca606 186@@ -174,7 +174,8 @@ die_if_kernel(char * str, struct pt_regs
d337f35e
JR
187 #ifdef CONFIG_SMP
188 printk("CPU %d ", hard_smp_processor_id());
189 #endif
2380c486 190- printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
61333608 191+ printk("%s(%d:#%u): %s %ld\n", current->comm,
2380c486 192+ task_pid_nr(current), current->xid, str, err);
d337f35e 193 dik_show_regs(regs, r9_15);
b00e13aa 194 add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
d337f35e 195 dik_show_trace((unsigned long *)(regs+1));
c2806d43
AM
196diff -urNp -x '*.orig' linux-4.4/arch/arm/Kconfig linux-4.4/arch/arm/Kconfig
197--- linux-4.4/arch/arm/Kconfig 2021-02-24 16:56:10.302372928 +0100
198+++ linux-4.4/arch/arm/Kconfig 2021-02-24 16:56:24.532821082 +0100
199@@ -2163,6 +2163,8 @@ source "fs/Kconfig"
d337f35e
JR
200
201 source "arch/arm/Kconfig.debug"
202
203+source "kernel/vserver/Kconfig"
204+
205 source "security/Kconfig"
206
207 source "crypto/Kconfig"
c2806d43
AM
208diff -urNp -x '*.orig' linux-4.4/arch/arm/kernel/calls.S linux-4.4/arch/arm/kernel/calls.S
209--- linux-4.4/arch/arm/kernel/calls.S 2016-01-11 00:01:32.000000000 +0100
210+++ linux-4.4/arch/arm/kernel/calls.S 2021-02-24 16:56:24.532821082 +0100
d337f35e
JR
211@@ -322,7 +322,7 @@
212 /* 310 */ CALL(sys_request_key)
213 CALL(sys_keyctl)
214 CALL(ABI(sys_semtimedop, sys_oabi_semtimedop))
215-/* vserver */ CALL(sys_ni_syscall)
216+ CALL(sys_vserver)
217 CALL(sys_ioprio_set)
218 /* 315 */ CALL(sys_ioprio_get)
219 CALL(sys_inotify_init)
c2806d43
AM
220diff -urNp -x '*.orig' linux-4.4/arch/arm/kernel/traps.c linux-4.4/arch/arm/kernel/traps.c
221--- linux-4.4/arch/arm/kernel/traps.c 2021-02-24 16:56:10.335707312 +0100
222+++ linux-4.4/arch/arm/kernel/traps.c 2021-02-24 16:56:24.536154520 +0100
8931d859 223@@ -259,8 +259,8 @@ static int __die(const char *str, int er
78865d5b 224
d337f35e
JR
225 print_modules();
226 __show_regs(regs);
927ca606
AM
227- pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
228- TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk));
229+ pr_emerg("Process %.*s (pid: %d:%u, stack limit = 0x%p)\n",
230+ TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), tsk->xid, end_of_stack(tsk));
d337f35e
JR
231
232 if (!user_mode(regs) || in_interrupt()) {
7e46296a 233 dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
c2806d43
AM
234diff -urNp -x '*.orig' linux-4.4/arch/cris/Kconfig linux-4.4/arch/cris/Kconfig
235--- linux-4.4/arch/cris/Kconfig 2016-01-11 00:01:32.000000000 +0100
236+++ linux-4.4/arch/cris/Kconfig 2021-02-24 16:56:24.536154520 +0100
927ca606 237@@ -581,6 +581,8 @@ source "fs/Kconfig"
d337f35e
JR
238
239 source "arch/cris/Kconfig.debug"
240
241+source "kernel/vserver/Kconfig"
242+
243 source "security/Kconfig"
244
245 source "crypto/Kconfig"
c2806d43
AM
246diff -urNp -x '*.orig' linux-4.4/arch/ia64/Kconfig linux-4.4/arch/ia64/Kconfig
247--- linux-4.4/arch/ia64/Kconfig 2016-01-11 00:01:32.000000000 +0100
248+++ linux-4.4/arch/ia64/Kconfig 2021-02-24 16:56:24.536154520 +0100
927ca606 249@@ -606,6 +606,8 @@ source "fs/Kconfig"
2380c486
JR
250
251 source "arch/ia64/Kconfig.debug"
d337f35e
JR
252
253+source "kernel/vserver/Kconfig"
254+
255 source "security/Kconfig"
256
257 source "crypto/Kconfig"
c2806d43
AM
258diff -urNp -x '*.orig' linux-4.4/arch/ia64/kernel/entry.S linux-4.4/arch/ia64/kernel/entry.S
259--- linux-4.4/arch/ia64/kernel/entry.S 2016-01-11 00:01:32.000000000 +0100
260+++ linux-4.4/arch/ia64/kernel/entry.S 2021-02-24 16:56:24.536154520 +0100
927ca606 261@@ -1694,7 +1694,7 @@ sys_call_table:
2380c486
JR
262 data8 sys_mq_notify
263 data8 sys_mq_getsetattr
264 data8 sys_kexec_load
265- data8 sys_ni_syscall // reserved for vserver
266+ data8 sys_vserver
267 data8 sys_waitid // 1270
268 data8 sys_add_key
269 data8 sys_request_key
c2806d43
AM
270diff -urNp -x '*.orig' linux-4.4/arch/ia64/kernel/ptrace.c linux-4.4/arch/ia64/kernel/ptrace.c
271--- linux-4.4/arch/ia64/kernel/ptrace.c 2016-01-11 00:01:32.000000000 +0100
272+++ linux-4.4/arch/ia64/kernel/ptrace.c 2021-02-24 16:56:24.536154520 +0100
78865d5b 273@@ -21,6 +21,7 @@
2380c486 274 #include <linux/regset.h>
d337f35e 275 #include <linux/elf.h>
ec22aa5c 276 #include <linux/tracehook.h>
d337f35e
JR
277+#include <linux/vs_base.h>
278
279 #include <asm/pgtable.h>
280 #include <asm/processor.h>
c2806d43
AM
281diff -urNp -x '*.orig' linux-4.4/arch/ia64/kernel/traps.c linux-4.4/arch/ia64/kernel/traps.c
282--- linux-4.4/arch/ia64/kernel/traps.c 2016-01-11 00:01:32.000000000 +0100
283+++ linux-4.4/arch/ia64/kernel/traps.c 2021-02-24 16:56:24.536154520 +0100
1e8b8f9b 284@@ -60,8 +60,9 @@ die (const char *str, struct pt_regs *re
d337f35e
JR
285 put_cpu();
286
287 if (++die.lock_owner_depth < 3) {
288- printk("%s[%d]: %s %ld [%d]\n",
2380c486 289- current->comm, task_pid_nr(current), str, err, ++die_counter);
61333608 290+ printk("%s[%d:#%u]: %s %ld [%d]\n",
2380c486 291+ current->comm, task_pid_nr(current), current->xid,
d337f35e 292+ str, err, ++die_counter);
2380c486
JR
293 if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV)
294 != NOTIFY_STOP)
295 show_regs(regs);
1e8b8f9b 296@@ -324,8 +325,9 @@ handle_fpu_swa (int fp_fault, struct pt_
2380c486
JR
297 if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) {
298 last.time = current_jiffies + 5 * HZ;
299 printk(KERN_WARNING
300- "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
301- current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
61333608 302+ "%s(%d:#%u): floating-point assist fault at ip %016lx, isr %016lx\n",
2380c486
JR
303+ current->comm, task_pid_nr(current), current->xid,
304+ regs->cr_iip + ia64_psr(regs)->ri, isr);
305 }
306 }
d337f35e 307 }
c2806d43
AM
308diff -urNp -x '*.orig' linux-4.4/arch/m32r/kernel/traps.c linux-4.4/arch/m32r/kernel/traps.c
309--- linux-4.4/arch/m32r/kernel/traps.c 2016-01-11 00:01:32.000000000 +0100
310+++ linux-4.4/arch/m32r/kernel/traps.c 2021-02-24 16:56:24.536154520 +0100
09be7631 311@@ -184,8 +184,9 @@ static void show_registers(struct pt_reg
d337f35e
JR
312 } else {
313 printk("SPI: %08lx\n", sp);
314 }
315- printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
2380c486 316- current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current);
61333608 317+ printk("Process %s (pid: %d:#%u, process nr: %d, stackpage=%08lx)",
2380c486 318+ current->comm, task_pid_nr(current), current->xid,
d337f35e
JR
319+ 0xffff & i, 4096+(unsigned long)current);
320
321 /*
322 * When in-kernel, we also print out the stack and code at the
c2806d43
AM
323diff -urNp -x '*.orig' linux-4.4/arch/m68k/Kconfig linux-4.4/arch/m68k/Kconfig
324--- linux-4.4/arch/m68k/Kconfig 2016-01-11 00:01:32.000000000 +0100
325+++ linux-4.4/arch/m68k/Kconfig 2021-02-24 16:56:24.536154520 +0100
927ca606 326@@ -164,6 +164,8 @@ source "fs/Kconfig"
d337f35e
JR
327
328 source "arch/m68k/Kconfig.debug"
329
330+source "kernel/vserver/Kconfig"
331+
332 source "security/Kconfig"
333
334 source "crypto/Kconfig"
c2806d43
AM
335diff -urNp -x '*.orig' linux-4.4/arch/mips/Kconfig linux-4.4/arch/mips/Kconfig
336--- linux-4.4/arch/mips/Kconfig 2021-02-24 16:56:10.382375448 +0100
337+++ linux-4.4/arch/mips/Kconfig 2021-02-24 16:56:24.536154520 +0100
338@@ -3031,6 +3031,8 @@ source "fs/Kconfig"
d337f35e
JR
339
340 source "arch/mips/Kconfig.debug"
341
342+source "kernel/vserver/Kconfig"
343+
344 source "security/Kconfig"
345
346 source "crypto/Kconfig"
c2806d43
AM
347diff -urNp -x '*.orig' linux-4.4/arch/mips/kernel/ptrace.c linux-4.4/arch/mips/kernel/ptrace.c
348--- linux-4.4/arch/mips/kernel/ptrace.c 2021-02-24 16:56:10.399042639 +0100
349+++ linux-4.4/arch/mips/kernel/ptrace.c 2021-02-24 16:56:24.536154520 +0100
927ca606 350@@ -30,6 +30,7 @@
2380c486
JR
351 #include <linux/audit.h>
352 #include <linux/seccomp.h>
c2e5f7c8 353 #include <linux/ftrace.h>
d337f35e
JR
354+#include <linux/vs_base.h>
355
356 #include <asm/byteorder.h>
357 #include <asm/cpu.h>
c2806d43 358@@ -797,6 +798,9 @@ long arch_ptrace(struct task_struct *chi
ab30d09f
AM
359 void __user *datavp = (void __user *) data;
360 unsigned long __user *datalp = (void __user *) data;
d337f35e 361
2380c486 362+ if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT))
d337f35e
JR
363+ goto out;
364+
365 switch (request) {
366 /* when I and D space are separate, these will need to be fixed. */
367 case PTRACE_PEEKTEXT: /* read word at location addr. */
c2806d43
AM
368diff -urNp -x '*.orig' linux-4.4/arch/mips/kernel/scall32-o32.S linux-4.4/arch/mips/kernel/scall32-o32.S
369--- linux-4.4/arch/mips/kernel/scall32-o32.S 2021-02-24 16:56:10.402376078 +0100
370+++ linux-4.4/arch/mips/kernel/scall32-o32.S 2021-02-24 16:56:24.536154520 +0100
371@@ -511,7 +511,7 @@ EXPORT(sys_call_table)
c2e5f7c8
JR
372 PTR sys_mq_timedreceive
373 PTR sys_mq_notify /* 4275 */
374 PTR sys_mq_getsetattr
375- PTR sys_ni_syscall /* sys_vserver */
376+ PTR sys_vserver
377 PTR sys_waitid
378 PTR sys_ni_syscall /* available, was setaltroot */
379 PTR sys_add_key /* 4280 */
c2806d43
AM
380diff -urNp -x '*.orig' linux-4.4/arch/mips/kernel/scall64-64.S linux-4.4/arch/mips/kernel/scall64-64.S
381--- linux-4.4/arch/mips/kernel/scall64-64.S 2021-02-24 16:56:10.402376078 +0100
382+++ linux-4.4/arch/mips/kernel/scall64-64.S 2021-02-24 16:56:24.536154520 +0100
383@@ -348,7 +348,7 @@ EXPORT(sys_call_table)
d337f35e
JR
384 PTR sys_mq_timedreceive
385 PTR sys_mq_notify
386 PTR sys_mq_getsetattr /* 5235 */
387- PTR sys_ni_syscall /* sys_vserver */
388+ PTR sys_vserver
389 PTR sys_waitid
390 PTR sys_ni_syscall /* available, was setaltroot */
391 PTR sys_add_key
c2806d43
AM
392diff -urNp -x '*.orig' linux-4.4/arch/mips/kernel/scall64-n32.S linux-4.4/arch/mips/kernel/scall64-n32.S
393--- linux-4.4/arch/mips/kernel/scall64-n32.S 2021-02-24 16:56:10.402376078 +0100
394+++ linux-4.4/arch/mips/kernel/scall64-n32.S 2021-02-24 16:56:24.536154520 +0100
395@@ -343,7 +343,7 @@ EXPORT(sysn32_call_table)
d337f35e
JR
396 PTR compat_sys_mq_timedreceive
397 PTR compat_sys_mq_notify
398 PTR compat_sys_mq_getsetattr
399- PTR sys_ni_syscall /* 6240, sys_vserver */
400+ PTR sys32_vserver /* 6240 */
2380c486 401 PTR compat_sys_waitid
d337f35e
JR
402 PTR sys_ni_syscall /* available, was setaltroot */
403 PTR sys_add_key
c2806d43
AM
404diff -urNp -x '*.orig' linux-4.4/arch/mips/kernel/scall64-o32.S linux-4.4/arch/mips/kernel/scall64-o32.S
405--- linux-4.4/arch/mips/kernel/scall64-o32.S 2021-02-24 16:56:10.402376078 +0100
406+++ linux-4.4/arch/mips/kernel/scall64-o32.S 2021-02-24 16:56:24.539487958 +0100
407@@ -499,7 +499,7 @@ EXPORT(sys32_call_table)
d337f35e
JR
408 PTR compat_sys_mq_timedreceive
409 PTR compat_sys_mq_notify /* 4275 */
410 PTR compat_sys_mq_getsetattr
411- PTR sys_ni_syscall /* sys_vserver */
412+ PTR sys32_vserver
b00e13aa 413 PTR compat_sys_waitid
d337f35e
JR
414 PTR sys_ni_syscall /* available, was setaltroot */
415 PTR sys_add_key /* 4280 */
c2806d43
AM
416diff -urNp -x '*.orig' linux-4.4/arch/mips/kernel/traps.c linux-4.4/arch/mips/kernel/traps.c
417--- linux-4.4/arch/mips/kernel/traps.c 2021-02-24 16:56:10.402376078 +0100
418+++ linux-4.4/arch/mips/kernel/traps.c 2021-02-24 16:56:24.539487958 +0100
8931d859 419@@ -354,9 +354,10 @@ void show_registers(struct pt_regs *regs
2380c486
JR
420
421 __show_regs(regs);
d337f35e 422 print_modules();
2380c486
JR
423- printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n",
424- current->comm, current->pid, current_thread_info(), current,
425- field, current_thread_info()->tp_value);
426+ printk("Process %s (pid: %d:#%u, threadinfo=%p, task=%p, tls=%0*lx)\n",
427+ current->comm, task_pid_nr(current), current->xid,
428+ current_thread_info(), current,
429+ field, current_thread_info()->tp_value);
430 if (cpu_has_userlocal) {
431 unsigned long tls;
432
c2806d43
AM
433diff -urNp -x '*.orig' linux-4.4/arch/parisc/Kconfig linux-4.4/arch/parisc/Kconfig
434--- linux-4.4/arch/parisc/Kconfig 2021-02-24 16:56:10.415709831 +0100
435+++ linux-4.4/arch/parisc/Kconfig 2021-02-24 16:56:24.539487958 +0100
927ca606 436@@ -341,6 +341,8 @@ config SECCOMP
d337f35e 437
bb20add7 438 If unsure, say Y. Only embedded should say N here.
d337f35e
JR
439
440+source "kernel/vserver/Kconfig"
441+
442 source "security/Kconfig"
443
444 source "crypto/Kconfig"
c2806d43
AM
445diff -urNp -x '*.orig' linux-4.4/arch/parisc/kernel/syscall_table.S linux-4.4/arch/parisc/kernel/syscall_table.S
446--- linux-4.4/arch/parisc/kernel/syscall_table.S 2021-02-24 16:56:10.422376707 +0100
447+++ linux-4.4/arch/parisc/kernel/syscall_table.S 2021-02-24 16:56:24.539487958 +0100
b00e13aa 448@@ -358,7 +358,7 @@
d337f35e
JR
449 ENTRY_COMP(mbind) /* 260 */
450 ENTRY_COMP(get_mempolicy)
451 ENTRY_COMP(set_mempolicy)
452- ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */
453+ ENTRY_DIFF(vserver)
454 ENTRY_SAME(add_key)
455 ENTRY_SAME(request_key) /* 265 */
927ca606 456 ENTRY_COMP(keyctl)
c2806d43
AM
457diff -urNp -x '*.orig' linux-4.4/arch/parisc/kernel/traps.c linux-4.4/arch/parisc/kernel/traps.c
458--- linux-4.4/arch/parisc/kernel/traps.c 2021-02-24 16:56:10.422376707 +0100
459+++ linux-4.4/arch/parisc/kernel/traps.c 2021-02-24 16:56:24.539487958 +0100
927ca606 460@@ -235,8 +235,9 @@ void die_if_kernel(char *str, struct pt_
d337f35e
JR
461 return; /* STFU */
462
98968f7b
JR
463 parisc_printk_ratelimited(1, regs,
464- KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
2380c486 465- current->comm, task_pid_nr(current), str, err, regs->iaoq[0]);
98968f7b 466+ KERN_CRIT "%s (pid %d:#%u): %s (code %ld) at " RFMT "\n",
2380c486 467+ current->comm, task_pid_nr(current), current->xid,
d337f35e 468+ str, err, regs->iaoq[0]);
98968f7b
JR
469
470 return;
471 }
927ca606 472@@ -266,8 +267,8 @@ void die_if_kernel(char *str, struct pt_
d337f35e
JR
473 pdc_console_restart();
474
2380c486
JR
475 if (err)
476- printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
477- current->comm, task_pid_nr(current), str, err);
478+ printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld)\n",
479+ current->comm, task_pid_nr(current), current->xid, str, err);
480
481 /* Wot's wrong wif bein' racy? */
482 if (current->thread.flags & PARISC_KERNEL_DEATH) {
c2806d43
AM
483diff -urNp -x '*.orig' linux-4.4/arch/powerpc/Kconfig linux-4.4/arch/powerpc/Kconfig
484--- linux-4.4/arch/powerpc/Kconfig 2021-02-24 16:56:10.422376707 +0100
485+++ linux-4.4/arch/powerpc/Kconfig 2021-02-24 16:56:24.539487958 +0100
48cb6a3c 486@@ -1087,6 +1087,8 @@ source "lib/Kconfig"
3cc86a71
AM
487
488 source "arch/powerpc/Kconfig.debug"
489
490+source "kernel/vserver/Kconfig"
491+
492 source "security/Kconfig"
493
494 source "crypto/Kconfig"
c2806d43
AM
495diff -urNp -x '*.orig' linux-4.4/arch/powerpc/include/uapi/asm/unistd.h linux-4.4/arch/powerpc/include/uapi/asm/unistd.h
496--- linux-4.4/arch/powerpc/include/uapi/asm/unistd.h 2016-01-11 00:01:32.000000000 +0100
497+++ linux-4.4/arch/powerpc/include/uapi/asm/unistd.h 2021-02-24 16:56:24.539487958 +0100
498@@ -275,7 +275,7 @@
499 #endif
500 #define __NR_rtas 255
501 #define __NR_sys_debug_setcontext 256
502-/* Number 257 is reserved for vserver */
503+#define __NR_vserver 257
504 #define __NR_migrate_pages 258
505 #define __NR_mbind 259
506 #define __NR_get_mempolicy 260
507diff -urNp -x '*.orig' linux-4.4/arch/powerpc/kernel/traps.c linux-4.4/arch/powerpc/kernel/traps.c
508--- linux-4.4/arch/powerpc/kernel/traps.c 2021-02-24 16:56:10.445710775 +0100
509+++ linux-4.4/arch/powerpc/kernel/traps.c 2021-02-24 16:56:24.539487958 +0100
927ca606 510@@ -1315,8 +1315,9 @@ void nonrecoverable_exception(struct pt_
d337f35e
JR
511
512 void trace_syscall(struct pt_regs *regs)
513 {
514- printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n",
2380c486 515- current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0],
61333608 516+ printk("Task: %p(%d:#%u), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n",
2380c486 517+ current, task_pid_nr(current), current->xid,
d337f35e
JR
518+ regs->nip, regs->link, regs->gpr[0],
519 regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
520 }
521
c2806d43
AM
522diff -urNp -x '*.orig' linux-4.4/arch/s390/Kconfig linux-4.4/arch/s390/Kconfig
523--- linux-4.4/arch/s390/Kconfig 2021-02-24 16:56:10.465711405 +0100
524+++ linux-4.4/arch/s390/Kconfig 2021-02-24 16:56:24.539487958 +0100
525@@ -776,6 +776,8 @@ source "fs/Kconfig"
526
527 source "arch/s390/Kconfig.debug"
528
529+source "kernel/vserver/Kconfig"
530+
531 source "security/Kconfig"
532
533 source "crypto/Kconfig"
534diff -urNp -x '*.orig' linux-4.4/arch/s390/include/asm/tlb.h linux-4.4/arch/s390/include/asm/tlb.h
535--- linux-4.4/arch/s390/include/asm/tlb.h 2016-01-11 00:01:32.000000000 +0100
536+++ linux-4.4/arch/s390/include/asm/tlb.h 2021-02-24 16:56:24.539487958 +0100
dd5f3080 537@@ -24,6 +24,7 @@
0411181d 538 #include <linux/mm.h>
d33d7b00 539 #include <linux/pagemap.h>
0411181d 540 #include <linux/swap.h>
0411181d
AM
541+
542 #include <asm/processor.h>
543 #include <asm/pgalloc.h>
763640ca 544 #include <asm/tlbflush.h>
c2806d43
AM
545diff -urNp -x '*.orig' linux-4.4/arch/s390/include/uapi/asm/unistd.h linux-4.4/arch/s390/include/uapi/asm/unistd.h
546--- linux-4.4/arch/s390/include/uapi/asm/unistd.h 2016-01-11 00:01:32.000000000 +0100
547+++ linux-4.4/arch/s390/include/uapi/asm/unistd.h 2021-02-24 16:56:24.539487958 +0100
92598135 548@@ -200,7 +200,7 @@
927ca606
AM
549 #define __NR_clock_gettime 260
550 #define __NR_clock_getres 261
551 #define __NR_clock_nanosleep 262
0411181d
AM
552-/* Number 263 is reserved for vserver */
553+#define __NR_vserver 263
554 #define __NR_statfs64 265
555 #define __NR_fstatfs64 266
556 #define __NR_remap_file_pages 267
c2806d43
AM
557diff -urNp -x '*.orig' linux-4.4/arch/s390/kernel/ptrace.c linux-4.4/arch/s390/kernel/ptrace.c
558--- linux-4.4/arch/s390/kernel/ptrace.c 2021-02-24 16:56:10.475711720 +0100
559+++ linux-4.4/arch/s390/kernel/ptrace.c 2021-02-24 16:56:24.539487958 +0100
db55b927 560@@ -21,6 +21,7 @@
ec22aa5c
AM
561 #include <linux/tracehook.h>
562 #include <linux/seccomp.h>
969f5c41 563 #include <linux/compat.h>
db55b927 564+#include <linux/vs_base.h>
ec22aa5c 565 #include <trace/syscall.h>
d337f35e 566 #include <asm/segment.h>
db55b927 567 #include <asm/page.h>
c2806d43
AM
568diff -urNp -x '*.orig' linux-4.4/arch/s390/kernel/syscalls.S linux-4.4/arch/s390/kernel/syscalls.S
569--- linux-4.4/arch/s390/kernel/syscalls.S 2021-02-24 16:56:10.475711720 +0100
570+++ linux-4.4/arch/s390/kernel/syscalls.S 2021-02-24 16:56:24.539487958 +0100
927ca606
AM
571@@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,compat_sys_clo
572 SYSCALL(sys_clock_gettime,compat_sys_clock_gettime) /* 260 */
573 SYSCALL(sys_clock_getres,compat_sys_clock_getres)
574 SYSCALL(sys_clock_nanosleep,compat_sys_clock_nanosleep)
575-NI_SYSCALL /* reserved for vserver */
d337f35e 576+SYSCALL(sys_vserver,sys_vserver,sys32_vserver)
927ca606
AM
577 SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
578 SYSCALL(sys_statfs64,compat_sys_statfs64)
579 SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
c2806d43
AM
580diff -urNp -x '*.orig' linux-4.4/arch/sh/Kconfig linux-4.4/arch/sh/Kconfig
581--- linux-4.4/arch/sh/Kconfig 2016-01-11 00:01:32.000000000 +0100
582+++ linux-4.4/arch/sh/Kconfig 2021-02-24 16:56:24.542821396 +0100
927ca606 583@@ -883,6 +883,8 @@ source "fs/Kconfig"
d337f35e
JR
584
585 source "arch/sh/Kconfig.debug"
586
587+source "kernel/vserver/Kconfig"
588+
589 source "security/Kconfig"
590
591 source "crypto/Kconfig"
c2806d43
AM
592diff -urNp -x '*.orig' linux-4.4/arch/sh/kernel/irq.c linux-4.4/arch/sh/kernel/irq.c
593--- linux-4.4/arch/sh/kernel/irq.c 2016-01-11 00:01:32.000000000 +0100
594+++ linux-4.4/arch/sh/kernel/irq.c 2021-02-24 16:56:24.542821396 +0100
f86f0b53 595@@ -14,6 +14,7 @@
7e46296a 596 #include <linux/ftrace.h>
76514441 597 #include <linux/delay.h>
763640ca 598 #include <linux/ratelimit.h>
f86f0b53 599+// #include <linux/vs_context.h>
d337f35e 600 #include <asm/processor.h>
2380c486 601 #include <asm/machvec.h>
f86f0b53 602 #include <asm/uaccess.h>
c2806d43
AM
603diff -urNp -x '*.orig' linux-4.4/arch/sparc/Kconfig linux-4.4/arch/sparc/Kconfig
604--- linux-4.4/arch/sparc/Kconfig 2021-02-24 16:56:10.485712035 +0100
605+++ linux-4.4/arch/sparc/Kconfig 2021-02-24 16:56:24.542821396 +0100
3cc86a71
AM
606@@ -561,6 +561,8 @@ source "fs/Kconfig"
607
608 source "arch/sparc/Kconfig.debug"
609
610+source "kernel/vserver/Kconfig"
611+
612 source "security/Kconfig"
613
614 source "crypto/Kconfig"
c2806d43
AM
615diff -urNp -x '*.orig' linux-4.4/arch/sparc/include/uapi/asm/unistd.h linux-4.4/arch/sparc/include/uapi/asm/unistd.h
616--- linux-4.4/arch/sparc/include/uapi/asm/unistd.h 2016-01-11 00:01:32.000000000 +0100
617+++ linux-4.4/arch/sparc/include/uapi/asm/unistd.h 2021-02-24 16:56:24.542821396 +0100
618@@ -332,7 +332,7 @@
619 #define __NR_timer_getoverrun 264
620 #define __NR_timer_delete 265
621 #define __NR_timer_create 266
622-/* #define __NR_vserver 267 Reserved for VSERVER */
623+#define __NR_vserver 267
624 #define __NR_io_setup 268
625 #define __NR_io_destroy 269
626 #define __NR_io_submit 270
627diff -urNp -x '*.orig' linux-4.4/arch/sparc/kernel/systbls_32.S linux-4.4/arch/sparc/kernel/systbls_32.S
628--- linux-4.4/arch/sparc/kernel/systbls_32.S 2016-01-11 00:01:32.000000000 +0100
629+++ linux-4.4/arch/sparc/kernel/systbls_32.S 2021-02-24 16:56:24.542821396 +0100
50e68740 630@@ -70,7 +70,7 @@ sys_call_table:
a168f21d 631 /*250*/ .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall
50e68740
JR
632 /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
633 /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
634-/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
635+/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
636 /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
637 /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
638 /*280*/ .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
c2806d43
AM
639diff -urNp -x '*.orig' linux-4.4/arch/sparc/kernel/systbls_64.S linux-4.4/arch/sparc/kernel/systbls_64.S
640--- linux-4.4/arch/sparc/kernel/systbls_64.S 2016-01-11 00:01:32.000000000 +0100
641+++ linux-4.4/arch/sparc/kernel/systbls_64.S 2021-02-24 16:56:24.542821396 +0100
50e68740 642@@ -71,7 +71,7 @@ sys_call_table32:
b00e13aa 643 /*250*/ .word sys_mremap, compat_sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
50e68740
JR
644 .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
645 /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
646- .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
647+ .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy
648 /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
649 .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
b00e13aa 650 /*280*/ .word sys_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat
927ca606 651@@ -152,7 +152,7 @@ sys_call_table:
a168f21d 652 /*250*/ .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
50e68740
JR
653 .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
654 /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
655- .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
656+ .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
657 /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
658 .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
659 /*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
c2806d43
AM
660diff -urNp -x '*.orig' linux-4.4/arch/um/Kconfig.rest linux-4.4/arch/um/Kconfig.rest
661--- linux-4.4/arch/um/Kconfig.rest 2016-01-11 00:01:32.000000000 +0100
662+++ linux-4.4/arch/um/Kconfig.rest 2021-02-24 16:56:24.542821396 +0100
f6c5ef8b 663@@ -12,6 +12,8 @@ source "arch/um/Kconfig.net"
d33d7b00
AM
664
665 source "fs/Kconfig"
666
667+source "kernel/vserver/Kconfig"
668+
669 source "security/Kconfig"
670
671 source "crypto/Kconfig"
c2806d43
AM
672diff -urNp -x '*.orig' linux-4.4/arch/x86/Kconfig linux-4.4/arch/x86/Kconfig
673--- linux-4.4/arch/x86/Kconfig 2021-02-24 16:56:10.502379227 +0100
674+++ linux-4.4/arch/x86/Kconfig 2021-02-24 16:56:24.542821396 +0100
675@@ -2724,6 +2724,8 @@ source "fs/Kconfig"
676
677 source "arch/x86/Kconfig.debug"
678
679+source "kernel/vserver/Kconfig"
680+
681 source "security/Kconfig"
682
683 source "crypto/Kconfig"
684diff -urNp -x '*.orig' linux-4.4/arch/x86/entry/syscalls/syscall_32.tbl linux-4.4/arch/x86/entry/syscalls/syscall_32.tbl
685--- linux-4.4/arch/x86/entry/syscalls/syscall_32.tbl 2021-02-24 16:56:10.509046103 +0100
686+++ linux-4.4/arch/x86/entry/syscalls/syscall_32.tbl 2021-02-24 16:56:24.542821396 +0100
db55b927
AM
687@@ -279,7 +279,7 @@
688 270 i386 tgkill sys_tgkill
689 271 i386 utimes sys_utimes compat_sys_utimes
690 272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
691-273 i386 vserver
692+273 i386 vserver sys_vserver sys32_vserver
693 274 i386 mbind sys_mbind
694 275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
695 276 i386 set_mempolicy sys_set_mempolicy
c2806d43
AM
696diff -urNp -x '*.orig' linux-4.4/arch/x86/entry/syscalls/syscall_64.tbl linux-4.4/arch/x86/entry/syscalls/syscall_64.tbl
697--- linux-4.4/arch/x86/entry/syscalls/syscall_64.tbl 2016-01-11 00:01:32.000000000 +0100
698+++ linux-4.4/arch/x86/entry/syscalls/syscall_64.tbl 2021-02-24 16:56:24.542821396 +0100
db55b927 699@@ -242,7 +242,7 @@
1e8b8f9b
AM
700 233 common epoll_ctl sys_epoll_ctl
701 234 common tgkill sys_tgkill
702 235 common utimes sys_utimes
db55b927
AM
703-236 64 vserver
704+236 64 vserver sys_vserver
1e8b8f9b
AM
705 237 common mbind sys_mbind
706 238 common set_mempolicy sys_set_mempolicy
707 239 common get_mempolicy sys_get_mempolicy
c2806d43
AM
708diff -urNp -x '*.orig' linux-4.4/block/ioprio.c linux-4.4/block/ioprio.c
709--- linux-4.4/block/ioprio.c 2021-02-24 16:56:10.582381746 +0100
710+++ linux-4.4/block/ioprio.c 2021-02-24 16:56:24.542821396 +0100
bb20add7
AM
711@@ -28,6 +28,7 @@
712 #include <linux/syscalls.h>
713 #include <linux/security.h>
714 #include <linux/pid_namespace.h>
715+#include <linux/vs_base.h>
716
717 int set_task_ioprio(struct task_struct *task, int ioprio)
718 {
719@@ -105,6 +106,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which,
720 else
721 pgrp = find_vpid(who);
722 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
723+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
724+ continue;
725 ret = set_task_ioprio(p, ioprio);
726 if (ret)
727 break;
927ca606 728@@ -203,6 +206,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which,
bb20add7 729 pgrp = find_vpid(who);
27eae5d4 730 read_lock(&tasklist_lock);
bb20add7
AM
731 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
732+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
733+ continue;
734 tmpio = get_task_ioprio(p);
735 if (tmpio < 0)
736 continue;
c2806d43
AM
737diff -urNp -x '*.orig' linux-4.4/drivers/block/Kconfig linux-4.4/drivers/block/Kconfig
738--- linux-4.4/drivers/block/Kconfig 2021-02-24 16:56:10.629049882 +0100
739+++ linux-4.4/drivers/block/Kconfig 2021-02-24 16:56:24.542821396 +0100
740@@ -283,6 +283,13 @@ config BLK_DEV_CRYPTOLOOP
741
742 source "drivers/block/drbd/Kconfig"
743
744+config BLK_DEV_VROOT
745+ tristate "Virtual Root device support"
746+ depends on QUOTACTL
747+ ---help---
748+ Saying Y here will allow you to use quota/fs ioctls on a shared
749+ partition within a virtual server without compromising security.
3cc86a71
AM
750+
751 config BLK_DEV_NBD
752 tristate "Network block device support"
753 depends on NET
c2806d43
AM
754diff -urNp -x '*.orig' linux-4.4/drivers/block/Makefile linux-4.4/drivers/block/Makefile
755--- linux-4.4/drivers/block/Makefile 2016-01-11 00:01:32.000000000 +0100
756+++ linux-4.4/drivers/block/Makefile 2021-02-24 16:56:24.542821396 +0100
757@@ -32,6 +32,7 @@ obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
758
759 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
760 obj-$(CONFIG_BLK_DEV_HD) += hd.o
761+obj-$(CONFIG_BLK_DEV_VROOT) += vroot.o
762
763 obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
764 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
765diff -urNp -x '*.orig' linux-4.4/drivers/block/loop.c linux-4.4/drivers/block/loop.c
766--- linux-4.4/drivers/block/loop.c 2021-02-24 16:56:10.632383321 +0100
767+++ linux-4.4/drivers/block/loop.c 2021-02-24 16:56:24.542821396 +0100
3cc86a71
AM
768@@ -76,6 +76,7 @@
769 #include <linux/miscdevice.h>
770 #include <linux/falloc.h>
771 #include <linux/uio.h>
772+#include <linux/vs_context.h>
773 #include "loop.h"
774
775 #include <asm/uaccess.h>
776@@ -947,6 +948,7 @@ static int loop_set_fd(struct loop_devic
777 lo->lo_blocksize = lo_blocksize;
778 lo->lo_device = bdev;
d337f35e
JR
779 lo->lo_flags = lo_flags;
780+ lo->lo_xid = vx_current_xid();
781 lo->lo_backing_file = file;
927ca606 782 lo->transfer = NULL;
d337f35e 783 lo->ioctl = NULL;
8931d859 784@@ -1067,6 +1069,7 @@ static int loop_clr_fd(struct loop_devic
927ca606 785 lo->lo_offset = 0;
f6c5ef8b 786 lo->lo_sizelimit = 0;
2380c486 787 lo->lo_encrypt_key_size = 0;
2380c486
JR
788+ lo->lo_xid = 0;
789 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
790 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
791 memset(lo->lo_file_name, 0, LO_NAME_SIZE);
8931d859 792@@ -1113,7 +1116,7 @@ loop_set_status(struct loop_device *lo,
2380c486 793
ec22aa5c 794 if (lo->lo_encrypt_key_size &&
537831f9 795 !uid_eq(lo->lo_key_owner, uid) &&
d337f35e
JR
796- !capable(CAP_SYS_ADMIN))
797+ !vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP))
798 return -EPERM;
799 if (lo->lo_state != Lo_bound)
800 return -ENXIO;
8931d859 801@@ -1218,7 +1221,8 @@ loop_get_status(struct loop_device *lo,
d337f35e
JR
802 memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
803 info->lo_encrypt_type =
804 lo->lo_encryption ? lo->lo_encryption->number : 0;
805- if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
806+ if (lo->lo_encrypt_key_size &&
807+ vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) {
808 info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
809 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
810 lo->lo_encrypt_key_size);
48cb6a3c 811@@ -1580,6 +1584,11 @@ static int lo_open(struct block_device *
a168f21d
AM
812 goto out;
813 }
d337f35e 814
dd5f3080 815+ if (!vx_check(lo->lo_xid, VS_IDENT|VS_HOSTID|VS_ADMIN_P)) {
816+ err = -EACCES;
817+ goto out;
818+ }
d337f35e 819+
927ca606
AM
820 atomic_inc(&lo->lo_refcnt);
821 out:
822 mutex_unlock(&loop_index_mutex);
c2806d43
AM
823diff -urNp -x '*.orig' linux-4.4/drivers/block/loop.h linux-4.4/drivers/block/loop.h
824--- linux-4.4/drivers/block/loop.h 2021-02-24 16:56:10.632383321 +0100
825+++ linux-4.4/drivers/block/loop.h 2021-02-24 16:56:24.542821396 +0100
927ca606 826@@ -43,6 +43,7 @@ struct loop_device {
c2e5f7c8
JR
827 struct loop_func_table *lo_encryption;
828 __u32 lo_init[2];
829 kuid_t lo_key_owner; /* Who set the key */
830+ vxid_t lo_xid;
831 int (*ioctl)(struct loop_device *, int cmd,
832 unsigned long arg);
833
c2806d43
AM
834diff -urNp -x '*.orig' linux-4.4/drivers/block/vroot.c linux-4.4/drivers/block/vroot.c
835--- linux-4.4/drivers/block/vroot.c 1970-01-01 01:00:00.000000000 +0100
836+++ linux-4.4/drivers/block/vroot.c 2021-02-24 16:56:24.542821396 +0100
f19bd705 837@@ -0,0 +1,291 @@
d337f35e
JR
838+/*
839+ * linux/drivers/block/vroot.c
840+ *
c2806d43
AM
841+ * written by Herbert Pötzl, 9/11/2002
842+ * ported to 2.6.10 by Herbert Pötzl, 30/12/2004
d337f35e
JR
843+ *
844+ * based on the loop.c code by Theodore Ts'o.
845+ *
c2806d43 846+ * Copyright (C) 2002-2007 by Herbert Pötzl.
d337f35e
JR
847+ * Redistribution of this file is permitted under the
848+ * GNU General Public License.
849+ *
850+ */
851+
852+#include <linux/module.h>
853+#include <linux/moduleparam.h>
854+#include <linux/file.h>
855+#include <linux/major.h>
856+#include <linux/blkdev.h>
76514441 857+#include <linux/slab.h>
d337f35e
JR
858+
859+#include <linux/vroot.h>
860+#include <linux/vs_context.h>
861+
862+
863+static int max_vroot = 8;
864+
865+static struct vroot_device *vroot_dev;
866+static struct gendisk **disks;
867+
868+
869+static int vroot_set_dev(
870+ struct vroot_device *vr,
d337f35e
JR
871+ struct block_device *bdev,
872+ unsigned int arg)
873+{
874+ struct block_device *real_bdev;
875+ struct file *file;
876+ struct inode *inode;
877+ int error;
878+
879+ error = -EBUSY;
880+ if (vr->vr_state != Vr_unbound)
881+ goto out;
882+
883+ error = -EBADF;
884+ file = fget(arg);
885+ if (!file)
886+ goto out;
887+
888+ error = -EINVAL;
927ca606 889+ inode = file->f_path.dentry->d_inode;
d337f35e
JR
890+
891+
892+ if (S_ISBLK(inode->i_mode)) {
893+ real_bdev = inode->i_bdev;
894+ vr->vr_device = real_bdev;
895+ __iget(real_bdev->bd_inode);
896+ } else
897+ goto out_fput;
898+
899+ vxdprintk(VXD_CBIT(misc, 0),
900+ "vroot[%d]_set_dev: dev=" VXF_DEV,
901+ vr->vr_number, VXD_DEV(real_bdev));
902+
903+ vr->vr_state = Vr_bound;
904+ error = 0;
905+
906+ out_fput:
907+ fput(file);
908+ out:
909+ return error;
910+}
911+
912+static int vroot_clr_dev(
913+ struct vroot_device *vr,
d337f35e
JR
914+ struct block_device *bdev)
915+{
916+ struct block_device *real_bdev;
917+
918+ if (vr->vr_state != Vr_bound)
919+ return -ENXIO;
920+ if (vr->vr_refcnt > 1) /* we needed one fd for the ioctl */
921+ return -EBUSY;
922+
923+ real_bdev = vr->vr_device;
924+
925+ vxdprintk(VXD_CBIT(misc, 0),
926+ "vroot[%d]_clr_dev: dev=" VXF_DEV,
927+ vr->vr_number, VXD_DEV(real_bdev));
928+
929+ bdput(real_bdev);
930+ vr->vr_state = Vr_unbound;
931+ vr->vr_device = NULL;
932+ return 0;
933+}
934+
935+
ec22aa5c 936+static int vr_ioctl(struct block_device *bdev, fmode_t mode,
d337f35e
JR
937+ unsigned int cmd, unsigned long arg)
938+{
ec22aa5c 939+ struct vroot_device *vr = bdev->bd_disk->private_data;
d337f35e
JR
940+ int err;
941+
942+ down(&vr->vr_ctl_mutex);
943+ switch (cmd) {
944+ case VROOT_SET_DEV:
ec22aa5c 945+ err = vroot_set_dev(vr, bdev, arg);
d337f35e
JR
946+ break;
947+ case VROOT_CLR_DEV:
ec22aa5c 948+ err = vroot_clr_dev(vr, bdev);
d337f35e
JR
949+ break;
950+ default:
951+ err = -EINVAL;
952+ break;
953+ }
954+ up(&vr->vr_ctl_mutex);
955+ return err;
956+}
957+
ec22aa5c 958+static int vr_open(struct block_device *bdev, fmode_t mode)
d337f35e 959+{
ec22aa5c 960+ struct vroot_device *vr = bdev->bd_disk->private_data;
d337f35e
JR
961+
962+ down(&vr->vr_ctl_mutex);
963+ vr->vr_refcnt++;
964+ up(&vr->vr_ctl_mutex);
965+ return 0;
966+}
967+
09be7631 968+static void vr_release(struct gendisk *disk, fmode_t mode)
d337f35e 969+{
ec22aa5c 970+ struct vroot_device *vr = disk->private_data;
d337f35e
JR
971+
972+ down(&vr->vr_ctl_mutex);
973+ --vr->vr_refcnt;
974+ up(&vr->vr_ctl_mutex);
d337f35e
JR
975+}
976+
977+static struct block_device_operations vr_fops = {
978+ .owner = THIS_MODULE,
979+ .open = vr_open,
980+ .release = vr_release,
981+ .ioctl = vr_ioctl,
982+};
983+
f19bd705 984+static blk_qc_t vroot_make_request(struct request_queue *q, struct bio *bio)
b3b0d4fd
AM
985+{
986+ printk("vroot_make_request %p, %p\n", q, bio);
987+ bio_io_error(bio);
f19bd705 988+ return BLK_QC_T_NONE;
b3b0d4fd
AM
989+}
990+
d337f35e
JR
991+struct block_device *__vroot_get_real_bdev(struct block_device *bdev)
992+{
993+ struct inode *inode = bdev->bd_inode;
994+ struct vroot_device *vr;
995+ struct block_device *real_bdev;
996+ int minor = iminor(inode);
997+
998+ vr = &vroot_dev[minor];
999+ real_bdev = vr->vr_device;
1000+
1001+ vxdprintk(VXD_CBIT(misc, 0),
1002+ "vroot[%d]_get_real_bdev: dev=" VXF_DEV,
1003+ vr->vr_number, VXD_DEV(real_bdev));
1004+
1005+ if (vr->vr_state != Vr_bound)
1006+ return ERR_PTR(-ENXIO);
1007+
1008+ __iget(real_bdev->bd_inode);
1009+ return real_bdev;
1010+}
1011+
b3b0d4fd
AM
1012+
1013+
d337f35e
JR
1014+/*
1015+ * And now the modules code and kernel interface.
1016+ */
1017+
1018+module_param(max_vroot, int, 0);
1019+
1020+MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)");
1021+MODULE_LICENSE("GPL");
1022+MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR);
1023+
c2806d43 1024+MODULE_AUTHOR ("Herbert P?tzl");
d337f35e
JR
1025+MODULE_DESCRIPTION ("Virtual Root Device Mapper");
1026+
1027+
1028+int __init vroot_init(void)
1029+{
1030+ int err, i;
1031+
1032+ if (max_vroot < 1 || max_vroot > 256) {
1033+ max_vroot = MAX_VROOT_DEFAULT;
1034+ printk(KERN_WARNING "vroot: invalid max_vroot "
1035+ "(must be between 1 and 256), "
1036+ "using default (%d)\n", max_vroot);
1037+ }
1038+
1039+ if (register_blkdev(VROOT_MAJOR, "vroot"))
1040+ return -EIO;
1041+
1042+ err = -ENOMEM;
1043+ vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL);
1044+ if (!vroot_dev)
1045+ goto out_mem1;
1046+ memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device));
1047+
1048+ disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL);
1049+ if (!disks)
1050+ goto out_mem2;
1051+
1052+ for (i = 0; i < max_vroot; i++) {
1053+ disks[i] = alloc_disk(1);
1054+ if (!disks[i])
1055+ goto out_mem3;
2380c486
JR
1056+ disks[i]->queue = blk_alloc_queue(GFP_KERNEL);
1057+ if (!disks[i]->queue)
1058+ goto out_mem3;
b3b0d4fd 1059+ blk_queue_make_request(disks[i]->queue, vroot_make_request);
d337f35e
JR
1060+ }
1061+
1062+ for (i = 0; i < max_vroot; i++) {
1063+ struct vroot_device *vr = &vroot_dev[i];
1064+ struct gendisk *disk = disks[i];
1065+
1066+ memset(vr, 0, sizeof(*vr));
5a9fc8e8 1067+ sema_init(&vr->vr_ctl_mutex, 1);
d337f35e
JR
1068+ vr->vr_number = i;
1069+ disk->major = VROOT_MAJOR;
1070+ disk->first_minor = i;
1071+ disk->fops = &vr_fops;
1072+ sprintf(disk->disk_name, "vroot%d", i);
1073+ disk->private_data = vr;
1074+ }
1075+
1076+ err = register_vroot_grb(&__vroot_get_real_bdev);
1077+ if (err)
1078+ goto out_mem3;
1079+
1080+ for (i = 0; i < max_vroot; i++)
1081+ add_disk(disks[i]);
1082+ printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot);
1083+ return 0;
1084+
1085+out_mem3:
1086+ while (i--)
1087+ put_disk(disks[i]);
1088+ kfree(disks);
1089+out_mem2:
1090+ kfree(vroot_dev);
1091+out_mem1:
1092+ unregister_blkdev(VROOT_MAJOR, "vroot");
1093+ printk(KERN_ERR "vroot: ran out of memory\n");
1094+ return err;
1095+}
1096+
1097+void vroot_exit(void)
1098+{
1099+ int i;
1100+
1101+ if (unregister_vroot_grb(&__vroot_get_real_bdev))
1102+ printk(KERN_WARNING "vroot: cannot unregister grb\n");
1103+
1104+ for (i = 0; i < max_vroot; i++) {
1105+ del_gendisk(disks[i]);
1106+ put_disk(disks[i]);
1107+ }
2380c486 1108+ unregister_blkdev(VROOT_MAJOR, "vroot");
d337f35e
JR
1109+
1110+ kfree(disks);
1111+ kfree(vroot_dev);
1112+}
1113+
1114+module_init(vroot_init);
1115+module_exit(vroot_exit);
1116+
1117+#ifndef MODULE
1118+
1119+static int __init max_vroot_setup(char *str)
1120+{
1121+ max_vroot = simple_strtol(str, NULL, 0);
1122+ return 1;
1123+}
1124+
1125+__setup("max_vroot=", max_vroot_setup);
1126+
1127+#endif
1128+
c2806d43
AM
1129diff -urNp -x '*.orig' linux-4.4/drivers/infiniband/core/addr.c linux-4.4/drivers/infiniband/core/addr.c
1130--- linux-4.4/drivers/infiniband/core/addr.c 2021-02-24 16:56:10.869057440 +0100
1131+++ linux-4.4/drivers/infiniband/core/addr.c 2021-02-24 16:56:24.546154835 +0100
8931d859 1132@@ -299,7 +299,7 @@ static int addr6_resolve(struct sockaddr
5dd10c98 1133
763640ca 1134 if (ipv6_addr_any(&fl6.saddr)) {
927ca606 1135 ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
763640ca
JR
1136- &fl6.daddr, 0, &fl6.saddr);
1137+ &fl6.daddr, 0, &fl6.saddr, NULL);
5dd10c98
AM
1138 if (ret)
1139 goto put;
1140
c2806d43
AM
1141diff -urNp -x '*.orig' linux-4.4/drivers/md/dm-ioctl.c linux-4.4/drivers/md/dm-ioctl.c
1142--- linux-4.4/drivers/md/dm-ioctl.c 2021-02-24 16:56:10.939059645 +0100
1143+++ linux-4.4/drivers/md/dm-ioctl.c 2021-02-24 16:56:24.546154835 +0100
3cc86a71
AM
1144@@ -16,6 +16,7 @@
1145 #include <linux/dm-ioctl.h>
1146 #include <linux/hdreg.h>
1147 #include <linux/compat.h>
1148+#include <linux/vs_context.h>
1149
1150 #include <asm/uaccess.h>
1151
1152@@ -114,7 +115,8 @@ static struct hash_cell *__get_name_cell
1153 unsigned int h = hash_str(str);
1154
1155 list_for_each_entry (hc, _name_buckets + h, name_list)
1156- if (!strcmp(hc->name, str)) {
1157+ if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1158+ !strcmp(hc->name, str)) {
1159 dm_get(hc->md);
1160 return hc;
1161 }
1162@@ -128,7 +130,8 @@ static struct hash_cell *__get_uuid_cell
1163 unsigned int h = hash_str(str);
1164
1165 list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
1166- if (!strcmp(hc->uuid, str)) {
1167+ if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1168+ !strcmp(hc->uuid, str)) {
1169 dm_get(hc->md);
1170 return hc;
1171 }
1172@@ -139,13 +142,15 @@ static struct hash_cell *__get_uuid_cell
1173 static struct hash_cell *__get_dev_cell(uint64_t dev)
1174 {
1175 struct mapped_device *md;
1176- struct hash_cell *hc;
1177+ struct hash_cell *hc = NULL;
1178
1179 md = dm_get_md(huge_decode_dev(dev));
1180 if (!md)
1181 return NULL;
1182
1183- hc = dm_get_mdptr(md);
1184+ if (vx_check(dm_get_xid(md), VS_WATCH_P | VS_IDENT))
1185+ hc = dm_get_mdptr(md);
1186+
1187 if (!hc) {
1188 dm_put(md);
1189 return NULL;
1190@@ -467,6 +472,9 @@ typedef int (*ioctl_fn)(struct dm_ioctl
1191
1192 static int remove_all(struct dm_ioctl *param, size_t param_size)
1193 {
1194+ if (!vx_check(0, VS_ADMIN))
1195+ return -EPERM;
1196+
1197 dm_hash_remove_all(true, !!(param->flags & DM_DEFERRED_REMOVE), false);
1198 param->data_size = 0;
1199 return 0;
1200@@ -514,6 +522,8 @@ static int list_devices(struct dm_ioctl
1201 */
1202 for (i = 0; i < NUM_BUCKETS; i++) {
1203 list_for_each_entry (hc, _name_buckets + i, name_list) {
1204+ if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1205+ continue;
1206 needed += sizeof(struct dm_name_list);
1207 needed += strlen(hc->name) + 1;
1208 needed += ALIGN_MASK;
1209@@ -537,6 +547,8 @@ static int list_devices(struct dm_ioctl
1210 */
1211 for (i = 0; i < NUM_BUCKETS; i++) {
1212 list_for_each_entry (hc, _name_buckets + i, name_list) {
1213+ if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1214+ continue;
1215 if (old_nl)
1216 old_nl->next = (uint32_t) ((void *) nl -
1217 (void *) old_nl);
c2806d43 1218@@ -1796,8 +1808,8 @@ static int ctl_ioctl(uint command, struc
3cc86a71
AM
1219 size_t input_param_size;
1220 struct dm_ioctl param_kernel;
1221
1222- /* only root can play with this */
1223- if (!capable(CAP_SYS_ADMIN))
1224+ /* only root and certain contexts can play with this */
1225+ if (!vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_MAPPER))
1226 return -EACCES;
1227
1228 if (_IOC_TYPE(command) != DM_IOCTL)
c2806d43
AM
1229diff -urNp -x '*.orig' linux-4.4/drivers/md/dm.c linux-4.4/drivers/md/dm.c
1230--- linux-4.4/drivers/md/dm.c 2021-02-24 16:56:10.945726521 +0100
1231+++ linux-4.4/drivers/md/dm.c 2021-02-24 16:56:24.546154835 +0100
1232@@ -25,6 +25,7 @@
1233 #include <linux/elevator.h> /* for rq_end_sector() */
1234 #include <linux/blk-mq.h>
1235 #include <linux/pr.h>
1236+#include <linux/vs_base.h>
d33d7b00 1237
c2806d43
AM
1238 #include <trace/events/block.h>
1239
1240@@ -144,6 +145,7 @@ struct mapped_device {
1241 struct mutex suspend_lock;
1242 atomic_t holders;
1243 atomic_t open_count;
1244+ vxid_t xid;
1245
1246 /*
1247 * The current mapping.
1248@@ -445,6 +447,7 @@ int dm_deleting_md(struct mapped_device
1249 static int dm_blk_open(struct block_device *bdev, fmode_t mode)
1250 {
1251 struct mapped_device *md;
1252+ int ret = -ENXIO;
1253
1254 spin_lock(&_minor_lock);
1255
1256@@ -453,17 +456,19 @@ static int dm_blk_open(struct block_devi
1257 goto out;
1258
1259 if (test_bit(DMF_FREEING, &md->flags) ||
1260- dm_deleting_md(md)) {
1261- md = NULL;
1262+ dm_deleting_md(md))
1263+ goto out;
1264+
1265+ ret = -EACCES;
1266+ if (!vx_check(md->xid, VS_IDENT|VS_HOSTID))
1267 goto out;
1268- }
1269
1270 dm_get(md);
1271 atomic_inc(&md->open_count);
1272+ ret = 0;
1273 out:
1274 spin_unlock(&_minor_lock);
1275-
1276- return md ? 0 : -ENXIO;
1277+ return ret;
1278 }
1279
1280 static void dm_blk_close(struct gendisk *disk, fmode_t mode)
1281@@ -909,6 +914,14 @@ int dm_set_geometry(struct mapped_device
1282 return 0;
1283 }
1284
1285+/*
1286+ * Get the xid associated with a dm device
1287+ */
1288+vxid_t dm_get_xid(struct mapped_device *md)
1289+{
1290+ return md->xid;
1291+}
1292+
1293 /*-----------------------------------------------------------------
1294 * CRUD START:
1295 * A more elegant soln is in the works that uses the queue
1296@@ -2381,6 +2394,7 @@ static struct mapped_device *alloc_dev(i
1297 INIT_LIST_HEAD(&md->table_devices);
1298 spin_lock_init(&md->uevent_lock);
1299
1300+ md->xid = vx_current_xid();
1301 md->queue = blk_alloc_queue(GFP_KERNEL);
1302 if (!md->queue)
1303 goto bad;
1304diff -urNp -x '*.orig' linux-4.4/drivers/md/dm.h linux-4.4/drivers/md/dm.h
1305--- linux-4.4/drivers/md/dm.h 2016-01-11 00:01:32.000000000 +0100
1306+++ linux-4.4/drivers/md/dm.h 2021-02-24 16:56:24.546154835 +0100
1307@@ -52,6 +52,8 @@ struct dm_dev_internal {
1308 struct dm_table;
1309 struct dm_md_mempools;
1310
1311+vxid_t dm_get_xid(struct mapped_device *md);
1312+
1313 /*-----------------------------------------------------------------
1314 * Internal table functions.
1315 *---------------------------------------------------------------*/
1316diff -urNp -x '*.orig' linux-4.4/drivers/net/tun.c linux-4.4/drivers/net/tun.c
1317--- linux-4.4/drivers/net/tun.c 2021-02-24 16:56:11.222401901 +0100
1318+++ linux-4.4/drivers/net/tun.c 2021-02-24 16:56:24.546154835 +0100
1319@@ -65,6 +65,7 @@
1320 #include <linux/nsproxy.h>
1321 #include <linux/virtio_net.h>
1322 #include <linux/rcupdate.h>
1323+#include <linux/vs_network.h>
1324 #include <net/net_namespace.h>
1325 #include <net/netns/generic.h>
1326 #include <net/rtnetlink.h>
1327@@ -181,6 +182,7 @@ struct tun_struct {
1328 unsigned int flags;
1329 kuid_t owner;
1330 kgid_t group;
1331+ vnid_t nid;
1332
1333 struct net_device *dev;
1334 netdev_features_t set_features;
1335@@ -475,6 +477,7 @@ static inline bool tun_not_capable(struc
b00e13aa
AM
1336 return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
1337 (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
1338 !ns_capable(net->user_ns, CAP_NET_ADMIN);
1339+ /* !cap_raised(current_cap(), CAP_NET_ADMIN) */
1340 }
1341
1342 static void tun_set_real_num_queues(struct tun_struct *tun)
3cc86a71 1343@@ -1465,6 +1468,7 @@ static void tun_setup(struct net_device
2380c486 1344
537831f9
AM
1345 tun->owner = INVALID_UID;
1346 tun->group = INVALID_GID;
1347+ tun->nid = nx_current_nid();
2380c486 1348
ec22aa5c
AM
1349 dev->ethtool_ops = &tun_ethtool_ops;
1350 dev->destructor = tun_free_netdev;
3cc86a71 1351@@ -1661,7 +1665,7 @@ static int tun_set_iff(struct net *net,
b00e13aa
AM
1352 int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ?
1353 MAX_TAP_QUEUES : 1;
1354
1355- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
c2e5f7c8 1356+ if (!nx_ns_capable(net->user_ns, CAP_NET_ADMIN, NXC_TUN_CREATE))
b00e13aa
AM
1357 return -EPERM;
1358 err = security_tun_dev_create();
1359 if (err < 0)
3cc86a71 1360@@ -2018,6 +2022,16 @@ static long __tun_chr_ioctl(struct file
537831f9 1361 from_kgid(&init_user_ns, tun->group));
2380c486 1362 break;
d337f35e 1363
2380c486
JR
1364+ case TUNSETNID:
1365+ if (!capable(CAP_CONTEXT))
1366+ return -EPERM;
d337f35e 1367+
2380c486 1368+ /* Set nid owner of the device */
61333608 1369+ tun->nid = (vnid_t) arg;
d337f35e 1370+
763640ca 1371+ tun_debug(KERN_INFO, tun, "nid owner set to %u\n", tun->nid);
2380c486 1372+ break;
d337f35e 1373+
2380c486
JR
1374 case TUNSETLINK:
1375 /* Only allow setting the type when the interface is down */
ec22aa5c 1376 if (tun->dev->flags & IFF_UP) {
c2806d43
AM
1377diff -urNp -x '*.orig' linux-4.4/drivers/scsi/cxgbi/libcxgbi.c linux-4.4/drivers/scsi/cxgbi/libcxgbi.c
1378--- linux-4.4/drivers/scsi/cxgbi/libcxgbi.c 2021-02-24 16:56:11.419074761 +0100
1379+++ linux-4.4/drivers/scsi/cxgbi/libcxgbi.c 2021-02-24 16:56:24.546154835 +0100
1380@@ -773,7 +773,8 @@ static struct cxgbi_sock *cxgbi_check_ro
bb20add7
AM
1381 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
1382
1383 err = ipv6_dev_get_saddr(&init_net, idev ? idev->dev : NULL,
1384- &daddr6->sin6_addr, 0, &pref_saddr);
1385+ &daddr6->sin6_addr, 0, &pref_saddr,
1386+ NULL);
1387 if (err) {
1388 pr_info("failed to get source address to reach %pI6\n",
1389 &daddr6->sin6_addr);
c2806d43
AM
1390diff -urNp -x '*.orig' linux-4.4/drivers/tty/sysrq.c linux-4.4/drivers/tty/sysrq.c
1391--- linux-4.4/drivers/tty/sysrq.c 2021-02-24 16:56:11.559079170 +0100
1392+++ linux-4.4/drivers/tty/sysrq.c 2021-02-24 16:56:24.546154835 +0100
bb20add7 1393@@ -47,6 +47,7 @@
c2e5f7c8
JR
1394 #include <linux/syscalls.h>
1395 #include <linux/of.h>
bb20add7 1396 #include <linux/rcupdate.h>
ab30d09f
AM
1397+#include <linux/vserver/debug.h>
1398
1399 #include <asm/ptrace.h>
1400 #include <asm/irq_regs.h>
1d9ad342 1401@@ -427,6 +428,21 @@ static struct sysrq_key_op sysrq_unrt_op
ab30d09f
AM
1402 .enable_mask = SYSRQ_ENABLE_RTNICE,
1403 };
1404
1405+
1406+#ifdef CONFIG_VSERVER_DEBUG
1407+static void sysrq_handle_vxinfo(int key)
1408+{
1409+ dump_vx_info_inactive((key == 'x') ? 0 : 1);
1410+}
1411+
1412+static struct sysrq_key_op sysrq_showvxinfo_op = {
1413+ .handler = sysrq_handle_vxinfo,
1414+ .help_msg = "conteXt",
1415+ .action_msg = "Show Context Info",
1416+ .enable_mask = SYSRQ_ENABLE_DUMP,
1417+};
1418+#endif
1419+
1420 /* Key Operations table and lock */
1421 static DEFINE_SPINLOCK(sysrq_key_table_lock);
1422
1d9ad342 1423@@ -483,7 +499,11 @@ static struct sysrq_key_op *sysrq_key_ta
927ca606 1424 /* x: May be registered on mips for TLB dump */
ab30d09f 1425 /* x: May be registered on ppc/powerpc for xmon */
537831f9 1426 /* x: May be registered on sparc64 for global PMU dump */
ab30d09f
AM
1427+#ifdef CONFIG_VSERVER_DEBUG
1428+ &sysrq_showvxinfo_op, /* x */
1429+#else
4bf69007 1430 NULL, /* x */
ab30d09f
AM
1431+#endif
1432 /* y: May be registered on sparc64 for global register dump */
1433 NULL, /* y */
1434 &sysrq_ftrace_dump_op, /* z */
1d9ad342 1435@@ -498,6 +518,8 @@ static int sysrq_key_table_key2index(int
ab30d09f
AM
1436 retval = key - '0';
1437 else if ((key >= 'a') && (key <= 'z'))
1438 retval = key + 10 - 'a';
1439+ else if ((key >= 'A') && (key <= 'Z'))
1440+ retval = key + 10 - 'A';
1441 else
1442 retval = -1;
1443 return retval;
c2806d43
AM
1444diff -urNp -x '*.orig' linux-4.4/drivers/tty/tty_io.c linux-4.4/drivers/tty/tty_io.c
1445--- linux-4.4/drivers/tty/tty_io.c 2021-02-24 16:56:11.559079170 +0100
1446+++ linux-4.4/drivers/tty/tty_io.c 2021-02-24 16:56:24.546154835 +0100
1e8b8f9b 1447@@ -104,6 +104,7 @@
ab30d09f
AM
1448
1449 #include <linux/kmod.h>
1450 #include <linux/nsproxy.h>
1451+#include <linux/vs_pid.h>
1452
1453 #undef TTY_DEBUG_HANGUP
927ca606 1454 #ifdef TTY_DEBUG_HANGUP
c2806d43 1455@@ -2296,7 +2297,8 @@ static int tiocsti(struct tty_struct *tt
ab30d09f
AM
1456 char ch, mbz = 0;
1457 struct tty_ldisc *ld;
1458
1459- if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN))
1460+ if (((current->signal->tty != tty) &&
1461+ !vx_capable(CAP_SYS_ADMIN, VXC_TIOCSTI)))
1462 return -EPERM;
1463 if (get_user(ch, p))
1464 return -EFAULT;
3cc86a71 1465@@ -2607,6 +2609,7 @@ static int tiocspgrp(struct tty_struct *
c2806d43 1466
ab30d09f
AM
1467 if (get_user(pgrp_nr, p))
1468 return -EFAULT;
1469+ pgrp_nr = vx_rmap_pid(pgrp_nr);
1470 if (pgrp_nr < 0)
1471 return -EINVAL;
c2806d43
AM
1472
1473diff -urNp -x '*.orig' linux-4.4/fs/attr.c linux-4.4/fs/attr.c
1474--- linux-4.4/fs/attr.c 2021-02-24 16:56:11.675749511 +0100
1475+++ linux-4.4/fs/attr.c 2021-02-24 16:56:24.549488273 +0100
537831f9 1476@@ -15,6 +15,9 @@
d337f35e 1477 #include <linux/security.h>
f6c5ef8b 1478 #include <linux/evm.h>
537831f9 1479 #include <linux/ima.h>
d337f35e
JR
1480+#include <linux/proc_fs.h>
1481+#include <linux/devpts_fs.h>
2380c486 1482+#include <linux/vs_tag.h>
d337f35e 1483
93de0823
AM
1484 /**
1485 * inode_change_ok - check if attribute changes to an inode are allowed
b00e13aa 1486@@ -77,6 +80,10 @@ int inode_change_ok(const struct inode *
93de0823 1487 return -EPERM;
d337f35e 1488 }
93de0823
AM
1489
1490+ /* check for inode tag permission */
2380c486 1491+ if (dx_permission(inode, MAY_WRITE))
93de0823 1492+ return -EACCES;
2380c486 1493+
93de0823
AM
1494 return 0;
1495 }
1496 EXPORT_SYMBOL(inode_change_ok);
b00e13aa 1497@@ -147,6 +154,8 @@ void setattr_copy(struct inode *inode, c
d337f35e
JR
1498 inode->i_uid = attr->ia_uid;
1499 if (ia_valid & ATTR_GID)
1500 inode->i_gid = attr->ia_gid;
1501+ if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode))
1502+ inode->i_tag = attr->ia_tag;
1503 if (ia_valid & ATTR_ATIME)
1504 inode->i_atime = timespec_trunc(attr->ia_atime,
1505 inode->i_sb->s_time_gran);
c2e5f7c8 1506@@ -197,7 +206,8 @@ int notify_change(struct dentry * dentry
92598135
AM
1507
1508 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
78865d5b
AM
1509
1510- if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
1511+ if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
1512+ ATTR_TAG | ATTR_TIMES_SET)) {
1513 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1514 return -EPERM;
1515 }
c2806d43
AM
1516diff -urNp -x '*.orig' linux-4.4/fs/block_dev.c linux-4.4/fs/block_dev.c
1517--- linux-4.4/fs/block_dev.c 2021-02-24 16:56:11.679082949 +0100
1518+++ linux-4.4/fs/block_dev.c 2021-02-24 16:56:24.549488273 +0100
927ca606 1519@@ -29,6 +29,7 @@
2380c486 1520 #include <linux/log2.h>
db55b927 1521 #include <linux/cleancache.h>
927ca606 1522 #include <linux/dax.h>
2380c486
JR
1523+#include <linux/vs_device.h>
1524 #include <asm/uaccess.h>
1525 #include "internal.h"
1526
927ca606 1527@@ -645,6 +646,7 @@ struct block_device *bdget(dev_t dev)
2380c486
JR
1528 bdev->bd_invalidated = 0;
1529 inode->i_mode = S_IFBLK;
1530 inode->i_rdev = dev;
1531+ inode->i_mdev = dev;
1532 inode->i_bdev = bdev;
1533 inode->i_data.a_ops = &def_blk_aops;
1534 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
927ca606 1535@@ -691,6 +693,11 @@ EXPORT_SYMBOL(bdput);
2380c486
JR
1536 static struct block_device *bd_acquire(struct inode *inode)
1537 {
1538 struct block_device *bdev;
1539+ dev_t mdev;
1540+
1541+ if (!vs_map_blkdev(inode->i_rdev, &mdev, DATTR_OPEN))
1542+ return NULL;
1543+ inode->i_mdev = mdev;
1544
1545 spin_lock(&bdev_lock);
1546 bdev = inode->i_bdev;
927ca606 1547@@ -701,7 +708,7 @@ static struct block_device *bd_acquire(s
2380c486
JR
1548 }
1549 spin_unlock(&bdev_lock);
1550
1551- bdev = bdget(inode->i_rdev);
1552+ bdev = bdget(mdev);
1553 if (bdev) {
1554 spin_lock(&bdev_lock);
1555 if (!inode->i_bdev) {
c2806d43
AM
1556diff -urNp -x '*.orig' linux-4.4/fs/btrfs/ctree.h linux-4.4/fs/btrfs/ctree.h
1557--- linux-4.4/fs/btrfs/ctree.h 2021-02-24 16:56:11.682416387 +0100
1558+++ linux-4.4/fs/btrfs/ctree.h 2021-02-24 16:56:24.549488273 +0100
3cc86a71 1559@@ -732,11 +732,14 @@ struct btrfs_inode_item {
e22b5178
AM
1560 /* modification sequence number for NFS */
1561 __le64 sequence;
1562
1563+ __le16 tag;
1564 /*
1565 * a little future expansion, for more than this we can
1566 * just grow the inode item and version it
1567 */
1568- __le64 reserved[4];
1569+ __le16 reserved16;
1570+ __le32 reserved32;
1571+ __le64 reserved[3];
1572 struct btrfs_timespec atime;
1573 struct btrfs_timespec ctime;
1574 struct btrfs_timespec mtime;
48cb6a3c 1575@@ -2189,6 +2192,8 @@ struct btrfs_ioctl_defrag_range_args {
c2e5f7c8 1576 #define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
bb20add7 1577 #define BTRFS_DEFAULT_MAX_INLINE (8192)
e22b5178
AM
1578
1579+#define BTRFS_MOUNT_TAGGED (1 << 24)
1580+
1581 #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1582 #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
b00e13aa 1583 #define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt)
48cb6a3c 1584@@ -2531,6 +2536,7 @@ BTRFS_SETGET_FUNCS(inode_block_group, st
e22b5178
AM
1585 BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
1586 BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
1587 BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
1588+BTRFS_SETGET_FUNCS(inode_tag, struct btrfs_inode_item, tag, 16);
1589 BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
1590 BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
1591 BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
48cb6a3c 1592@@ -2578,6 +2584,10 @@ BTRFS_SETGET_FUNCS(extent_flags, struct
78865d5b
AM
1593
1594 BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
1595
1596+#define BTRFS_INODE_IXUNLINK (1 << 24)
1597+#define BTRFS_INODE_BARRIER (1 << 25)
1598+#define BTRFS_INODE_COW (1 << 26)
1599+
1600
1601 BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
1602
48cb6a3c 1603@@ -4041,6 +4051,7 @@ long btrfs_ioctl(struct file *file, unsi
d4263eb0
JR
1604 void btrfs_update_iflags(struct inode *inode);
1605 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
c2e5f7c8 1606 int btrfs_is_empty_uuid(u8 *uuid);
d4263eb0 1607+int btrfs_sync_flags(struct inode *inode, int, int);
763640ca
JR
1608 int btrfs_defrag_file(struct inode *inode, struct file *file,
1609 struct btrfs_ioctl_defrag_range_args *range,
1610 u64 newer_than, unsigned long max_pages);
c2806d43
AM
1611diff -urNp -x '*.orig' linux-4.4/fs/btrfs/disk-io.c linux-4.4/fs/btrfs/disk-io.c
1612--- linux-4.4/fs/btrfs/disk-io.c 2021-02-24 16:56:11.685749826 +0100
1613+++ linux-4.4/fs/btrfs/disk-io.c 2021-02-24 16:56:24.549488273 +0100
48cb6a3c 1614@@ -2666,6 +2666,9 @@ int open_ctree(struct super_block *sb,
763640ca 1615 goto fail_alloc;
e22b5178
AM
1616 }
1617
1618+ if (btrfs_test_opt(tree_root, TAGGED))
1619+ sb->s_flags |= MS_TAGGED;
1620+
1621 features = btrfs_super_incompat_flags(disk_super) &
1622 ~BTRFS_FEATURE_INCOMPAT_SUPP;
1623 if (features) {
c2806d43
AM
1624diff -urNp -x '*.orig' linux-4.4/fs/btrfs/inode.c linux-4.4/fs/btrfs/inode.c
1625--- linux-4.4/fs/btrfs/inode.c 2021-02-24 16:56:11.692416702 +0100
1626+++ linux-4.4/fs/btrfs/inode.c 2021-02-24 16:56:24.552821711 +0100
c2e5f7c8 1627@@ -43,6 +43,7 @@
b00e13aa 1628 #include <linux/blkdev.h>
c2e5f7c8 1629 #include <linux/posix_acl_xattr.h>
927ca606 1630 #include <linux/uio.h>
e22b5178 1631+#include <linux/vs_tag.h>
e22b5178
AM
1632 #include "ctree.h"
1633 #include "disk-io.h"
c2e5f7c8 1634 #include "transaction.h"
c2806d43 1635@@ -3666,6 +3667,9 @@ static void btrfs_read_locked_inode(stru
bb20add7 1636 unsigned long ptr;
e22b5178 1637 int maybe_acls;
e22b5178 1638 u32 rdev;
a4a22af8
AM
1639+ kuid_t kuid;
1640+ kgid_t kgid;
1641+ ktag_t ktag;
e22b5178 1642 int ret;
763640ca 1643 bool filled = false;
bb20add7 1644 int first_xattr_slot;
c2806d43 1645@@ -3693,8 +3697,14 @@ static void btrfs_read_locked_inode(stru
a168f21d 1646 struct btrfs_inode_item);
e22b5178 1647 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
f6c5ef8b 1648 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
537831f9
AM
1649- i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
1650- i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
e22b5178 1651+
a4a22af8
AM
1652+ kuid = make_kuid(&init_user_ns, btrfs_inode_uid(leaf, inode_item));
1653+ kgid = make_kgid(&init_user_ns, btrfs_inode_gid(leaf, inode_item));
1654+ ktag = make_ktag(&init_user_ns, btrfs_inode_tag(leaf, inode_item));
1655+
1656+ inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
1657+ inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
1658+ inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, ktag);
e22b5178
AM
1659 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
1660
927ca606 1661 inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
c2806d43 1662@@ -3850,11 +3860,18 @@ static void fill_inode_item(struct btrfs
e22b5178
AM
1663 struct inode *inode)
1664 {
b00e13aa 1665 struct btrfs_map_token token;
a4a22af8
AM
1666+ uid_t uid = from_kuid(&init_user_ns,
1667+ TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
1668+ gid_t gid = from_kgid(&init_user_ns,
1669+ TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
b00e13aa
AM
1670
1671 btrfs_init_map_token(&token);
1672
1673- btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
1674- btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
1675+ btrfs_set_token_inode_uid(leaf, item, uid, &token);
1676+ btrfs_set_token_inode_gid(leaf, item, gid, &token);
e22b5178 1677+#ifdef CONFIG_TAGGING_INTERN
b00e13aa 1678+ btrfs_set_token_inode_tag(leaf, item, i_tag_read(inode), &token);
e22b5178 1679+#endif
b00e13aa
AM
1680 btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
1681 &token);
1682 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
c2806d43 1683@@ -10133,6 +10150,7 @@ static const struct inode_operations btr
d4263eb0
JR
1684 .listxattr = btrfs_listxattr,
1685 .removexattr = btrfs_removexattr,
d4263eb0
JR
1686 .permission = btrfs_permission,
1687+ .sync_flags = btrfs_sync_flags,
a168f21d 1688 .get_acl = btrfs_get_acl,
f15949f2 1689 .set_acl = btrfs_set_acl,
c2e5f7c8 1690 .update_time = btrfs_update_time,
c2806d43 1691@@ -10141,6 +10159,7 @@ static const struct inode_operations btr
7e46296a 1692 static const struct inode_operations btrfs_dir_ro_inode_operations = {
d4263eb0 1693 .lookup = btrfs_lookup,
d4263eb0 1694 .permission = btrfs_permission,
d4263eb0 1695+ .sync_flags = btrfs_sync_flags,
a168f21d 1696 .get_acl = btrfs_get_acl,
f15949f2 1697 .set_acl = btrfs_set_acl,
c2e5f7c8 1698 .update_time = btrfs_update_time,
c2806d43 1699@@ -10211,6 +10230,7 @@ static const struct inode_operations btr
c2e5f7c8
JR
1700 .removexattr = btrfs_removexattr,
1701 .permission = btrfs_permission,
1702 .fiemap = btrfs_fiemap,
1703+ .sync_flags = btrfs_sync_flags,
1704 .get_acl = btrfs_get_acl,
bb20add7 1705 .set_acl = btrfs_set_acl,
c2e5f7c8 1706 .update_time = btrfs_update_time,
c2806d43
AM
1707diff -urNp -x '*.orig' linux-4.4/fs/btrfs/ioctl.c linux-4.4/fs/btrfs/ioctl.c
1708--- linux-4.4/fs/btrfs/ioctl.c 2021-02-24 16:56:11.692416702 +0100
1709+++ linux-4.4/fs/btrfs/ioctl.c 2021-02-24 16:56:24.552821711 +0100
1710@@ -109,10 +109,13 @@ static unsigned int btrfs_flags_to_ioctl
d4263eb0
JR
1711 {
1712 unsigned int iflags = 0;
1713
1714- if (flags & BTRFS_INODE_SYNC)
1715- iflags |= FS_SYNC_FL;
1716 if (flags & BTRFS_INODE_IMMUTABLE)
1717 iflags |= FS_IMMUTABLE_FL;
1718+ if (flags & BTRFS_INODE_IXUNLINK)
1719+ iflags |= FS_IXUNLINK_FL;
1720+
1721+ if (flags & BTRFS_INODE_SYNC)
1722+ iflags |= FS_SYNC_FL;
1723 if (flags & BTRFS_INODE_APPEND)
1724 iflags |= FS_APPEND_FL;
1725 if (flags & BTRFS_INODE_NODUMP)
c2806d43 1726@@ -129,34 +132,84 @@ static unsigned int btrfs_flags_to_ioctl
763640ca
JR
1727 else if (flags & BTRFS_INODE_NOCOMPRESS)
1728 iflags |= FS_NOCOMP_FL;
d4263eb0
JR
1729
1730+ if (flags & BTRFS_INODE_BARRIER)
1731+ iflags |= FS_BARRIER_FL;
1732+ if (flags & BTRFS_INODE_COW)
1733+ iflags |= FS_COW_FL;
1734 return iflags;
1735 }
1736
1737 /*
1738- * Update inode->i_flags based on the btrfs internal flags.
1739+ * Update inode->i_(v)flags based on the btrfs internal flags.
1740 */
1741 void btrfs_update_iflags(struct inode *inode)
1742 {
1743 struct btrfs_inode *ip = BTRFS_I(inode);
bb20add7 1744 unsigned int new_fl = 0;
d4263eb0
JR
1745
1746- if (ip->flags & BTRFS_INODE_SYNC)
bb20add7 1747- new_fl |= S_SYNC;
d4263eb0 1748 if (ip->flags & BTRFS_INODE_IMMUTABLE)
bb20add7 1749 new_fl |= S_IMMUTABLE;
d4263eb0 1750+ if (ip->flags & BTRFS_INODE_IXUNLINK)
bb20add7 1751+ new_fl |= S_IXUNLINK;
d4263eb0
JR
1752+
1753+ if (ip->flags & BTRFS_INODE_SYNC)
bb20add7 1754+ new_fl |= S_SYNC;
d4263eb0 1755 if (ip->flags & BTRFS_INODE_APPEND)
bb20add7 1756 new_fl |= S_APPEND;
d4263eb0 1757 if (ip->flags & BTRFS_INODE_NOATIME)
bb20add7 1758 new_fl |= S_NOATIME;
d4263eb0 1759 if (ip->flags & BTRFS_INODE_DIRSYNC)
bb20add7
AM
1760 new_fl |= S_DIRSYNC;
1761-
1762 set_mask_bits(&inode->i_flags,
1763- S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
1764+ S_SYNC | S_APPEND | S_IMMUTABLE | S_IXUNLINK | S_NOATIME | S_DIRSYNC,
1765 new_fl);
d4263eb0 1766+
bb20add7 1767+ new_fl = 0;
d4263eb0 1768+ if (ip->flags & BTRFS_INODE_BARRIER)
bb20add7 1769+ new_fl |= V_BARRIER;
d4263eb0 1770+ if (ip->flags & BTRFS_INODE_COW)
bb20add7 1771+ new_fl |= V_COW;
d4263eb0 1772+
bb20add7
AM
1773+ set_mask_bits(&inode->i_vflags,
1774+ V_BARRIER | V_COW, new_fl);
1775 }
1776
1777 /*
d4263eb0
JR
1778+ * Update btrfs internal flags from inode->i_(v)flags.
1779+ */
1780+void btrfs_update_flags(struct inode *inode)
1781+{
1782+ struct btrfs_inode *ip = BTRFS_I(inode);
1783+
1784+ unsigned int flags = inode->i_flags;
1785+ unsigned int vflags = inode->i_vflags;
1786+
1787+ ip->flags &= ~(BTRFS_INODE_SYNC | BTRFS_INODE_APPEND | /* clear every bit that is re-derived below */
1788+ BTRFS_INODE_IMMUTABLE | BTRFS_INODE_IXUNLINK |
1789+ BTRFS_INODE_NOATIME | BTRFS_INODE_DIRSYNC |
1790+ BTRFS_INODE_BARRIER | BTRFS_INODE_COW);
1791+
1792+ if (flags & S_IMMUTABLE) /* S_* bits are read from inode->i_flags */
1793+ ip->flags |= BTRFS_INODE_IMMUTABLE;
1794+ if (flags & S_IXUNLINK)
1795+ ip->flags |= BTRFS_INODE_IXUNLINK;
1796+
1797+ if (flags & S_SYNC)
1798+ ip->flags |= BTRFS_INODE_SYNC;
1799+ if (flags & S_APPEND)
1800+ ip->flags |= BTRFS_INODE_APPEND;
1801+ if (flags & S_NOATIME)
1802+ ip->flags |= BTRFS_INODE_NOATIME;
1803+ if (flags & S_DIRSYNC)
1804+ ip->flags |= BTRFS_INODE_DIRSYNC;
1805+
1806+ if (vflags & V_BARRIER) /* V_* bits are read from inode->i_vflags */
1807+ ip->flags |= BTRFS_INODE_BARRIER;
1808+ if (vflags & V_COW)
1809+ ip->flags |= BTRFS_INODE_COW;
bb20add7
AM
1810+ }
1811+
1812+/*
1813 * Inherit flags from the parent inode.
1814 *
1815 * Currently only the compression flags and the cow flags are inherited.
c2806d43 1816@@ -169,6 +222,7 @@ void btrfs_inherit_iflags(struct inode *
f6c5ef8b 1817 return;
d4263eb0 1818
f6c5ef8b
AM
1819 flags = BTRFS_I(dir)->flags;
1820+ flags &= ~BTRFS_INODE_BARRIER;
d4263eb0 1821
f6c5ef8b
AM
1822 if (flags & BTRFS_INODE_NOCOMPRESS) {
1823 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
c2806d43 1824@@ -187,6 +241,30 @@ void btrfs_inherit_iflags(struct inode *
d4263eb0
JR
1825 btrfs_update_iflags(inode);
1826 }
1827
1828+int btrfs_sync_flags(struct inode *inode, int flags, int vflags) /* store flags/vflags on the inode and write it back; always returns 0 */
1829+{
1830+ struct btrfs_inode *ip = BTRFS_I(inode);
1831+ struct btrfs_root *root = ip->root;
1832+ struct btrfs_trans_handle *trans;
1833+ int ret;
1834+
763640ca 1835+ trans = btrfs_join_transaction(root);
d4263eb0
JR
1836+ BUG_ON(IS_ERR(trans)); /* failure is reported as ERR_PTR, never NULL */
1837+
d4263eb0
JR
1838+ inode->i_flags = flags;
1839+ inode->i_vflags = vflags;
1840+ btrfs_update_flags(inode); /* fold i_flags/i_vflags into ip->flags */
e22b5178
AM
1841+
1842+ ret = btrfs_update_inode(trans, root, inode);
1843+ BUG_ON(ret);
1844+
1845+ btrfs_update_iflags(inode); /* re-derive i_flags/i_vflags from ip->flags */
d4263eb0
JR
1846+ inode->i_ctime = CURRENT_TIME;
1847+ btrfs_end_transaction(trans, root);
1848+
1849+ return 0;
1850+}
1851+
1852 static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
1853 {
b00e13aa 1854 struct btrfs_inode *ip = BTRFS_I(file_inode(file));
c2806d43 1855@@ -249,21 +327,27 @@ static int btrfs_ioctl_setflags(struct f
d4263eb0
JR
1856
1857 flags = btrfs_mask_flags(inode->i_mode, flags);
1858 oldflags = btrfs_flags_to_ioctl(ip->flags);
1859- if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
1860+ if ((flags ^ oldflags) & (FS_APPEND_FL |
1861+ FS_IMMUTABLE_FL | FS_IXUNLINK_FL)) {
1862 if (!capable(CAP_LINUX_IMMUTABLE)) {
1863 ret = -EPERM;
1864 goto out_unlock;
92598135
AM
1865 }
1866 }
d4263eb0
JR
1867
1868- if (flags & FS_SYNC_FL)
1869- ip->flags |= BTRFS_INODE_SYNC;
1870- else
1871- ip->flags &= ~BTRFS_INODE_SYNC;
1872 if (flags & FS_IMMUTABLE_FL)
1873 ip->flags |= BTRFS_INODE_IMMUTABLE;
1874 else
1875 ip->flags &= ~BTRFS_INODE_IMMUTABLE;
1876+ if (flags & FS_IXUNLINK_FL)
1877+ ip->flags |= BTRFS_INODE_IXUNLINK;
1878+ else
1879+ ip->flags &= ~BTRFS_INODE_IXUNLINK;
1880+
1881+ if (flags & FS_SYNC_FL)
1882+ ip->flags |= BTRFS_INODE_SYNC;
1883+ else
1884+ ip->flags &= ~BTRFS_INODE_SYNC;
1885 if (flags & FS_APPEND_FL)
1886 ip->flags |= BTRFS_INODE_APPEND;
1887 else
c2806d43
AM
1888diff -urNp -x '*.orig' linux-4.4/fs/btrfs/super.c linux-4.4/fs/btrfs/super.c
1889--- linux-4.4/fs/btrfs/super.c 2021-02-24 16:56:11.699083579 +0100
1890+++ linux-4.4/fs/btrfs/super.c 2021-02-24 16:56:24.552821711 +0100
927ca606
AM
1891@@ -306,7 +306,7 @@ enum {
1892 #ifdef CONFIG_BTRFS_DEBUG
1893 Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
1894 #endif
db55b927 1895- Opt_err,
f6c5ef8b 1896+ Opt_tag, Opt_notag, Opt_tagid, Opt_err,
e22b5178
AM
1897 };
1898
1899 static match_table_t tokens = {
927ca606
AM
1900@@ -363,6 +363,9 @@ static match_table_t tokens = {
1901 {Opt_fragment_metadata, "fragment=metadata"},
1902 {Opt_fragment_all, "fragment=all"},
1903 #endif
e22b5178
AM
1904+ {Opt_tag, "tag"},
1905+ {Opt_notag, "notag"},
1906+ {Opt_tagid, "tagid=%u"},
1907 {Opt_err, NULL},
1908 };
1909
927ca606
AM
1910@@ -745,6 +748,22 @@ int btrfs_parse_options(struct btrfs_roo
1911 btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
1e8b8f9b 1912 break;
927ca606 1913 #endif
e22b5178
AM
1914+#ifndef CONFIG_TAGGING_NONE
1915+ case Opt_tag:
1916+ printk(KERN_INFO "btrfs: use tagging\n");
1917+ btrfs_set_opt(info->mount_opt, TAGGED);
1918+ break;
1919+ case Opt_notag:
1920+ printk(KERN_INFO "btrfs: disabled tagging\n");
1921+ btrfs_clear_opt(info->mount_opt, TAGGED);
1922+ break;
1923+#endif
1924+#ifdef CONFIG_PROPAGATE
1925+ case Opt_tagid:
1926+ /* use args[0] */
1927+ btrfs_set_opt(info->mount_opt, TAGGED);
1928+ break;
1929+#endif
2bf5ad28 1930 case Opt_err:
bb20add7
AM
1931 btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
1932 ret = -EINVAL;
c2806d43 1933@@ -1653,6 +1672,12 @@ static int btrfs_remount(struct super_bl
42bc425c
AM
1934 btrfs_resize_thread_pool(fs_info,
1935 fs_info->thread_pool_size, old_thread_pool_size);
e22b5178
AM
1936
1937+ if (btrfs_test_opt(root, TAGGED) && !(sb->s_flags & MS_TAGGED)) {
1938+ printk("btrfs: %s: tagging not permitted on remount.\n",
1939+ sb->s_id);
1940+ return -EINVAL;
1941+ }
1942+
1943 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
b00e13aa 1944 goto out;
e22b5178 1945
c2806d43
AM
1946diff -urNp -x '*.orig' linux-4.4/fs/char_dev.c linux-4.4/fs/char_dev.c
1947--- linux-4.4/fs/char_dev.c 2021-02-24 16:56:11.709083894 +0100
1948+++ linux-4.4/fs/char_dev.c 2021-02-24 16:56:24.552821711 +0100
4744a4b1 1949@@ -21,6 +21,8 @@
2380c486
JR
1950 #include <linux/mutex.h>
1951 #include <linux/backing-dev.h>
7942c842 1952 #include <linux/tty.h>
2380c486
JR
1953+#include <linux/vs_context.h>
1954+#include <linux/vs_device.h>
1955
ec22aa5c
AM
1956 #include "internal.h"
1957
3cc86a71 1958@@ -356,14 +358,21 @@ static int chrdev_open(struct inode *ino
2380c486
JR
1959 struct cdev *p;
1960 struct cdev *new = NULL;
1961 int ret = 0;
1962+ dev_t mdev;
1963+
1964+ if (!vs_map_chrdev(inode->i_rdev, &mdev, DATTR_OPEN))
1965+ return -EPERM;
1966+ inode->i_mdev = mdev;
1967
1968 spin_lock(&cdev_lock);
1969 p = inode->i_cdev;
1970 if (!p) {
1971 struct kobject *kobj;
1972 int idx;
1973+
1974 spin_unlock(&cdev_lock);
1975- kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
1976+
1977+ kobj = kobj_lookup(cdev_map, mdev, &idx);
1978 if (!kobj)
1979 return -ENXIO;
1980 new = container_of(kobj, struct cdev, kobj);
c2806d43
AM
1981diff -urNp -x '*.orig' linux-4.4/fs/dcache.c linux-4.4/fs/dcache.c
1982--- linux-4.4/fs/dcache.c 2021-02-24 16:56:11.722417647 +0100
1983+++ linux-4.4/fs/dcache.c 2021-02-24 16:56:24.552821711 +0100
927ca606 1984@@ -39,6 +39,7 @@
f6c5ef8b 1985 #include <linux/ratelimit.h>
c2e5f7c8 1986 #include <linux/list_lru.h>
927ca606 1987 #include <linux/kasan.h>
d337f35e 1988+#include <linux/vs_limit.h>
927ca606 1989
d337f35e 1990 #include "internal.h"
db55b927 1991 #include "mount.h"
8931d859 1992@@ -683,6 +684,7 @@ static inline bool fast_dput(struct dent
927ca606
AM
1993 spin_lock(&dentry->d_lock);
1994 if (dentry->d_lockref.count > 1) {
1995 dentry->d_lockref.count--;
1996+ vx_dentry_dec(dentry);
1997 spin_unlock(&dentry->d_lock);
1998 return 1;
1999 }
8931d859 2000@@ -812,6 +814,7 @@ repeat:
927ca606
AM
2001 dentry_lru_add(dentry);
2002
2003 dentry->d_lockref.count--;
2004+ vx_dentry_dec(dentry);
2005 spin_unlock(&dentry->d_lock);
2006 return;
d337f35e 2007
8931d859 2008@@ -829,6 +832,7 @@ EXPORT_SYMBOL(dput);
d33d7b00 2009 static inline void __dget_dlock(struct dentry *dentry)
2380c486 2010 {
c2e5f7c8 2011 dentry->d_lockref.count++;
2380c486 2012+ vx_dentry_inc(dentry);
d337f35e 2013 }
2380c486 2014
d33d7b00 2015 static inline void __dget(struct dentry *dentry)
8931d859 2016@@ -841,6 +845,8 @@ struct dentry *dget_parent(struct dentry
bb20add7
AM
2017 int gotref;
2018 struct dentry *ret;
2019
2020+ vx_dentry_dec(dentry);
2021+
2022 /*
2023 * Do optimistic parent lookup without any
2024 * locking.
8931d859 2025@@ -871,6 +877,7 @@ repeat:
927ca606
AM
2026 rcu_read_unlock();
2027 BUG_ON(!ret->d_lockref.count);
2028 ret->d_lockref.count++;
2029+ vx_dentry_inc(ret);
2030 spin_unlock(&ret->d_lock);
2031 return ret;
2032 }
8931d859 2033@@ -1025,6 +1032,7 @@ static void shrink_dentry_list(struct li
927ca606
AM
2034 parent = lock_parent(dentry);
2035 if (dentry->d_lockref.count != 1) {
2036 dentry->d_lockref.count--;
2037+ vx_dentry_dec(dentry);
2038 spin_unlock(&dentry->d_lock);
2039 if (parent)
2040 spin_unlock(&parent->d_lock);
3cc86a71 2041@@ -1581,6 +1589,9 @@ struct dentry *__d_alloc(struct super_bl
d337f35e
JR
2042 struct dentry *dentry;
2043 char *dname;
2044
2045+ if (!vx_dentry_avail(1))
2046+ return NULL;
2047+
2380c486 2048 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
d337f35e
JR
2049 if (!dentry)
2050 return NULL;
3cc86a71 2051@@ -1619,6 +1630,7 @@ struct dentry *__d_alloc(struct super_bl
d337f35e 2052
c2e5f7c8 2053 dentry->d_lockref.count = 1;
763640ca 2054 dentry->d_flags = 0;
ab30d09f 2055+ vx_dentry_inc(dentry);
ab30d09f 2056 spin_lock_init(&dentry->d_lock);
d33d7b00 2057 seqcount_init(&dentry->d_seq);
763640ca 2058 dentry->d_inode = NULL;
3cc86a71 2059@@ -2379,6 +2391,7 @@ struct dentry *__d_lookup(const struct d
d337f35e 2060 }
2380c486 2061
c2e5f7c8 2062 dentry->d_lockref.count++;
2380c486
JR
2063+ vx_dentry_inc(dentry);
2064 found = dentry;
d337f35e 2065 spin_unlock(&dentry->d_lock);
2380c486 2066 break;
3cc86a71 2067@@ -3395,6 +3408,7 @@ static enum d_walk_ret d_genocide_kill(v
927ca606
AM
2068 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2069 dentry->d_flags |= DCACHE_GENOCIDE;
2070 dentry->d_lockref.count--;
2071+ vx_dentry_dec(dentry);
2072 }
2073 }
2074 return D_WALK_CONTINUE;
c2806d43
AM
2075diff -urNp -x '*.orig' linux-4.4/fs/devpts/inode.c linux-4.4/fs/devpts/inode.c
2076--- linux-4.4/fs/devpts/inode.c 2021-02-24 16:56:11.722417647 +0100
2077+++ linux-4.4/fs/devpts/inode.c 2021-02-24 16:56:24.552821711 +0100
bb20add7 2078@@ -27,6 +27,7 @@
d337f35e 2079 #include <linux/parser.h>
2380c486
JR
2080 #include <linux/fsnotify.h>
2081 #include <linux/seq_file.h>
d337f35e
JR
2082+#include <linux/vs_base.h>
2083
2380c486 2084 #define DEVPTS_DEFAULT_MODE 0600
ec22aa5c 2085 /*
bb20add7 2086@@ -38,6 +39,21 @@
ec22aa5c
AM
2087 #define DEVPTS_DEFAULT_PTMX_MODE 0000
2088 #define PTMX_MINOR 2
2380c486 2089
a168f21d 2090+static int devpts_permission(struct inode *inode, int mask)
d337f35e
JR
2091+{
2092+ int ret = -EACCES; /* result when vx_check() rejects the inode's tag */
2093+
2094+ /* devpts is xid tagged: generic_permission() is consulted only if vx_check() accepts the tag */
61333608 2095+ if (vx_check((vxid_t)i_tag_read(inode), VS_WATCH_P | VS_IDENT))
a168f21d 2096+ ret = generic_permission(inode, mask);
d337f35e
JR
2097+ return ret;
2098+}
2099+
2100+static struct inode_operations devpts_file_inode_operations = {
2101+ .permission = devpts_permission, /* tag check first, then generic_permission() */
2102+};
2380c486 2103+
1e8b8f9b
AM
2104+
2105 /*
2106 * sysctl support for setting limits on the number of Unix98 ptys allocated.
2107 * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly.
927ca606 2108@@ -353,6 +369,34 @@ static int devpts_show_options(struct se
d337f35e
JR
2109 return 0;
2110 }
2111
2112+static int devpts_filter(struct dentry *de) /* returns vx_check() of the dentry's inode tag; xid 0 if there is no inode */
2113+{
61333608 2114+ vxid_t xid = 0;
b3b0d4fd 2115+
d337f35e 2116+ /* devpts is xid tagged */
b3b0d4fd 2117+ if (de && de->d_inode)
61333608 2118+ xid = (vxid_t)i_tag_read(de->d_inode);
b3b0d4fd
AM
2119+#ifdef CONFIG_VSERVER_WARN_DEVPTS
2120+ else if (de) /* de may be NULL here; the warning reads de->d_name */
2121+ vxwprintk_task(1, "devpts " VS_Q("%.*s") " without inode.",
2122+ de->d_name.len, de->d_name.name);
2123+#endif
2124+ return vx_check(xid, VS_WATCH_P | VS_IDENT);
d337f35e
JR
2125+}
2126+
c2e5f7c8 2127+static int devpts_readdir(struct file * filp, struct dir_context *ctx)
d337f35e 2128+{
c2e5f7c8 2129+ return dcache_readdir_filter(filp, ctx, devpts_filter); /* devpts_filter is passed as the per-dentry callback */
d337f35e
JR
2130+}
2131+
2132+static struct file_operations devpts_dir_operations = {
2133+ .open = dcache_dir_open,
2134+ .release = dcache_dir_close,
2135+ .llseek = dcache_dir_lseek,
2136+ .read = generic_read_dir,
c2e5f7c8 2137+ .iterate = devpts_readdir, /* listing goes through devpts_filter */
d337f35e
JR
2138+};
2139+
2380c486 2140 static const struct super_operations devpts_sops = {
d337f35e
JR
2141 .statfs = simple_statfs,
2142 .remount_fs = devpts_remount,
927ca606 2143@@ -397,8 +441,10 @@ devpts_fill_super(struct super_block *s,
ec22aa5c 2144 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
d337f35e
JR
2145 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
2146 inode->i_op = &simple_dir_inode_operations;
2147- inode->i_fop = &simple_dir_operations;
2148+ inode->i_fop = &devpts_dir_operations;
f6c5ef8b 2149 set_nlink(inode, 2);
d337f35e 2150+ /* devpts is xid tagged */
61333608 2151+ i_tag_write(inode, (vtag_t)vx_current_xid());
d337f35e 2152
1e8b8f9b 2153 s->s_root = d_make_root(inode);
d337f35e 2154 if (s->s_root)
927ca606 2155@@ -630,6 +676,9 @@ struct inode *devpts_pty_new(struct pts_
ec22aa5c 2156 inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
d337f35e 2157 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
ec22aa5c 2158 init_special_inode(inode, S_IFCHR|opts->mode, device);
d337f35e 2159+ /* devpts is xid tagged */
61333608 2160+ i_tag_write(inode, (vtag_t)vx_current_xid());
d337f35e 2161+ inode->i_op = &devpts_file_inode_operations;
b00e13aa 2162 inode->i_private = priv;
d337f35e 2163
b00e13aa 2164 sprintf(s, "%d", index);
c2806d43
AM
2165diff -urNp -x '*.orig' linux-4.4/fs/ext2/balloc.c linux-4.4/fs/ext2/balloc.c
2166--- linux-4.4/fs/ext2/balloc.c 2016-01-11 00:01:32.000000000 +0100
2167+++ linux-4.4/fs/ext2/balloc.c 2021-02-24 16:56:24.552821711 +0100
b00e13aa 2168@@ -693,7 +693,6 @@ ext2_try_to_allocate(struct super_block
2380c486
JR
2169 start = 0;
2170 end = EXT2_BLOCKS_PER_GROUP(sb);
d337f35e 2171 }
2380c486
JR
2172-
2173 BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb));
2174
2175 repeat:
c2806d43
AM
2176diff -urNp -x '*.orig' linux-4.4/fs/ext2/ext2.h linux-4.4/fs/ext2/ext2.h
2177--- linux-4.4/fs/ext2/ext2.h 2016-01-11 00:01:32.000000000 +0100
2178+++ linux-4.4/fs/ext2/ext2.h 2021-02-24 16:56:24.552821711 +0100
1e8b8f9b
AM
2179@@ -244,8 +244,12 @@ struct ext2_group_desc
2180 #define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */
2181 #define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */
2182 #define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
2183+#define EXT2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */
2184 #define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
2185
2186+#define EXT2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */
2187+#define EXT2_COW_FL FS_COW_FL /* Copy on Write marker */
2188+
2189 #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
2190 #define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
2191
2192@@ -329,7 +333,8 @@ struct ext2_inode {
2193 __u16 i_pad1;
2194 __le16 l_i_uid_high; /* these 2 fields */
2195 __le16 l_i_gid_high; /* were reserved2[0] */
2196- __u32 l_i_reserved2;
2197+ __le16 l_i_tag; /* Context Tag */
2198+ __u16 l_i_reserved2;
2199 } linux2;
2200 struct {
2201 __u8 h_i_frag; /* Fragment number */
2202@@ -357,6 +362,7 @@ struct ext2_inode {
2203 #define i_gid_low i_gid
2204 #define i_uid_high osd2.linux2.l_i_uid_high
2205 #define i_gid_high osd2.linux2.l_i_gid_high
2206+#define i_raw_tag osd2.linux2.l_i_tag
2207 #define i_reserved2 osd2.linux2.l_i_reserved2
2208
2209 /*
927ca606
AM
2210@@ -389,6 +395,7 @@ struct ext2_inode {
2211 #else
2212 #define EXT2_MOUNT_DAX 0
2213 #endif
2214+#define EXT2_MOUNT_TAGGED 0x200000 /* Enable Context Tags */
1e8b8f9b
AM
2215
2216
2217 #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
927ca606 2218@@ -776,6 +783,7 @@ extern void ext2_set_inode_flags(struct
93de0823
AM
2219 extern void ext2_get_inode_flags(struct ext2_inode_info *);
2220 extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2221 u64 start, u64 len);
d4263eb0
JR
2222+extern int ext2_sync_flags(struct inode *, int, int);
2223
2224 /* ioctl.c */
2225 extern long ext2_ioctl(struct file *, unsigned int, unsigned long);
c2806d43
AM
2226diff -urNp -x '*.orig' linux-4.4/fs/ext2/file.c linux-4.4/fs/ext2/file.c
2227--- linux-4.4/fs/ext2/file.c 2016-01-11 00:01:32.000000000 +0100
2228+++ linux-4.4/fs/ext2/file.c 2021-02-24 16:56:24.552821711 +0100
927ca606 2229@@ -202,4 +202,5 @@ const struct inode_operations ext2_file_
a168f21d 2230 .get_acl = ext2_get_acl,
bb20add7 2231 .set_acl = ext2_set_acl,
ec22aa5c 2232 .fiemap = ext2_fiemap,
d337f35e
JR
2233+ .sync_flags = ext2_sync_flags,
2234 };
c2806d43
AM
2235diff -urNp -x '*.orig' linux-4.4/fs/ext2/ialloc.c linux-4.4/fs/ext2/ialloc.c
2236--- linux-4.4/fs/ext2/ialloc.c 2021-02-24 16:56:11.729084524 +0100
2237+++ linux-4.4/fs/ext2/ialloc.c 2021-02-24 16:56:24.556155150 +0100
e22b5178
AM
2238@@ -17,6 +17,7 @@
2239 #include <linux/backing-dev.h>
2240 #include <linux/buffer_head.h>
2241 #include <linux/random.h>
2242+#include <linux/vs_tag.h>
2243 #include "ext2.h"
2244 #include "xattr.h"
2245 #include "acl.h"
c2806d43 2246@@ -547,6 +548,7 @@ got:
76514441
AM
2247 inode->i_mode = mode;
2248 inode->i_uid = current_fsuid();
2249 inode->i_gid = dir->i_gid;
a4a22af8 2250+ i_tag_write(inode, dx_current_fstag(sb));
e22b5178 2251 } else
76514441 2252 inode_init_owner(inode, dir, mode);
e22b5178 2253
c2806d43
AM
2254diff -urNp -x '*.orig' linux-4.4/fs/ext2/inode.c linux-4.4/fs/ext2/inode.c
2255--- linux-4.4/fs/ext2/inode.c 2021-02-24 16:56:11.729084524 +0100
2256+++ linux-4.4/fs/ext2/inode.c 2021-02-24 16:56:24.556155150 +0100
927ca606 2257@@ -33,6 +33,7 @@
ec22aa5c
AM
2258 #include <linux/fiemap.h>
2259 #include <linux/namei.h>
927ca606 2260 #include <linux/uio.h>
d337f35e
JR
2261+#include <linux/vs_tag.h>
2262 #include "ext2.h"
2263 #include "acl.h"
927ca606 2264 #include "xattr.h"
8931d859 2265@@ -1274,39 +1275,62 @@ void ext2_set_inode_flags(struct inode *
d337f35e
JR
2266 {
2267 unsigned int flags = EXT2_I(inode)->i_flags;
2268
927ca606
AM
2269- inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
2270- S_DIRSYNC | S_DAX);
2271+ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | S_DAX |
d337f35e
JR
2272+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2273+
2274+ if (flags & EXT2_IMMUTABLE_FL)
2275+ inode->i_flags |= S_IMMUTABLE;
2380c486
JR
2276+ if (flags & EXT2_IXUNLINK_FL)
2277+ inode->i_flags |= S_IXUNLINK;
d337f35e
JR
2278+
2279 if (flags & EXT2_SYNC_FL)
2280 inode->i_flags |= S_SYNC;
2281 if (flags & EXT2_APPEND_FL)
2282 inode->i_flags |= S_APPEND;
2283- if (flags & EXT2_IMMUTABLE_FL)
2284- inode->i_flags |= S_IMMUTABLE;
2285 if (flags & EXT2_NOATIME_FL)
2286 inode->i_flags |= S_NOATIME;
2287 if (flags & EXT2_DIRSYNC_FL)
2288 inode->i_flags |= S_DIRSYNC;
927ca606
AM
2289 if (test_opt(inode->i_sb, DAX))
2290 inode->i_flags |= S_DAX;
2380c486
JR
2291+
2292+ inode->i_vflags &= ~(V_BARRIER | V_COW);
2293+
2294+ if (flags & EXT2_BARRIER_FL)
2295+ inode->i_vflags |= V_BARRIER;
2296+ if (flags & EXT2_COW_FL)
2297+ inode->i_vflags |= V_COW;
2298 }
2299
2300 /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
2301 void ext2_get_inode_flags(struct ext2_inode_info *ei)
2302 {
2303 unsigned int flags = ei->vfs_inode.i_flags;
2304+ unsigned int vflags = ei->vfs_inode.i_vflags;
2305+
2306+ ei->i_flags &= ~(EXT2_SYNC_FL | EXT2_APPEND_FL |
2307+ EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL |
2308+ EXT2_NOATIME_FL | EXT2_DIRSYNC_FL |
2309+ EXT2_BARRIER_FL | EXT2_COW_FL);
2310+
2311+ if (flags & S_IMMUTABLE)
2312+ ei->i_flags |= EXT2_IMMUTABLE_FL;
2313+ if (flags & S_IXUNLINK)
2314+ ei->i_flags |= EXT2_IXUNLINK_FL;
2315
2316- ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|
2317- EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL);
2318 if (flags & S_SYNC)
2319 ei->i_flags |= EXT2_SYNC_FL;
2320 if (flags & S_APPEND)
2321 ei->i_flags |= EXT2_APPEND_FL;
2322- if (flags & S_IMMUTABLE)
2323- ei->i_flags |= EXT2_IMMUTABLE_FL;
2324 if (flags & S_NOATIME)
2325 ei->i_flags |= EXT2_NOATIME_FL;
2326 if (flags & S_DIRSYNC)
2327 ei->i_flags |= EXT2_DIRSYNC_FL;
2328+
2329+ if (vflags & V_BARRIER)
2330+ ei->i_flags |= EXT2_BARRIER_FL;
2331+ if (vflags & V_COW)
2332+ ei->i_flags |= EXT2_COW_FL;
d337f35e
JR
2333 }
2334
2380c486 2335 struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
8931d859 2336@@ -1342,8 +1366,10 @@ struct inode *ext2_iget (struct super_bl
42bc425c
AM
2337 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2338 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
d337f35e 2339 }
42bc425c
AM
2340- i_uid_write(inode, i_uid);
2341- i_gid_write(inode, i_gid);
2342+ i_uid_write(inode, INOTAG_UID(DX_TAG(inode), i_uid, i_gid));
2343+ i_gid_write(inode, INOTAG_GID(DX_TAG(inode), i_uid, i_gid));
537831f9
AM
2344+ i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), i_uid, i_gid,
2345+ le16_to_cpu(raw_inode->i_raw_tag)));
f6c5ef8b 2346 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
d337f35e 2347 inode->i_size = le32_to_cpu(raw_inode->i_size);
2380c486 2348 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
8931d859 2349@@ -1439,8 +1465,10 @@ static int __ext2_write_inode(struct ino
d337f35e
JR
2350 struct ext2_inode_info *ei = EXT2_I(inode);
2351 struct super_block *sb = inode->i_sb;
2352 ino_t ino = inode->i_ino;
42bc425c
AM
2353- uid_t uid = i_uid_read(inode);
2354- gid_t gid = i_gid_read(inode);
a4a22af8
AM
2355+ uid_t uid = from_kuid(&init_user_ns,
2356+ TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
2357+ gid_t gid = from_kgid(&init_user_ns,
2358+ TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
d337f35e
JR
2359 struct buffer_head * bh;
2360 struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
2361 int n;
8931d859 2362@@ -1476,6 +1504,9 @@ static int __ext2_write_inode(struct ino
d337f35e
JR
2363 raw_inode->i_uid_high = 0;
2364 raw_inode->i_gid_high = 0;
2365 }
2366+#ifdef CONFIG_TAGGING_INTERN
537831f9 2367+ raw_inode->i_raw_tag = cpu_to_le16(i_tag_read(inode));
d337f35e
JR
2368+#endif
2369 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2370 raw_inode->i_size = cpu_to_le32(inode->i_size);
2371 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
8931d859 2372@@ -1559,7 +1590,8 @@ int ext2_setattr(struct dentry *dentry,
927ca606
AM
2373 return error;
2374 }
42bc425c
AM
2375 if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
2376- (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
2377+ (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)) ||
537831f9 2378+ (iattr->ia_valid & ATTR_TAG && !tag_eq(iattr->ia_tag, inode->i_tag))) {
78865d5b 2379 error = dquot_transfer(inode, iattr);
d337f35e
JR
2380 if (error)
2381 return error;
c2806d43
AM
2382diff -urNp -x '*.orig' linux-4.4/fs/ext2/ioctl.c linux-4.4/fs/ext2/ioctl.c
2383--- linux-4.4/fs/ext2/ioctl.c 2016-01-11 00:01:32.000000000 +0100
2384+++ linux-4.4/fs/ext2/ioctl.c 2021-02-24 16:56:24.556155150 +0100
d4263eb0
JR
2385@@ -17,6 +17,16 @@
2386 #include <asm/uaccess.h>
2387
2388
2389+int ext2_sync_flags(struct inode *inode, int flags, int vflags) /* store flags/vflags on the inode and mark it dirty; always returns 0 */
2390+{
2391+ inode->i_flags = flags;
2392+ inode->i_vflags = vflags;
2393+ ext2_get_inode_flags(EXT2_I(inode)); /* copy i_flags/i_vflags into the ext2 i_flags word */
2394+ inode->i_ctime = CURRENT_TIME_SEC;
2395+ mark_inode_dirty(inode);
2396+ return 0;
2397+}
2398+
2399 long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2400 {
b00e13aa 2401 struct inode *inode = file_inode(filp);
d4263eb0 2402@@ -51,6 +61,11 @@ long ext2_ioctl(struct file *filp, unsig
d337f35e 2403
ec22aa5c 2404 flags = ext2_mask_flags(inode->i_mode, flags);
d337f35e 2405
2380c486
JR
2406+ if (IS_BARRIER(inode)) {
2407+ vxwprintk_task(1, "messing with the barrier.");
2408+ return -EACCES;
2409+ }
2410+
2411 mutex_lock(&inode->i_mutex);
2412 /* Is it quota file? Do not allow user to mess with it */
2413 if (IS_NOQUOTA(inode)) {
d4263eb0 2414@@ -66,7 +81,9 @@ long ext2_ioctl(struct file *filp, unsig
d337f35e
JR
2415 *
2416 * This test looks nicer. Thanks to Pauline Middelink
2417 */
2418- if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
2419+ if ((oldflags & EXT2_IMMUTABLE_FL) ||
2420+ ((flags ^ oldflags) & (EXT2_APPEND_FL |
2380c486
JR
2421+ EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL))) {
2422 if (!capable(CAP_LINUX_IMMUTABLE)) {
2423 mutex_unlock(&inode->i_mutex);
2424 ret = -EPERM;
d4263eb0
JR
2425@@ -74,7 +91,7 @@ long ext2_ioctl(struct file *filp, unsig
2426 }
2427 }
2428
2429- flags = flags & EXT2_FL_USER_MODIFIABLE;
2430+ flags &= EXT2_FL_USER_MODIFIABLE;
2431 flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE;
2432 ei->i_flags = flags;
db55b927 2433
c2806d43
AM
2434diff -urNp -x '*.orig' linux-4.4/fs/ext2/namei.c linux-4.4/fs/ext2/namei.c
2435--- linux-4.4/fs/ext2/namei.c 2021-02-24 16:56:11.729084524 +0100
2436+++ linux-4.4/fs/ext2/namei.c 2021-02-24 16:56:24.556155150 +0100
78865d5b 2437@@ -32,6 +32,7 @@
d337f35e
JR
2438
2439 #include <linux/pagemap.h>
78865d5b 2440 #include <linux/quotaops.h>
d337f35e
JR
2441+#include <linux/vs_tag.h>
2442 #include "ext2.h"
2443 #include "xattr.h"
2444 #include "acl.h"
8931d859 2445@@ -71,6 +72,7 @@ static struct dentry *ext2_lookup(struct
a168f21d
AM
2446 (unsigned long) ino);
2447 return ERR_PTR(-EIO);
ec22aa5c 2448 }
a168f21d 2449+ dx_propagate_tag(nd, inode);
d337f35e 2450 }
a168f21d
AM
2451 return d_splice_alias(inode, dentry);
2452 }
8931d859 2453@@ -444,6 +446,7 @@ const struct inode_operations ext2_speci
a168f21d 2454 .removexattr = generic_removexattr,
d337f35e
JR
2455 #endif
2456 .setattr = ext2_setattr,
d337f35e 2457+ .sync_flags = ext2_sync_flags,
a168f21d 2458 .get_acl = ext2_get_acl,
bb20add7 2459 .set_acl = ext2_set_acl,
d337f35e 2460 };
c2806d43
AM
2461diff -urNp -x '*.orig' linux-4.4/fs/ext2/super.c linux-4.4/fs/ext2/super.c
2462--- linux-4.4/fs/ext2/super.c 2021-02-24 16:56:11.729084524 +0100
2463+++ linux-4.4/fs/ext2/super.c 2021-02-24 16:56:24.556155150 +0100
927ca606 2464@@ -408,7 +408,8 @@ enum {
d337f35e
JR
2465 Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
2466 Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
927ca606 2467 Opt_acl, Opt_noacl, Opt_xip, Opt_dax, Opt_ignore, Opt_err, Opt_quota,
2380c486
JR
2468- Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation
2469+ Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation,
2470+ Opt_tag, Opt_notag, Opt_tagid
d337f35e
JR
2471 };
2472
ec22aa5c 2473 static const match_table_t tokens = {
927ca606 2474@@ -436,6 +437,9 @@ static const match_table_t tokens = {
d337f35e
JR
2475 {Opt_acl, "acl"},
2476 {Opt_noacl, "noacl"},
2477 {Opt_xip, "xip"},
2478+ {Opt_tag, "tag"},
2479+ {Opt_notag, "notag"},
2480+ {Opt_tagid, "tagid=%u"},
927ca606 2481 {Opt_dax, "dax"},
d337f35e
JR
2482 {Opt_grpquota, "grpquota"},
2483 {Opt_ignore, "noquota"},
927ca606 2484@@ -520,6 +524,20 @@ static int parse_options(char *options,
d337f35e
JR
2485 case Opt_nouid32:
2486 set_opt (sbi->s_mount_opt, NO_UID32);
2487 break;
2488+#ifndef CONFIG_TAGGING_NONE
2489+ case Opt_tag:
2490+ set_opt (sbi->s_mount_opt, TAGGED);
2491+ break;
2492+ case Opt_notag:
2493+ clear_opt (sbi->s_mount_opt, TAGGED);
2494+ break;
2495+#endif
2496+#ifdef CONFIG_PROPAGATE
2497+ case Opt_tagid:
2498+ /* use args[0] */
2499+ set_opt (sbi->s_mount_opt, TAGGED);
2500+ break;
2501+#endif
2502 case Opt_nocheck:
2503 clear_opt (sbi->s_mount_opt, CHECK);
2504 break;
3cc86a71 2505@@ -895,6 +913,8 @@ static int ext2_fill_super(struct super_
2bf5ad28 2506 if (!parse_options((char *) data, sb))
d337f35e
JR
2507 goto failed_mount;
2508
2509+ if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED)
2510+ sb->s_flags |= MS_TAGGED;
2511 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2512 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
2513 MS_POSIXACL : 0);
3cc86a71 2514@@ -1305,6 +1325,14 @@ static int ext2_remount (struct super_bl
537831f9 2515 err = -EINVAL;
d337f35e
JR
2516 goto restore_opts;
2517 }
537831f9 2518+
d337f35e
JR
2519+ if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) &&
2520+ !(sb->s_flags & MS_TAGGED)) {
2521+ printk("EXT2-fs: %s: tagging not permitted on remount.\n",
2522+ sb->s_id);
d4263eb0
JR
2523+ err = -EINVAL;
2524+ goto restore_opts;
d337f35e 2525+ }
537831f9 2526
d337f35e
JR
2527 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2528 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
c2806d43
AM
2529diff -urNp -x '*.orig' linux-4.4/fs/ext4/ext4.h linux-4.4/fs/ext4/ext4.h
2530--- linux-4.4/fs/ext4/ext4.h 2021-02-24 16:56:11.732417962 +0100
2531+++ linux-4.4/fs/ext4/ext4.h 2021-02-24 16:56:24.556155150 +0100
927ca606 2532@@ -375,8 +375,11 @@ struct flex_groups {
2380c486 2533 #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
78865d5b
AM
2534 #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
2535 #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
b00e13aa 2536+#define EXT4_BARRIER_FL 0x04000000 /* Barrier for chroot() */
2380c486 2537+#define EXT4_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
b00e13aa 2538 #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
927ca606
AM
2539 #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
2540+#define EXT4_COW_FL 0x40000000 /* Copy on Write marker */
2380c486 2541 #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
d337f35e 2542
78865d5b 2543 #define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
927ca606 2544@@ -674,7 +677,7 @@ struct ext4_inode {
ec22aa5c
AM
2545 __le16 l_i_uid_high; /* these 2 fields */
2546 __le16 l_i_gid_high; /* were reserved2[0] */
42bc425c
AM
2547 __le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
2548- __le16 l_i_reserved;
ec22aa5c 2549+ __le16 l_i_tag; /* Context Tag */
ec22aa5c
AM
2550 } linux2;
2551 struct {
2552 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
927ca606 2553@@ -831,6 +834,7 @@ do { \
ec22aa5c
AM
2554 #define i_gid_low i_gid
2555 #define i_uid_high osd2.linux2.l_i_uid_high
2556 #define i_gid_high osd2.linux2.l_i_gid_high
2557+#define i_raw_tag osd2.linux2.l_i_tag
42bc425c 2558 #define i_checksum_lo osd2.linux2.l_i_checksum_lo
d337f35e 2559
ec22aa5c 2560 #elif defined(__GNU__)
927ca606 2561@@ -1068,6 +1072,7 @@ struct ext4_inode_info {
ab30d09f
AM
2562 #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
2563 #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
2564 #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
2565+#define EXT4_MOUNT_TAGGED 0x40000 /* Enable Context Tags */
2566 #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
2567 #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
2568 #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
c2806d43 2569@@ -2528,6 +2533,7 @@ extern int ext4_punch_hole(struct inode
927ca606
AM
2570 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
2571 extern void ext4_set_inode_flags(struct inode *);
2572 extern void ext4_get_inode_flags(struct ext4_inode_info *);
d4263eb0 2573+extern int ext4_sync_flags(struct inode *, int, int);
927ca606
AM
2574 extern int ext4_alloc_da_blocks(struct inode *inode);
2575 extern void ext4_set_aops(struct inode *inode);
2576 extern int ext4_writepage_trans_blocks(struct inode *);
c2806d43
AM
2577diff -urNp -x '*.orig' linux-4.4/fs/ext4/file.c linux-4.4/fs/ext4/file.c
2578--- linux-4.4/fs/ext4/file.c 2021-02-24 16:56:11.735751400 +0100
2579+++ linux-4.4/fs/ext4/file.c 2021-02-24 16:56:24.556155150 +0100
927ca606 2580@@ -749,5 +749,6 @@ const struct inode_operations ext4_file_
a168f21d 2581 .get_acl = ext4_get_acl,
bb20add7 2582 .set_acl = ext4_set_acl,
ec22aa5c 2583 .fiemap = ext4_fiemap,
d337f35e
JR
2584+ .sync_flags = ext4_sync_flags,
2585 };
2586
c2806d43
AM
2587diff -urNp -x '*.orig' linux-4.4/fs/ext4/ialloc.c linux-4.4/fs/ext4/ialloc.c
2588--- linux-4.4/fs/ext4/ialloc.c 2021-02-24 16:56:11.735751400 +0100
2589+++ linux-4.4/fs/ext4/ialloc.c 2021-02-24 16:56:24.556155150 +0100
927ca606 2590@@ -21,6 +21,7 @@
e22b5178
AM
2591 #include <linux/random.h>
2592 #include <linux/bitops.h>
2593 #include <linux/blkdev.h>
2594+#include <linux/vs_tag.h>
2595 #include <asm/byteorder.h>
2596
2597 #include "ext4.h"
48cb6a3c 2598@@ -780,6 +781,7 @@ struct inode *__ext4_new_inode(handle_t
76514441
AM
2599 inode->i_mode = mode;
2600 inode->i_uid = current_fsuid();
2601 inode->i_gid = dir->i_gid;
a4a22af8 2602+ i_tag_write(inode, dx_current_fstag(sb));
e22b5178 2603 } else
76514441 2604 inode_init_owner(inode, dir, mode);
927ca606 2605 err = dquot_initialize(inode);
c2806d43
AM
2606diff -urNp -x '*.orig' linux-4.4/fs/ext4/inode.c linux-4.4/fs/ext4/inode.c
2607--- linux-4.4/fs/ext4/inode.c 2021-02-24 16:56:11.739084838 +0100
2608+++ linux-4.4/fs/ext4/inode.c 2021-02-24 16:56:24.556155150 +0100
927ca606
AM
2609@@ -37,6 +37,7 @@
2610 #include <linux/printk.h>
2611 #include <linux/slab.h>
52afa9bd 2612 #include <linux/bitops.h>
d337f35e 2613+#include <linux/vs_tag.h>
ec22aa5c 2614
2380c486 2615 #include "ext4_jbd2.h"
d337f35e 2616 #include "xattr.h"
c2806d43 2617@@ -4149,12 +4150,15 @@ void ext4_set_inode_flags(struct inode *
d337f35e 2618 unsigned int flags = EXT4_I(inode)->i_flags;
52afa9bd 2619 unsigned int new_fl = 0;
978063ce 2620
d337f35e 2621+ if (flags & EXT4_IMMUTABLE_FL)
52afa9bd 2622+ new_fl |= S_IMMUTABLE;
2380c486 2623+ if (flags & EXT4_IXUNLINK_FL)
52afa9bd 2624+ new_fl |= S_IXUNLINK;
978063ce 2625+
d337f35e 2626 if (flags & EXT4_SYNC_FL)
52afa9bd 2627 new_fl |= S_SYNC;
d337f35e 2628 if (flags & EXT4_APPEND_FL)
52afa9bd 2629 new_fl |= S_APPEND;
d337f35e 2630- if (flags & EXT4_IMMUTABLE_FL)
52afa9bd 2631- new_fl |= S_IMMUTABLE;
d337f35e 2632 if (flags & EXT4_NOATIME_FL)
52afa9bd 2633 new_fl |= S_NOATIME;
d337f35e 2634 if (flags & EXT4_DIRSYNC_FL)
c2806d43 2635@@ -4162,31 +4166,52 @@ void ext4_set_inode_flags(struct inode *
927ca606
AM
2636 if (test_opt(inode->i_sb, DAX))
2637 new_fl |= S_DAX;
ca5d134c 2638 inode_set_flags(inode, new_fl,
927ca606
AM
2639- S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
2640+ S_IXUNLINK | S_IMMUTABLE | S_DAX |
ca5d134c 2641+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2380c486 2642+
978063ce 2643+ new_fl = 0;
2380c486 2644+ if (flags & EXT4_BARRIER_FL)
978063ce 2645+ new_fl |= V_BARRIER;
2380c486 2646+ if (flags & EXT4_COW_FL)
978063ce
JR
2647+ new_fl |= V_COW;
2648+
2649+ set_mask_bits(&inode->i_vflags,
2650+ V_BARRIER | V_COW, new_fl);
d337f35e
JR
2651 }
2652
2380c486
JR
2653 /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
2654 void ext4_get_inode_flags(struct ext4_inode_info *ei)
2655 {
76514441
AM
2656- unsigned int vfs_fl;
2657+ unsigned int vfs_fl, vfs_vf;
2658 unsigned long old_fl, new_fl;
2380c486 2659
76514441
AM
2660 do {
2661 vfs_fl = ei->vfs_inode.i_flags;
2662+ vfs_vf = ei->vfs_inode.i_vflags;
2663 old_fl = ei->i_flags;
2664 new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
2665 EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
2666- EXT4_DIRSYNC_FL);
2667+ EXT4_DIRSYNC_FL|EXT4_BARRIER_FL|
2668+ EXT4_COW_FL);
2669+
2670+ if (vfs_fl & S_IMMUTABLE)
2671+ new_fl |= EXT4_IMMUTABLE_FL;
2672+ if (vfs_fl & S_IXUNLINK)
2673+ new_fl |= EXT4_IXUNLINK_FL;
2674+
2675 if (vfs_fl & S_SYNC)
2676 new_fl |= EXT4_SYNC_FL;
2677 if (vfs_fl & S_APPEND)
2678 new_fl |= EXT4_APPEND_FL;
2679- if (vfs_fl & S_IMMUTABLE)
2680- new_fl |= EXT4_IMMUTABLE_FL;
2681 if (vfs_fl & S_NOATIME)
2682 new_fl |= EXT4_NOATIME_FL;
2683 if (vfs_fl & S_DIRSYNC)
2684 new_fl |= EXT4_DIRSYNC_FL;
2685+
2686+ if (vfs_vf & V_BARRIER)
2687+ new_fl |= EXT4_BARRIER_FL;
2688+ if (vfs_vf & V_COW)
2689+ new_fl |= EXT4_COW_FL;
2690 } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
ec22aa5c
AM
2691 }
2692
c2806d43 2693@@ -4318,8 +4343,10 @@ struct inode *__ext4_iget(struct super_b
42bc425c
AM
2694 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2695 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
d337f35e 2696 }
42bc425c
AM
2697- i_uid_write(inode, i_uid);
2698- i_gid_write(inode, i_gid);
2699+ i_uid_write(inode, INOTAG_UID(DX_TAG(inode), i_uid, i_gid));
2700+ i_gid_write(inode, INOTAG_GID(DX_TAG(inode), i_uid, i_gid));
537831f9
AM
2701+ i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), i_uid, i_gid,
2702+ le16_to_cpu(raw_inode->i_raw_tag)));
f6c5ef8b 2703 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
2380c486 2704
d33d7b00 2705 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
c2806d43 2706@@ -4641,8 +4668,10 @@ static int ext4_do_update_inode(handle_t
d337f35e 2707
2380c486 2708 ext4_get_inode_flags(ei);
d337f35e 2709 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
42bc425c
AM
2710- i_uid = i_uid_read(inode);
2711- i_gid = i_gid_read(inode);
a4a22af8
AM
2712+ i_uid = from_kuid(&init_user_ns,
2713+ TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
2714+ i_gid = from_kgid(&init_user_ns,
2715+ TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
ec22aa5c 2716 if (!(test_opt(inode->i_sb, NO_UID32))) {
42bc425c
AM
2717 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
2718 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
c2806d43 2719@@ -4665,6 +4694,9 @@ static int ext4_do_update_inode(handle_t
d337f35e
JR
2720 raw_inode->i_uid_high = 0;
2721 raw_inode->i_gid_high = 0;
2722 }
2723+#ifdef CONFIG_TAGGING_INTERN
537831f9 2724+ raw_inode->i_raw_tag = cpu_to_le16(i_tag_read(inode));
d337f35e
JR
2725+#endif
2726 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2380c486
JR
2727
2728 EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
c2806d43 2729@@ -4910,7 +4942,8 @@ int ext4_setattr(struct dentry *dentry,
927ca606
AM
2730 return error;
2731 }
42bc425c
AM
2732 if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
2733- (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
2734+ (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid)) ||
537831f9 2735+ (ia_valid & ATTR_TAG && !tag_eq(attr->ia_tag, inode->i_tag))) {
d337f35e
JR
2736 handle_t *handle;
2737
2738 /* (user+group)*(old+new) structure, inode write (sb,
c2806d43 2739@@ -4933,6 +4966,8 @@ int ext4_setattr(struct dentry *dentry,
d337f35e
JR
2740 inode->i_uid = attr->ia_uid;
2741 if (attr->ia_valid & ATTR_GID)
2742 inode->i_gid = attr->ia_gid;
2743+ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
2744+ inode->i_tag = attr->ia_tag;
2745 error = ext4_mark_inode_dirty(handle, inode);
2746 ext4_journal_stop(handle);
2747 }
c2806d43
AM
2748diff -urNp -x '*.orig' linux-4.4/fs/ext4/ioctl.c linux-4.4/fs/ext4/ioctl.c
2749--- linux-4.4/fs/ext4/ioctl.c 2021-02-24 16:56:11.739084838 +0100
2750+++ linux-4.4/fs/ext4/ioctl.c 2021-02-24 16:56:24.559488588 +0100
09be7631 2751@@ -14,6 +14,7 @@
2380c486 2752 #include <linux/mount.h>
ec22aa5c 2753 #include <linux/file.h>
927ca606 2754 #include <linux/random.h>
d337f35e
JR
2755+#include <linux/vs_tag.h>
2756 #include <asm/uaccess.h>
2380c486
JR
2757 #include "ext4_jbd2.h"
2758 #include "ext4.h"
927ca606
AM
2759@@ -202,6 +203,33 @@ static int uuid_is_zero(__u8 u[16])
2760 return 1;
09be7631 2761 }
db55b927 2762
d4263eb0
JR
2763+int ext4_sync_flags(struct inode *inode, int flags, int vflags)
2764+{ /* ->sync_flags hook for ext4: install new VFS flag words inside a journal handle; returns 0 or a negative errno */
2765+ handle_t *handle = NULL;
2766+ struct ext4_iloc iloc;
2767+ int err;
2768+ /* open a handle of type EXT4_HT_INODE, requesting 1 credit */
b00e13aa 2769+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
d4263eb0
JR
2770+ if (IS_ERR(handle))
2771+ return PTR_ERR(handle); /* no handle was obtained, so there is nothing to stop */
2772+
2773+ if (IS_SYNC(inode))
2774+ ext4_handle_sync(handle);
2775+ err = ext4_reserve_inode_write(handle, inode, &iloc);
2776+ if (err)
2777+ goto flags_err; /* leave the inode flags untouched, but still stop the handle */
2778+ /* overwrite both flag words, then fold them into EXT4_I(inode)->i_flags */
2779+ inode->i_flags = flags;
2780+ inode->i_vflags = vflags;
2781+ ext4_get_inode_flags(EXT4_I(inode));
2782+ inode->i_ctime = ext4_current_time(inode);
2783+
2784+ err = ext4_mark_iloc_dirty(handle, inode, &iloc);
2785+flags_err:
2786+ ext4_journal_stop(handle); /* reached on both the success and the reserve-failure path */
2787+ return err;
2788+}
2789+
2790 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2791 {
b00e13aa 2792 struct inode *inode = file_inode(filp);
927ca606 2793@@ -235,6 +263,11 @@ long ext4_ioctl(struct file *filp, unsig
ec22aa5c
AM
2794
2795 flags = ext4_mask_flags(inode->i_mode, flags);
2380c486
JR
2796
2797+ if (IS_BARRIER(inode)) {
2798+ vxwprintk_task(1, "messing with the barrier.");
2799+ return -EACCES;
2800+ }
2801+
2802 err = -EPERM;
ec22aa5c
AM
2803 mutex_lock(&inode->i_mutex);
2804 /* Is it quota file? Do not allow user to mess with it */
927ca606 2805@@ -252,7 +285,9 @@ long ext4_ioctl(struct file *filp, unsig
d337f35e
JR
2806 *
2807 * This test looks nicer. Thanks to Pauline Middelink
2808 */
2809- if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
2810+ if ((oldflags & EXT4_IMMUTABLE_FL) ||
2811+ ((flags ^ oldflags) & (EXT4_APPEND_FL |
2380c486
JR
2812+ EXT4_IMMUTABLE_FL | EXT4_IXUNLINK_FL))) {
2813 if (!capable(CAP_LINUX_IMMUTABLE))
2814 goto flags_out;
2815 }
c2806d43
AM
2816diff -urNp -x '*.orig' linux-4.4/fs/ext4/namei.c linux-4.4/fs/ext4/namei.c
2817--- linux-4.4/fs/ext4/namei.c 2021-02-24 16:56:11.739084838 +0100
2818+++ linux-4.4/fs/ext4/namei.c 2021-02-24 16:56:24.559488588 +0100
927ca606 2819@@ -33,6 +33,7 @@
2380c486 2820 #include <linux/quotaops.h>
d337f35e
JR
2821 #include <linux/buffer_head.h>
2822 #include <linux/bio.h>
d337f35e 2823+#include <linux/vs_tag.h>
2380c486
JR
2824 #include "ext4.h"
2825 #include "ext4_jbd2.h"
d337f35e 2826
c2806d43 2827@@ -1429,6 +1430,7 @@ restart:
a168f21d
AM
2828 ll_rw_block(READ | REQ_META | REQ_PRIO,
2829 1, &bh);
2380c486 2830 }
d337f35e 2831+ dx_propagate_tag(nd, inode);
2380c486
JR
2832 }
2833 if ((bh = bh_use[ra_ptr++]) == NULL)
2834 goto next;
c2806d43 2835@@ -3872,6 +3874,7 @@ const struct inode_operations ext4_dir_i
a168f21d 2836 .get_acl = ext4_get_acl,
bb20add7 2837 .set_acl = ext4_set_acl,
d4263eb0 2838 .fiemap = ext4_fiemap,
d337f35e
JR
2839+ .sync_flags = ext4_sync_flags,
2840 };
d4263eb0
JR
2841
2842 const struct inode_operations ext4_special_inode_operations = {
c2806d43
AM
2843diff -urNp -x '*.orig' linux-4.4/fs/ext4/super.c linux-4.4/fs/ext4/super.c
2844--- linux-4.4/fs/ext4/super.c 2021-02-24 16:56:11.742418277 +0100
2845+++ linux-4.4/fs/ext4/super.c 2021-02-24 16:56:24.559488588 +0100
2846@@ -1179,6 +1179,7 @@ enum {
78865d5b 2847 Opt_dioread_nolock, Opt_dioread_lock,
dd5f3080 2848 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
927ca606
AM
2849 Opt_max_dir_size_kb, Opt_nojournal_checksum,
2850+ Opt_tag, Opt_notag, Opt_tagid
d337f35e
JR
2851 };
2852
ec22aa5c 2853 static const match_table_t tokens = {
c2806d43 2854@@ -1264,6 +1265,9 @@ static const match_table_t tokens = {
1e8b8f9b
AM
2855 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
2856 {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
2857 {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
d337f35e
JR
2858+ {Opt_tag, "tag"},
2859+ {Opt_notag, "notag"},
2860+ {Opt_tagid, "tagid=%u"},
d337f35e 2861 {Opt_err, NULL},
d337f35e 2862 };
2380c486 2863
c2806d43 2864@@ -1506,6 +1510,20 @@ static int handle_mount_opt(struct super
927ca606
AM
2865 case Opt_nolazytime:
2866 sb->s_flags &= ~MS_LAZYTIME;
1e8b8f9b 2867 return 1;
d337f35e 2868+#ifndef CONFIG_TAGGING_NONE
1e8b8f9b
AM
2869+ case Opt_tag:
2870+ set_opt(sb, TAGGED);
2871+ return 1;
2872+ case Opt_notag:
2873+ clear_opt(sb, TAGGED);
2874+ return 1;
d337f35e
JR
2875+#endif
2876+#ifdef CONFIG_PROPAGATE
1e8b8f9b
AM
2877+ case Opt_tagid:
2878+ /* use args[0] */
2879+ set_opt(sb, TAGGED);
2880+ return 1;
d337f35e 2881+#endif
1e8b8f9b
AM
2882 }
2883
b00e13aa 2884 for (m = ext4_mount_opts; m->token != Opt_err; m++)
c2806d43 2885@@ -3477,6 +3495,9 @@ static int ext4_fill_super(struct super_
927ca606 2886 sb->s_iflags |= SB_I_CGROUPWB;
f6c5ef8b 2887 }
d337f35e
JR
2888
2889+ if (EXT4_SB(sb)->s_mount_opt & EXT4_MOUNT_TAGGED)
2890+ sb->s_flags |= MS_TAGGED;
2891+
2892 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
78865d5b 2893 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
d337f35e 2894
c2806d43 2895@@ -4852,6 +4873,14 @@ static int ext4_remount(struct super_blo
ec22aa5c 2896 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
93de0823 2897 ext4_abort(sb, "Abort forced by user");
2380c486 2898
d337f35e
JR
2899+ if ((sbi->s_mount_opt & EXT4_MOUNT_TAGGED) &&
2900+ !(sb->s_flags & MS_TAGGED)) {
2901+ printk("EXT4-fs: %s: tagging not permitted on remount.\n",
2902+ sb->s_id);
d4263eb0
JR
2903+ err = -EINVAL;
2904+ goto restore_opts;
d337f35e 2905+ }
2380c486 2906+
d337f35e 2907 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
78865d5b 2908 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
d337f35e 2909
c2806d43
AM
2910diff -urNp -x '*.orig' linux-4.4/fs/fcntl.c linux-4.4/fs/fcntl.c
2911--- linux-4.4/fs/fcntl.c 2021-02-24 16:56:11.752418592 +0100
2912+++ linux-4.4/fs/fcntl.c 2021-02-24 16:56:24.559488588 +0100
bb20add7 2913@@ -22,6 +22,7 @@
2380c486 2914 #include <linux/pid_namespace.h>
92598135 2915 #include <linux/user_namespace.h>
bb20add7 2916 #include <linux/shmem_fs.h>
d337f35e
JR
2917+#include <linux/vs_limit.h>
2918
2919 #include <asm/poll.h>
2920 #include <asm/siginfo.h>
0e1bbc97 2921@@ -389,6 +390,8 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, f
d337f35e 2922
537831f9 2923 if (!f.file)
2380c486
JR
2924 goto out;
2925+ if (!vx_files_avail(1))
2926+ goto out;
2927
537831f9 2928 if (unlikely(f.file->f_mode & FMODE_PATH)) {
42bc425c 2929 if (!check_fcntl_cmd(cmd))
c2806d43
AM
2930diff -urNp -x '*.orig' linux-4.4/fs/file.c linux-4.4/fs/file.c
2931--- linux-4.4/fs/file.c 2021-02-24 16:56:11.752418592 +0100
2932+++ linux-4.4/fs/file.c 2021-02-24 16:56:24.559488588 +0100
537831f9 2933@@ -22,6 +22,7 @@
2380c486
JR
2934 #include <linux/spinlock.h>
2935 #include <linux/rcupdate.h>
2936 #include <linux/workqueue.h>
2937+#include <linux/vs_limit.h>
2938
09be7631
JR
2939 int sysctl_nr_open __read_mostly = 1024*1024;
2940 int sysctl_nr_open_min = BITS_PER_LONG;
927ca606 2941@@ -356,6 +357,8 @@ struct files_struct *dup_fd(struct files
2380c486
JR
2942 struct file *f = *old_fds++;
2943 if (f) {
2944 get_file(f);
2945+ /* TODO: sum it first for check and performance */
2946+ vx_openfd_inc(open_files - i);
2947 } else {
2948 /*
2949 * The fd may be claimed in the fd bitmap but not yet
927ca606 2950@@ -405,9 +408,11 @@ static struct fdtable *close_files(struc
537831f9 2951 filp_close(file, files);
bb20add7 2952 cond_resched_rcu_qs();
537831f9
AM
2953 }
2954+ vx_openfd_dec(i);
2955 }
2956 i++;
2957 set >>= 1;
2958+ cond_resched();
2959 }
2960 }
bb20add7 2961
3cc86a71 2962@@ -539,6 +544,7 @@ repeat:
2380c486 2963 else
1e8b8f9b 2964 __clear_close_on_exec(fd, fdt);
2380c486
JR
2965 error = fd;
2966+ vx_openfd_inc(fd);
2967 #if 1
2968 /* Sanity check */
bb20add7 2969 if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
3cc86a71 2970@@ -569,6 +575,7 @@ static void __put_unused_fd(struct files
537831f9
AM
2971 __clear_open_fd(fd, fdt);
2972 if (fd < files->next_fd)
2973 files->next_fd = fd;
2974+ vx_openfd_dec(fd);
2975 }
2976
2977 void put_unused_fd(unsigned int fd)
3cc86a71 2978@@ -851,6 +858,8 @@ __releases(&files->file_lock)
537831f9
AM
2979
2980 if (tofree)
2981 filp_close(tofree, files);
2982+ else
2983+ vx_openfd_inc(fd); /* fd was unused */
2984
2985 return fd;
2986
c2806d43
AM
2987diff -urNp -x '*.orig' linux-4.4/fs/file_table.c linux-4.4/fs/file_table.c
2988--- linux-4.4/fs/file_table.c 2016-01-11 00:01:32.000000000 +0100
2989+++ linux-4.4/fs/file_table.c 2021-02-24 16:56:24.559488588 +0100
92598135 2990@@ -26,6 +26,8 @@
92598135 2991 #include <linux/task_work.h>
2bf5ad28 2992 #include <linux/ima.h>
927ca606 2993 #include <linux/swap.h>
d337f35e
JR
2994+#include <linux/vs_limit.h>
2995+#include <linux/vs_context.h>
2996
a168f21d 2997 #include <linux/atomic.h>
d337f35e 2998
c2e5f7c8 2999@@ -137,6 +139,8 @@ struct file *get_empty_filp(void)
bb20add7 3000 mutex_init(&f->f_pos_lock);
d337f35e
JR
3001 eventpoll_init_file(f);
3002 /* f->f_version: 0 */
3003+ f->f_xid = vx_current_xid();
3004+ vx_files_inc(f);
3005 return f;
3006
3007 over:
bb20add7 3008@@ -219,6 +223,8 @@ static void __fput(struct file *file)
265de2f7
JR
3009 put_write_access(inode);
3010 __mnt_drop_write(mnt);
3011 }
d337f35e
JR
3012+ vx_files_dec(file);
3013+ file->f_xid = 0;
92598135
AM
3014 file->f_path.dentry = NULL;
3015 file->f_path.mnt = NULL;
b00e13aa 3016 file->f_inode = NULL;
bb20add7 3017@@ -305,6 +311,8 @@ void put_filp(struct file *file)
d337f35e 3018 {
2380c486 3019 if (atomic_long_dec_and_test(&file->f_count)) {
d337f35e
JR
3020 security_file_free(file);
3021+ vx_files_dec(file);
3022+ file->f_xid = 0;
d337f35e
JR
3023 file_free(file);
3024 }
c2e5f7c8 3025 }
c2806d43
AM
3026diff -urNp -x '*.orig' linux-4.4/fs/fs_struct.c linux-4.4/fs/fs_struct.c
3027--- linux-4.4/fs/fs_struct.c 2016-01-11 00:01:32.000000000 +0100
3028+++ linux-4.4/fs/fs_struct.c 2021-02-24 16:56:24.559488588 +0100
ec22aa5c
AM
3029@@ -4,6 +4,7 @@
3030 #include <linux/path.h>
3031 #include <linux/slab.h>
3032 #include <linux/fs_struct.h>
3033+#include <linux/vserver/global.h>
d33d7b00 3034 #include "internal.h"
ec22aa5c 3035
92598135
AM
3036 /*
3037@@ -87,6 +88,7 @@ void free_fs_struct(struct fs_struct *fs
ec22aa5c 3038 {
92598135
AM
3039 path_put(&fs->root);
3040 path_put(&fs->pwd);
ec22aa5c
AM
3041+ atomic_dec(&vs_global_fs);
3042 kmem_cache_free(fs_cachep, fs);
3043 }
3044
537831f9 3045@@ -124,6 +126,7 @@ struct fs_struct *copy_fs_struct(struct
d33d7b00 3046 fs->pwd = old->pwd;
92598135 3047 path_get(&fs->pwd);
d33d7b00 3048 spin_unlock(&old->lock);
ec22aa5c
AM
3049+ atomic_inc(&vs_global_fs);
3050 }
3051 return fs;
3052 }
c2806d43
AM
3053diff -urNp -x '*.orig' linux-4.4/fs/gfs2/file.c linux-4.4/fs/gfs2/file.c
3054--- linux-4.4/fs/gfs2/file.c 2021-02-24 16:56:11.755752030 +0100
3055+++ linux-4.4/fs/gfs2/file.c 2021-02-24 16:56:24.559488588 +0100
927ca606 3056@@ -137,6 +137,9 @@ static const u32 fsflags_to_gfs2[32] = {
e22b5178
AM
3057 [12] = GFS2_DIF_EXHASH,
3058 [14] = GFS2_DIF_INHERIT_JDATA,
92598135 3059 [17] = GFS2_DIF_TOPDIR,
e22b5178
AM
3060+ [27] = GFS2_DIF_IXUNLINK,
3061+ [26] = GFS2_DIF_BARRIER,
3062+ [29] = GFS2_DIF_COW,
3063 };
3064
3065 static const u32 gfs2_to_fsflags[32] = {
927ca606 3066@@ -147,6 +150,9 @@ static const u32 gfs2_to_fsflags[32] = {
e22b5178 3067 [gfs2fl_ExHash] = FS_INDEX_FL,
92598135 3068 [gfs2fl_TopLevel] = FS_TOPDIR_FL,
e22b5178
AM
3069 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
3070+ [gfs2fl_IXUnlink] = FS_IXUNLINK_FL,
3071+ [gfs2fl_Barrier] = FS_BARRIER_FL,
3072+ [gfs2fl_Cow] = FS_COW_FL,
3073 };
3074
3075 static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
927ca606 3076@@ -177,12 +183,17 @@ void gfs2_set_inode_flags(struct inode *
e22b5178
AM
3077 {
3078 struct gfs2_inode *ip = GFS2_I(inode);
3079 unsigned int flags = inode->i_flags;
3080+ unsigned int vflags = inode->i_vflags;
8931d859 3081+
927ca606
AM
3082+ flags &= ~(S_IMMUTABLE | S_IXUNLINK |
3083+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC | S_NOSEC);
8931d859
AM
3084
3085- flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
a168f21d 3086 if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
927ca606 3087 flags |= S_NOSEC;
e22b5178
AM
3088 if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
3089 flags |= S_IMMUTABLE;
3090+ if (ip->i_diskflags & GFS2_DIF_IXUNLINK)
3091+ flags |= S_IXUNLINK;
e22b5178
AM
3092 if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
3093 flags |= S_APPEND;
3094 if (ip->i_diskflags & GFS2_DIF_NOATIME)
927ca606 3095@@ -190,6 +201,43 @@ void gfs2_set_inode_flags(struct inode *
e22b5178
AM
3096 if (ip->i_diskflags & GFS2_DIF_SYNC)
3097 flags |= S_SYNC;
3098 inode->i_flags = flags;
3099+
3100+ vflags &= ~(V_BARRIER | V_COW);
3101+
3102+ if (ip->i_diskflags & GFS2_DIF_BARRIER)
3103+ vflags |= V_BARRIER;
3104+ if (ip->i_diskflags & GFS2_DIF_COW)
3105+ vflags |= V_COW;
3106+ inode->i_vflags = vflags;
3107+}
3108+
3109+void gfs2_get_inode_flags(struct inode *inode)
3110+{ /* rebuild the flag bits of ip->i_diskflags from inode->i_flags and inode->i_vflags */
3111+ struct gfs2_inode *ip = GFS2_I(inode);
3112+ unsigned int flags = inode->i_flags;
3113+ unsigned int vflags = inode->i_vflags;
3114+ /* clear every bit this function manages; all other disk flags are left as they are */
3115+ ip->i_diskflags &= ~(GFS2_DIF_APPENDONLY |
3116+ GFS2_DIF_NOATIME | GFS2_DIF_SYNC |
3117+ GFS2_DIF_IMMUTABLE | GFS2_DIF_IXUNLINK |
3118+ GFS2_DIF_BARRIER | GFS2_DIF_COW);
3119+ /* S_* bits taken from i_flags */
3120+ if (flags & S_IMMUTABLE)
3121+ ip->i_diskflags |= GFS2_DIF_IMMUTABLE;
3122+ if (flags & S_IXUNLINK)
3123+ ip->i_diskflags |= GFS2_DIF_IXUNLINK;
3124+
3125+ if (flags & S_APPEND)
3126+ ip->i_diskflags |= GFS2_DIF_APPENDONLY;
3127+ if (flags & S_NOATIME)
3128+ ip->i_diskflags |= GFS2_DIF_NOATIME;
3129+ if (flags & S_SYNC)
3130+ ip->i_diskflags |= GFS2_DIF_SYNC;
3131+ /* V_* bits taken from i_vflags */
3132+ if (vflags & V_BARRIER)
3133+ ip->i_diskflags |= GFS2_DIF_BARRIER;
3134+ if (vflags & V_COW)
3135+ ip->i_diskflags |= GFS2_DIF_COW;
3136 }
3137
3138 /* Flags that can be set by user space */
927ca606 3139@@ -305,6 +353,37 @@ static int gfs2_set_flags(struct file *f
e22b5178
AM
3140 return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA);
3141 }
3142
3143+int gfs2_sync_flags(struct inode *inode, int flags, int vflags)
3144+{ /* install new VFS flag words on a gfs2 inode under an exclusive glock and a transaction; returns 0 or an error code */
3145+ struct gfs2_inode *ip = GFS2_I(inode);
3146+ struct gfs2_sbd *sdp = GFS2_SB(inode);
3147+ struct buffer_head *bh;
3148+ struct gfs2_holder gh;
3149+ int error;
3150+ /* each failure below unwinds only what was acquired before it */
3151+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
3152+ if (error)
3153+ return error;
3154+ error = gfs2_trans_begin(sdp, RES_DINODE, 0);
3155+ if (error)
3156+ goto out; /* no transaction was started: only drop the glock */
3157+ error = gfs2_meta_inode_buffer(ip, &bh);
3158+ if (error)
3159+ goto out_trans_end;
b00e13aa 3160+ gfs2_trans_add_meta(ip->i_gl, bh);
e22b5178
AM
3161+ inode->i_flags = flags; /* overwrites the whole i_flags word with the caller's value */
3162+ inode->i_vflags = vflags;
3163+ gfs2_get_inode_flags(inode); /* fold the new VFS flags into ip->i_diskflags */
3164+ gfs2_dinode_out(ip, bh->b_data); /* presumably serialises the in-core inode into the buffer -- TODO confirm */
3165+ brelse(bh);
3166+ gfs2_set_aops(inode);
3167+out_trans_end:
3168+ gfs2_trans_end(sdp);
3169+out:
3170+ gfs2_glock_dq_uninit(&gh);
3171+ return error; /* 0 when every step above succeeded */
3172+}
3173+
3174 static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3175 {
3176 switch(cmd) {
c2806d43
AM
3177diff -urNp -x '*.orig' linux-4.4/fs/gfs2/inode.h linux-4.4/fs/gfs2/inode.h
3178--- linux-4.4/fs/gfs2/inode.h 2016-01-11 00:01:32.000000000 +0100
3179+++ linux-4.4/fs/gfs2/inode.h 2021-02-24 16:56:24.559488588 +0100
c2e5f7c8 3180@@ -118,6 +118,7 @@ extern const struct file_operations gfs2
e22b5178
AM
3181 extern const struct file_operations gfs2_dir_fops_nolock;
3182
3183 extern void gfs2_set_inode_flags(struct inode *inode);
3184+extern int gfs2_sync_flags(struct inode *inode, int flags, int vflags);
3185
3186 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
3187 extern const struct file_operations gfs2_file_fops;
c2806d43
AM
3188diff -urNp -x '*.orig' linux-4.4/fs/hostfs/hostfs.h linux-4.4/fs/hostfs/hostfs.h
3189--- linux-4.4/fs/hostfs/hostfs.h 2016-01-11 00:01:32.000000000 +0100
3190+++ linux-4.4/fs/hostfs/hostfs.h 2021-02-24 16:56:24.559488588 +0100
537831f9
AM
3191@@ -42,6 +42,7 @@ struct hostfs_iattr {
3192 unsigned short ia_mode;
3193 uid_t ia_uid;
3194 gid_t ia_gid;
61333608 3195+ vtag_t ia_tag;
537831f9
AM
3196 loff_t ia_size;
3197 struct timespec ia_atime;
3198 struct timespec ia_mtime;
c2806d43
AM
3199diff -urNp -x '*.orig' linux-4.4/fs/inode.c linux-4.4/fs/inode.c
3200--- linux-4.4/fs/inode.c 2021-02-24 16:56:11.762418907 +0100
3201+++ linux-4.4/fs/inode.c 2021-02-24 16:56:24.559488588 +0100
c2e5f7c8 3202@@ -18,6 +18,7 @@
763640ca 3203 #include <linux/buffer_head.h> /* for inode_has_buffers */
db55b927 3204 #include <linux/ratelimit.h>
c2e5f7c8 3205 #include <linux/list_lru.h>
76514441 3206+#include <linux/vs_tag.h>
927ca606 3207 #include <trace/events/writeback.h>
763640ca 3208 #include "internal.h"
76514441 3209
927ca606 3210@@ -133,6 +134,8 @@ int inode_init_always(struct super_block
ec22aa5c
AM
3211 struct address_space *const mapping = &inode->i_data;
3212
3213 inode->i_sb = sb;
3214+
3215+ /* essential because of inode slab reuse */
ec22aa5c
AM
3216 inode->i_blkbits = sb->s_blocksize_bits;
3217 inode->i_flags = 0;
c2806d43
AM
3218 atomic64_set(&inode->i_sequence, 0);
3219@@ -143,6 +146,7 @@ int inode_init_always(struct super_block
537831f9
AM
3220 inode->i_opflags = 0;
3221 i_uid_write(inode, 0);
3222 i_gid_write(inode, 0);
3223+ i_tag_write(inode, 0);
3224 atomic_set(&inode->i_writecount, 0);
3225 inode->i_size = 0;
3226 inode->i_blocks = 0;
c2806d43 3227@@ -153,6 +157,7 @@ int inode_init_always(struct super_block
ec22aa5c 3228 inode->i_cdev = NULL;
927ca606 3229 inode->i_link = NULL;
ec22aa5c
AM
3230 inode->i_rdev = 0;
3231+ inode->i_mdev = 0;
3232 inode->dirtied_when = 0;
3233
3cc86a71 3234 #ifdef CONFIG_CGROUP_WRITEBACK
c2806d43 3235@@ -476,6 +481,8 @@ void __insert_inode_hash(struct inode *i
d337f35e 3236 }
763640ca 3237 EXPORT_SYMBOL(__insert_inode_hash);
d337f35e
JR
3238
3239+EXPORT_SYMBOL_GPL(__iget);
3240+
3241 /**
a168f21d 3242 * __remove_inode_hash - remove an inode from the hash
ab30d09f 3243 * @inode: inode to unhash
c2806d43 3244@@ -1923,9 +1930,11 @@ void init_special_inode(struct inode *in
2380c486
JR
3245 if (S_ISCHR(mode)) {
3246 inode->i_fop = &def_chr_fops;
3247 inode->i_rdev = rdev;
3248+ inode->i_mdev = rdev;
3249 } else if (S_ISBLK(mode)) {
3250 inode->i_fop = &def_blk_fops;
3251 inode->i_rdev = rdev;
3252+ inode->i_mdev = rdev;
3253 } else if (S_ISFIFO(mode))
09be7631 3254 inode->i_fop = &pipefifo_fops;
2380c486 3255 else if (S_ISSOCK(mode))
c2806d43 3256@@ -1960,6 +1969,7 @@ void inode_init_owner(struct inode *inod
76514441
AM
3257 } else
3258 inode->i_gid = current_fsgid();
3259 inode->i_mode = mode;
8ce283e1 3260+ i_tag_write(inode, dx_current_fstag(inode->i_sb));
76514441
AM
3261 }
3262 EXPORT_SYMBOL(inode_init_owner);
763640ca 3263
c2806d43
AM
3264diff -urNp -x '*.orig' linux-4.4/fs/ioctl.c linux-4.4/fs/ioctl.c
3265--- linux-4.4/fs/ioctl.c 2016-01-11 00:01:32.000000000 +0100
3266+++ linux-4.4/fs/ioctl.c 2021-02-24 16:56:24.562822026 +0100
ab30d09f 3267@@ -15,6 +15,9 @@
ec22aa5c
AM
3268 #include <linux/writeback.h>
3269 #include <linux/buffer_head.h>
3270 #include <linux/falloc.h>
d337f35e
JR
3271+#include <linux/proc_fs.h>
3272+#include <linux/vserver/inode.h>
3273+#include <linux/vs_tag.h>
3274
d337f35e
JR
3275 #include <asm/ioctls.h>
3276
c2806d43
AM
3277diff -urNp -x '*.orig' linux-4.4/fs/jfs/file.c linux-4.4/fs/jfs/file.c
3278--- linux-4.4/fs/jfs/file.c 2016-01-11 00:01:32.000000000 +0100
3279+++ linux-4.4/fs/jfs/file.c 2021-02-24 16:56:24.562822026 +0100
927ca606
AM
3280@@ -113,7 +113,8 @@ int jfs_setattr(struct dentry *dentry, s
3281 return rc;
3282 }
537831f9
AM
3283 if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
3284- (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
3285+ (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)) ||
3286+ (iattr->ia_valid & ATTR_TAG && !tag_eq(iattr->ia_tag, inode->i_tag))) {
78865d5b
AM
3287 rc = dquot_transfer(inode, iattr);
3288 if (rc)
3289 return rc;
927ca606 3290@@ -149,6 +150,7 @@ const struct inode_operations jfs_file_i
a168f21d 3291 .get_acl = jfs_get_acl,
bb20add7 3292 .set_acl = jfs_set_acl,
d337f35e
JR
3293 #endif
3294+ .sync_flags = jfs_sync_flags,
3295 };
3296
3297 const struct file_operations jfs_file_operations = {
c2806d43
AM
3298diff -urNp -x '*.orig' linux-4.4/fs/jfs/ioctl.c linux-4.4/fs/jfs/ioctl.c
3299--- linux-4.4/fs/jfs/ioctl.c 2016-01-11 00:01:32.000000000 +0100
3300+++ linux-4.4/fs/jfs/ioctl.c 2021-02-24 16:56:24.562822026 +0100
537831f9 3301@@ -12,6 +12,7 @@
d337f35e 3302 #include <linux/time.h>
2380c486 3303 #include <linux/sched.h>
537831f9 3304 #include <linux/blkdev.h>
d337f35e
JR
3305+#include <linux/mount.h>
3306 #include <asm/current.h>
3307 #include <asm/uaccess.h>
3308
537831f9 3309@@ -56,6 +57,16 @@ static long jfs_map_ext2(unsigned long f
d4263eb0
JR
3310 }
3311
3312
3313+int jfs_sync_flags(struct inode *inode, int flags, int vflags)
3314+{
3315+ inode->i_flags = flags;
3316+ inode->i_vflags = vflags;
3317+ jfs_get_inode_flags(JFS_IP(inode));
3318+ inode->i_ctime = CURRENT_TIME_SEC;
3319+ mark_inode_dirty(inode);
3320+ return 0;
3321+}
3322+
3323 long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3324 {
b00e13aa 3325 struct inode *inode = file_inode(filp);
537831f9 3326@@ -89,6 +100,11 @@ long jfs_ioctl(struct file *filp, unsign
2380c486
JR
3327 if (!S_ISDIR(inode->i_mode))
3328 flags &= ~JFS_DIRSYNC_FL;
d337f35e 3329
2380c486
JR
3330+ if (IS_BARRIER(inode)) {
3331+ vxwprintk_task(1, "messing with the barrier.");
3332+ return -EACCES;
3333+ }
3334+
3335 /* Is it quota file? Do not allow user to mess with it */
3336 if (IS_NOQUOTA(inode)) {
3337 err = -EPERM;
537831f9 3338@@ -106,8 +122,8 @@ long jfs_ioctl(struct file *filp, unsign
d337f35e
JR
3339 * the relevant capability.
3340 */
3341 if ((oldflags & JFS_IMMUTABLE_FL) ||
3342- ((flags ^ oldflags) &
3343- (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
3344+ ((flags ^ oldflags) & (JFS_APPEND_FL |
2380c486
JR
3345+ JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL))) {
3346 if (!capable(CAP_LINUX_IMMUTABLE)) {
3347 mutex_unlock(&inode->i_mutex);
3348 err = -EPERM;
537831f9 3349@@ -115,7 +131,7 @@ long jfs_ioctl(struct file *filp, unsign
d4263eb0
JR
3350 }
3351 }
3352
3353- flags = flags & JFS_FL_USER_MODIFIABLE;
3354+ flags &= JFS_FL_USER_MODIFIABLE;
3355 flags |= oldflags & ~JFS_FL_USER_MODIFIABLE;
3356 jfs_inode->mode2 = flags;
3357
c2806d43
AM
3358diff -urNp -x '*.orig' linux-4.4/fs/jfs/jfs_dinode.h linux-4.4/fs/jfs/jfs_dinode.h
3359--- linux-4.4/fs/jfs/jfs_dinode.h 2016-01-11 00:01:32.000000000 +0100
3360+++ linux-4.4/fs/jfs/jfs_dinode.h 2021-02-24 16:56:24.562822026 +0100
2380c486
JR
3361@@ -161,9 +161,13 @@ struct dinode {
3362
d337f35e
JR
3363 #define JFS_APPEND_FL 0x01000000 /* writes to file may only append */
3364 #define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */
2380c486 3365+#define JFS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
d337f35e
JR
3366
3367-#define JFS_FL_USER_VISIBLE 0x03F80000
2380c486 3368-#define JFS_FL_USER_MODIFIABLE 0x03F80000
d337f35e 3369+#define JFS_BARRIER_FL 0x04000000 /* Barrier for chroot() */
2380c486 3370+#define JFS_COW_FL 0x20000000 /* Copy on Write marker */
d337f35e 3371+
2380c486
JR
3372+#define JFS_FL_USER_VISIBLE 0x07F80000
3373+#define JFS_FL_USER_MODIFIABLE 0x07F80000
3374 #define JFS_FL_INHERIT 0x03C80000
d337f35e
JR
3375
3376 /* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */
c2806d43
AM
3377diff -urNp -x '*.orig' linux-4.4/fs/jfs/jfs_filsys.h linux-4.4/fs/jfs/jfs_filsys.h
3378--- linux-4.4/fs/jfs/jfs_filsys.h 2016-01-11 00:01:32.000000000 +0100
3379+++ linux-4.4/fs/jfs/jfs_filsys.h 2021-02-24 16:56:24.562822026 +0100
537831f9 3380@@ -266,6 +266,7 @@
ec22aa5c
AM
3381 #define JFS_NAME_MAX 255
3382 #define JFS_PATH_MAX BPSIZE
bd427b06 3383
ec22aa5c 3384+#define JFS_TAGGED 0x00800000 /* Context Tagging */
bd427b06 3385
ec22aa5c
AM
3386 /*
3387 * file system state (superblock state)
c2806d43
AM
3388diff -urNp -x '*.orig' linux-4.4/fs/jfs/jfs_imap.c linux-4.4/fs/jfs/jfs_imap.c
3389--- linux-4.4/fs/jfs/jfs_imap.c 2016-01-11 00:01:32.000000000 +0100
3390+++ linux-4.4/fs/jfs/jfs_imap.c 2021-02-24 16:56:24.562822026 +0100
78865d5b 3391@@ -46,6 +46,7 @@
ec22aa5c
AM
3392 #include <linux/pagemap.h>
3393 #include <linux/quotaops.h>
78865d5b 3394 #include <linux/slab.h>
ec22aa5c 3395+#include <linux/vs_tag.h>
bd427b06 3396
ec22aa5c
AM
3397 #include "jfs_incore.h"
3398 #include "jfs_inode.h"
c2e5f7c8 3399@@ -3047,6 +3048,8 @@ static int copy_from_dinode(struct dinod
ec22aa5c
AM
3400 {
3401 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3402 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
a4a22af8
AM
3403+ kuid_t kuid;
3404+ kgid_t kgid;
bd427b06 3405
ec22aa5c
AM
3406 jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
3407 jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
c2e5f7c8 3408@@ -3067,14 +3070,18 @@ static int copy_from_dinode(struct dinod
d337f35e 3409 }
f6c5ef8b 3410 set_nlink(ip, le32_to_cpu(dip->di_nlink));
bd427b06 3411
537831f9 3412- jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
a4a22af8
AM
3413+ kuid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
3414+ kgid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
3415+ ip->i_tag = INOTAG_KTAG(DX_TAG(ip), kuid, kgid, GLOBAL_ROOT_TAG);
ec22aa5c 3416+
a4a22af8 3417+ jfs_ip->saved_uid = INOTAG_KUID(DX_TAG(ip), kuid, kgid);
537831f9 3418 if (!uid_valid(sbi->uid))
ec22aa5c
AM
3419 ip->i_uid = jfs_ip->saved_uid;
3420 else {
3421 ip->i_uid = sbi->uid;
bd427b06
AM
3422 }
3423
537831f9 3424- jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
a4a22af8 3425+ jfs_ip->saved_gid = INOTAG_KGID(DX_TAG(ip), kuid, kgid);
537831f9 3426 if (!gid_valid(sbi->gid))
d337f35e
JR
3427 ip->i_gid = jfs_ip->saved_gid;
3428 else {
c2e5f7c8 3429@@ -3139,16 +3146,14 @@ static void copy_to_dinode(struct dinode
d337f35e
JR
3430 dip->di_size = cpu_to_le64(ip->i_size);
3431 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
3432 dip->di_nlink = cpu_to_le32(ip->i_nlink);
537831f9
AM
3433- if (!uid_valid(sbi->uid))
3434- dip->di_uid = cpu_to_le32(i_uid_read(ip));
d337f35e 3435- else
537831f9
AM
3436- dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns,
3437- jfs_ip->saved_uid));
3438- if (!gid_valid(sbi->gid))
3439- dip->di_gid = cpu_to_le32(i_gid_read(ip));
d337f35e 3440- else
537831f9
AM
3441- dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3442- jfs_ip->saved_gid));
3443+ dip->di_uid = cpu_to_le32(from_kuid(&init_user_ns,
a4a22af8 3444+ TAGINO_KUID(DX_TAG(ip),
537831f9
AM
3445+ !uid_valid(sbi->uid) ? ip->i_uid : jfs_ip->saved_uid,
3446+ ip->i_tag)));
a4a22af8
AM
3447+ dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3448+ TAGINO_KGID(DX_TAG(ip),
537831f9
AM
3449+ !gid_valid(sbi->gid) ? ip->i_gid : jfs_ip->saved_gid,
3450+ ip->i_tag)));
2380c486 3451 jfs_get_inode_flags(jfs_ip);
d337f35e
JR
3452 /*
3453 * mode2 is only needed for storing the higher order bits.
c2806d43
AM
3454diff -urNp -x '*.orig' linux-4.4/fs/jfs/jfs_inode.c linux-4.4/fs/jfs/jfs_inode.c
3455--- linux-4.4/fs/jfs/jfs_inode.c 2016-01-11 00:01:32.000000000 +0100
3456+++ linux-4.4/fs/jfs/jfs_inode.c 2021-02-24 16:56:24.562822026 +0100
e22b5178
AM
3457@@ -18,6 +18,7 @@
3458
3459 #include <linux/fs.h>
3460 #include <linux/quotaops.h>
3461+#include <linux/vs_tag.h>
3462 #include "jfs_incore.h"
3463 #include "jfs_inode.h"
3464 #include "jfs_filsys.h"
8de2f54c 3465@@ -33,6 +34,9 @@ void jfs_set_inode_flags(struct inode *i
d337f35e
JR
3466
3467 if (flags & JFS_IMMUTABLE_FL)
bb20add7 3468 new_fl |= S_IMMUTABLE;
2380c486 3469+ if (flags & JFS_IXUNLINK_FL)
8de2f54c 3470+ new_fl |= S_IXUNLINK;
d337f35e 3471+
d337f35e 3472 if (flags & JFS_APPEND_FL)
bb20add7 3473 new_fl |= S_APPEND;
d337f35e 3474 if (flags & JFS_NOATIME_FL)
8de2f54c 3475@@ -41,18 +45,35 @@ void jfs_set_inode_flags(struct inode *i
bb20add7 3476 new_fl |= S_DIRSYNC;
8de2f54c
AM
3477 if (flags & JFS_SYNC_FL)
3478 new_fl |= S_SYNC;
bb20add7 3479- inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND | S_NOATIME |
8de2f54c
AM
3480- S_DIRSYNC | S_SYNC);
3481+
3482+ inode_set_flags(inode, new_fl, S_IMMUTABLE | S_IXUNLINK |
3483+ S_APPEND | S_NOATIME | S_DIRSYNC | S_SYNC);
2380c486 3484+
bb20add7 3485+ new_fl = 0;
2380c486 3486+ if (flags & JFS_BARRIER_FL)
bb20add7 3487+ new_fl |= V_BARRIER;
2380c486 3488+ if (flags & JFS_COW_FL)
bb20add7
AM
3489+ new_fl |= V_COW;
3490+
3491+ set_mask_bits(&inode->i_vflags,
3492+ V_BARRIER | V_COW, new_fl);
2380c486
JR
3493 }
3494
3495 void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip)
3496 {
3497 unsigned int flags = jfs_ip->vfs_inode.i_flags;
3498+ unsigned int vflags = jfs_ip->vfs_inode.i_vflags;
3499+
3500+ jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL |
3501+ JFS_APPEND_FL | JFS_NOATIME_FL |
3502+ JFS_DIRSYNC_FL | JFS_SYNC_FL |
3503+ JFS_BARRIER_FL | JFS_COW_FL);
3504
3505- jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL |
3506- JFS_DIRSYNC_FL | JFS_SYNC_FL);
3507 if (flags & S_IMMUTABLE)
3508 jfs_ip->mode2 |= JFS_IMMUTABLE_FL;
3509+ if (flags & S_IXUNLINK)
3510+ jfs_ip->mode2 |= JFS_IXUNLINK_FL;
3511+
3512 if (flags & S_APPEND)
3513 jfs_ip->mode2 |= JFS_APPEND_FL;
3514 if (flags & S_NOATIME)
8de2f54c 3515@@ -61,6 +82,11 @@ void jfs_get_inode_flags(struct jfs_inod
2380c486
JR
3516 jfs_ip->mode2 |= JFS_DIRSYNC_FL;
3517 if (flags & S_SYNC)
3518 jfs_ip->mode2 |= JFS_SYNC_FL;
3519+
3520+ if (vflags & V_BARRIER)
3521+ jfs_ip->mode2 |= JFS_BARRIER_FL;
3522+ if (vflags & V_COW)
3523+ jfs_ip->mode2 |= JFS_COW_FL;
d337f35e
JR
3524 }
3525
3526 /*
c2806d43
AM
3527diff -urNp -x '*.orig' linux-4.4/fs/jfs/jfs_inode.h linux-4.4/fs/jfs/jfs_inode.h
3528--- linux-4.4/fs/jfs/jfs_inode.h 2016-01-11 00:01:32.000000000 +0100
3529+++ linux-4.4/fs/jfs/jfs_inode.h 2021-02-24 16:56:24.562822026 +0100
2380c486
JR
3530@@ -39,6 +39,7 @@ extern struct dentry *jfs_fh_to_dentry(s
3531 extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
3532 int fh_len, int fh_type);
d337f35e 3533 extern void jfs_set_inode_flags(struct inode *);
d4263eb0 3534+extern int jfs_sync_flags(struct inode *, int, int);
d337f35e 3535 extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
78865d5b 3536 extern int jfs_setattr(struct dentry *, struct iattr *);
d337f35e 3537
c2806d43
AM
3538diff -urNp -x '*.orig' linux-4.4/fs/jfs/namei.c linux-4.4/fs/jfs/namei.c
3539--- linux-4.4/fs/jfs/namei.c 2021-02-24 16:56:11.769085783 +0100
3540+++ linux-4.4/fs/jfs/namei.c 2021-02-24 16:56:24.562822026 +0100
d33d7b00 3541@@ -22,6 +22,7 @@
d337f35e
JR
3542 #include <linux/ctype.h>
3543 #include <linux/quotaops.h>
2380c486 3544 #include <linux/exportfs.h>
d337f35e
JR
3545+#include <linux/vs_tag.h>
3546 #include "jfs_incore.h"
3547 #include "jfs_superblock.h"
3548 #include "jfs_inode.h"
8931d859 3549@@ -1476,6 +1477,7 @@ static struct dentry *jfs_lookup(struct
a168f21d 3550 jfs_err("jfs_lookup: iget failed on inum %d", (uint)inum);
d337f35e
JR
3551 }
3552
3553+ dx_propagate_tag(nd, ip);
d33d7b00
AM
3554 return d_splice_alias(ip, dentry);
3555 }
d337f35e 3556
8931d859 3557@@ -1541,6 +1543,7 @@ const struct inode_operations jfs_dir_in
a168f21d 3558 .get_acl = jfs_get_acl,
bb20add7 3559 .set_acl = jfs_set_acl,
d337f35e
JR
3560 #endif
3561+ .sync_flags = jfs_sync_flags,
3562 };
3563
3564 const struct file_operations jfs_dir_operations = {
c2806d43
AM
3565diff -urNp -x '*.orig' linux-4.4/fs/jfs/super.c linux-4.4/fs/jfs/super.c
3566--- linux-4.4/fs/jfs/super.c 2021-02-24 16:56:11.769085783 +0100
3567+++ linux-4.4/fs/jfs/super.c 2021-02-24 16:56:24.562822026 +0100
927ca606 3568@@ -206,7 +206,8 @@ enum {
d337f35e
JR
3569 Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
3570 Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
537831f9
AM
3571 Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask,
3572- Opt_discard, Opt_nodiscard, Opt_discard_minblk
3573+ Opt_discard, Opt_nodiscard, Opt_discard_minblk,
d337f35e
JR
3574+ Opt_tag, Opt_notag, Opt_tagid
3575 };
3576
ec22aa5c 3577 static const match_table_t tokens = {
927ca606 3578@@ -216,6 +217,10 @@ static const match_table_t tokens = {
d337f35e
JR
3579 {Opt_resize, "resize=%u"},
3580 {Opt_resize_nosize, "resize"},
3581 {Opt_errors, "errors=%s"},
3582+ {Opt_tag, "tag"},
3583+ {Opt_notag, "notag"},
3584+ {Opt_tagid, "tagid=%u"},
3585+ {Opt_tag, "tagxid"},
3586 {Opt_ignore, "noquota"},
3587 {Opt_ignore, "quota"},
3588 {Opt_usrquota, "usrquota"},
927ca606 3589@@ -405,7 +410,20 @@ static int parse_options(char *options,
bb20add7 3590 pr_err("JFS: discard option not supported on device\n");
d337f35e
JR
3591 break;
3592 }
537831f9 3593-
d337f35e
JR
3594+#ifndef CONFIG_TAGGING_NONE
3595+ case Opt_tag:
3596+ *flag |= JFS_TAGGED;
3597+ break;
3598+ case Opt_notag:
3599+ *flag &= JFS_TAGGED;
3600+ break;
3601+#endif
3602+#ifdef CONFIG_PROPAGATE
3603+ case Opt_tagid:
3604+ /* use args[0] */
3605+ *flag |= JFS_TAGGED;
3606+ break;
3607+#endif
3608 default:
bb20add7
AM
3609 printk("jfs: Unrecognized mount option \"%s\" or missing value\n",
3610 p);
927ca606 3611@@ -437,6 +455,12 @@ static int jfs_remount(struct super_bloc
bb20add7 3612 if (!parse_options(data, sb, &newLVSize, &flag))
d337f35e 3613 return -EINVAL;
ab30d09f 3614
d337f35e
JR
3615+ if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) {
3616+ printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n",
3617+ sb->s_id);
3618+ return -EINVAL;
3619+ }
3620+
3621 if (newLVSize) {
3622 if (sb->s_flags & MS_RDONLY) {
bb20add7
AM
3623 pr_err("JFS: resize requires volume to be mounted read-write\n");
3624@@ -517,6 +541,9 @@ static int jfs_fill_super(struct super_b
d337f35e
JR
3625 #ifdef CONFIG_JFS_POSIX_ACL
3626 sb->s_flags |= MS_POSIXACL;
3627 #endif
3628+ /* map mount option tagxid */
3629+ if (sbi->flag & JFS_TAGGED)
3630+ sb->s_flags |= MS_TAGGED;
3631
3632 if (newLVSize) {
537831f9 3633 pr_err("resize option for remount only\n");
c2806d43
AM
3634diff -urNp -x '*.orig' linux-4.4/fs/libfs.c linux-4.4/fs/libfs.c
3635--- linux-4.4/fs/libfs.c 2021-02-24 16:56:11.769085783 +0100
3636+++ linux-4.4/fs/libfs.c 2021-02-24 16:56:24.562822026 +0100
927ca606 3637@@ -141,13 +141,14 @@ static inline unsigned char dt_type(stru
d337f35e
JR
3638 * both impossible due to the lock on directory.
3639 */
3640
c2e5f7c8 3641-int dcache_readdir(struct file *file, struct dir_context *ctx)
2380c486 3642+static inline int do_dcache_readdir_filter(struct file *filp,
c2e5f7c8 3643+ struct dir_context *ctx, int (*filter)(struct dentry *dentry))
d337f35e 3644 {
c2e5f7c8
JR
3645- struct dentry *dentry = file->f_path.dentry;
3646- struct dentry *cursor = file->private_data;
3647+ struct dentry *dentry = filp->f_path.dentry;
3648+ struct dentry *cursor = filp->private_data;
bb20add7 3649 struct list_head *p, *q = &cursor->d_child;
c2e5f7c8
JR
3650
3651- if (!dir_emit_dots(file, ctx))
3652+ if (!dir_emit_dots(filp, ctx))
3653 return 0;
3654 spin_lock(&dentry->d_lock);
3655 if (ctx->pos == 2)
927ca606 3656@@ -155,6 +156,8 @@ int dcache_readdir(struct file *file, st
c2e5f7c8
JR
3657
3658 for (p = q->next; p != &dentry->d_subdirs; p = p->next) {
bb20add7 3659 struct dentry *next = list_entry(p, struct dentry, d_child);
c2e5f7c8
JR
3660+ if (filter && !filter(next))
3661+ continue;
3662 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
3663 if (!simple_positive(next)) {
3664 spin_unlock(&next->d_lock);
927ca606 3665@@ -177,8 +180,22 @@ int dcache_readdir(struct file *file, st
c2e5f7c8 3666 spin_unlock(&dentry->d_lock);
d337f35e
JR
3667 return 0;
3668 }
c2e5f7c8
JR
3669+
3670 EXPORT_SYMBOL(dcache_readdir);
d337f35e 3671
c2e5f7c8 3672+int dcache_readdir(struct file *filp, struct dir_context *ctx)
d337f35e 3673+{
c2e5f7c8 3674+ return do_dcache_readdir_filter(filp, ctx, NULL);
d337f35e
JR
3675+}
3676+
c2e5f7c8
JR
3677+EXPORT_SYMBOL(dcache_readdir_filter);
3678+
3679+int dcache_readdir_filter(struct file *filp, struct dir_context *ctx,
d337f35e
JR
3680+ int (*filter)(struct dentry *))
3681+{
c2e5f7c8 3682+ return do_dcache_readdir_filter(filp, ctx, filter);
d337f35e 3683+}
d337f35e
JR
3684+
3685 ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
3686 {
3687 return -EISDIR;
c2806d43
AM
3688diff -urNp -x '*.orig' linux-4.4/fs/locks.c linux-4.4/fs/locks.c
3689--- linux-4.4/fs/locks.c 2021-02-24 16:56:11.769085783 +0100
3690+++ linux-4.4/fs/locks.c 2021-02-24 16:56:24.562822026 +0100
c2e5f7c8
JR
3691@@ -129,6 +129,8 @@
3692 #include <linux/hashtable.h>
3693 #include <linux/percpu.h>
3694 #include <linux/lglock.h>
d337f35e
JR
3695+#include <linux/vs_base.h>
3696+#include <linux/vs_limit.h>
3697
bb20add7
AM
3698 #define CREATE_TRACE_POINTS
3699 #include <trace/events/filelock.h>
927ca606 3700@@ -255,11 +257,15 @@ static void locks_init_lock_heads(struct
d337f35e 3701 /* Allocate an empty lock structure. */
ab30d09f 3702 struct file_lock *locks_alloc_lock(void)
d337f35e 3703 {
a168f21d 3704- struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
a0a3e0cf 3705+ struct file_lock *fl;
a168f21d
AM
3706
3707- if (fl)
3708- locks_init_lock_heads(fl);
a168f21d 3709+ fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
927ca606 3710
a168f21d
AM
3711+ if (fl) {
3712+ locks_init_lock_heads(fl);
927ca606 3713+ vx_locks_inc(fl);
a168f21d
AM
3714+ fl->fl_xid = -1;
3715+ }
3716 return fl;
3717 }
3718 EXPORT_SYMBOL_GPL(locks_alloc_lock);
927ca606 3719@@ -311,6 +317,7 @@ void locks_init_lock(struct file_lock *f
a168f21d
AM
3720 {
3721 memset(fl, 0, sizeof(struct file_lock));
3722 locks_init_lock_heads(fl);
3723+ fl->fl_xid = -1;
3724 }
3725
3726 EXPORT_SYMBOL(locks_init_lock);
927ca606 3727@@ -328,6 +335,7 @@ void locks_copy_conflock(struct file_loc
bb20add7
AM
3728 new->fl_start = fl->fl_start;
3729 new->fl_end = fl->fl_end;
d337f35e
JR
3730 new->fl_lmops = fl->fl_lmops;
3731+ new->fl_xid = fl->fl_xid;
bb20add7 3732 new->fl_ops = NULL;
d337f35e 3733
bb20add7 3734 if (fl->fl_lmops) {
927ca606 3735@@ -389,7 +397,10 @@ flock_make_lock(struct file *filp, unsig
d337f35e
JR
3736 fl->fl_flags = FL_FLOCK;
3737 fl->fl_type = type;
3738 fl->fl_end = OFFSET_MAX;
927ca606 3739-
d337f35e
JR
3740+
3741+ vxd_assert(filp->f_xid == vx_current_xid(),
3742+ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3743+ fl->fl_xid = filp->f_xid;
bb20add7
AM
3744 return fl;
3745 }
927ca606
AM
3746
3747@@ -511,6 +522,7 @@ static int lease_init(struct file *filp,
d337f35e 3748
bb20add7 3749 fl->fl_owner = filp;
d337f35e
JR
3750 fl->fl_pid = current->tgid;
3751+ fl->fl_xid = vx_current_xid();
3752
3753 fl->fl_file = filp;
3754 fl->fl_flags = FL_LEASE;
927ca606 3755@@ -530,6 +542,10 @@ static struct file_lock *lease_alloc(str
d337f35e 3756 if (fl == NULL)
2380c486 3757 return ERR_PTR(error);
d337f35e
JR
3758
3759+ fl->fl_xid = vx_current_xid();
3760+ if (filp)
3761+ vxd_assert(filp->f_xid == fl->fl_xid,
3762+ "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid);
d337f35e
JR
3763 error = lease_init(filp, type, fl);
3764 if (error) {
3765 locks_free_lock(fl);
927ca606
AM
3766@@ -908,6 +924,7 @@ static int flock_lock_inode(struct inode
3767 goto out;
ab30d09f 3768 }
2380c486
JR
3769
3770+ new_fl->fl_xid = -1;
3771 find_conflict:
927ca606
AM
3772 list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
3773 if (!flock_locks_conflict(request, fl))
3774@@ -934,7 +951,8 @@ out:
d337f35e
JR
3775 return error;
3776 }
3777
2380c486
JR
3778-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
3779+static int __posix_lock_file(struct inode *inode, struct file_lock *request,
61333608 3780+ struct file_lock *conflock, vxid_t xid)
d337f35e 3781 {
927ca606 3782 struct file_lock *fl, *tmp;
d337f35e 3783 struct file_lock *new_fl = NULL;
927ca606
AM
3784@@ -950,6 +968,9 @@ static int __posix_lock_file(struct inod
3785 if (!ctx)
3786 return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
d337f35e 3787
927ca606
AM
3788+ if (xid)
3789+ vxd_assert(xid == vx_current_xid(),
3790+ "xid(%d) == current(%d)", xid, vx_current_xid());
d337f35e
JR
3791 /*
3792 * We may need two file_lock structures for this operation,
3793 * so we get them in advance to avoid races.
927ca606 3794@@ -960,7 +981,11 @@ static int __posix_lock_file(struct inod
d337f35e
JR
3795 (request->fl_type != F_UNLCK ||
3796 request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
3797 new_fl = locks_alloc_lock();
3798+ new_fl->fl_xid = xid;
927ca606 3799+ // vx_locks_inc(new_fl);
d337f35e
JR
3800 new_fl2 = locks_alloc_lock();
3801+ new_fl2->fl_xid = xid;
927ca606 3802+ // vx_locks_inc(new_fl2);
d337f35e
JR
3803 }
3804
927ca606
AM
3805 spin_lock(&ctx->flc_lock);
3806@@ -1162,7 +1187,8 @@ static int __posix_lock_file(struct inod
2380c486 3807 int posix_lock_file(struct file *filp, struct file_lock *fl,
d337f35e
JR
3808 struct file_lock *conflock)
3809 {
b00e13aa
AM
3810- return __posix_lock_file(file_inode(filp), fl, conflock);
3811+ return __posix_lock_file(file_inode(filp),
d337f35e
JR
3812+ fl, conflock, filp->f_xid);
3813 }
2380c486 3814 EXPORT_SYMBOL(posix_lock_file);
d337f35e 3815
927ca606
AM
3816@@ -1178,7 +1204,7 @@ static int posix_lock_inode_wait(struct
3817 int error;
3818 might_sleep ();
3819 for (;;) {
3820- error = __posix_lock_file(inode, fl, NULL);
3821+ error = __posix_lock_file(inode, fl, NULL, 0);
3822 if (error != FILE_LOCK_DEFERRED)
3823 break;
3824 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
3825@@ -1257,10 +1283,13 @@ int locks_mandatory_area(int read_write,
3826 fl.fl_end = offset + count - 1;
3827
3828 for (;;) {
3829+ vxid_t f_xid = 0;
3830+
ca5d134c 3831 if (filp) {
bb20add7 3832 fl.fl_owner = filp;
ca5d134c
JR
3833 fl.fl_flags &= ~FL_SLEEP;
3834- error = __posix_lock_file(inode, &fl, NULL);
927ca606
AM
3835+ f_xid = filp->f_xid;
3836+ error = __posix_lock_file(inode, &fl, NULL, f_xid);
ca5d134c
JR
3837 if (!error)
3838 break;
3839 }
927ca606 3840@@ -1268,7 +1297,7 @@ int locks_mandatory_area(int read_write,
ca5d134c
JR
3841 if (sleep)
3842 fl.fl_flags |= FL_SLEEP;
3843 fl.fl_owner = current->files;
2380c486 3844- error = __posix_lock_file(inode, &fl, NULL);
927ca606 3845+ error = __posix_lock_file(inode, &fl, NULL, f_xid);
2380c486 3846 if (error != FILE_LOCK_DEFERRED)
d337f35e 3847 break;
2380c486 3848 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
927ca606 3849@@ -2165,6 +2194,11 @@ int fcntl_setlk(unsigned int fd, struct
d337f35e
JR
3850 if (file_lock == NULL)
3851 return -ENOLCK;
3852
3853+ vxd_assert(filp->f_xid == vx_current_xid(),
3854+ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3855+ file_lock->fl_xid = filp->f_xid;
927ca606 3856+ // vx_locks_inc(file_lock);
d337f35e
JR
3857+
3858 /*
3859 * This might block, so we do it before checking the inode.
3860 */
1d9ad342 3861@@ -2309,6 +2343,11 @@ int fcntl_setlk64(unsigned int fd, struc
d337f35e
JR
3862 if (file_lock == NULL)
3863 return -ENOLCK;
3864
3865+ vxd_assert(filp->f_xid == vx_current_xid(),
3866+ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3867+ file_lock->fl_xid = filp->f_xid;
927ca606 3868+ // vx_locks_inc(file_lock);
d337f35e
JR
3869+
3870 /*
3871 * This might block, so we do it before checking the inode.
3872 */
1d9ad342 3873@@ -2624,8 +2663,11 @@ static int locks_show(struct seq_file *f
2380c486 3874
c2e5f7c8 3875 lock_get_status(f, fl, iter->li_pos, "");
2380c486
JR
3876
3877- list_for_each_entry(bfl, &fl->fl_block, fl_block)
3878+ list_for_each_entry(bfl, &fl->fl_block, fl_block) {
3879+ if (!vx_check(fl->fl_xid, VS_WATCH_P | VS_IDENT))
d337f35e 3880+ continue;
bb20add7 3881 lock_get_status(f, bfl, iter->li_pos, " ->");
2380c486 3882+ }
d337f35e 3883
2380c486 3884 return 0;
ab30d09f 3885 }
c2806d43
AM
3886diff -urNp -x '*.orig' linux-4.4/fs/mount.h linux-4.4/fs/mount.h
3887--- linux-4.4/fs/mount.h 2021-02-24 16:56:11.772419221 +0100
3888+++ linux-4.4/fs/mount.h 2021-02-24 16:56:24.562822026 +0100
927ca606 3889@@ -68,6 +68,7 @@ struct mount {
bb20add7 3890 struct hlist_head mnt_pins;
927ca606
AM
3891 struct fs_pin mnt_umount;
3892 struct dentry *mnt_ex_mountpoint;
61333608 3893+ vtag_t mnt_tag; /* tagging used for vfsmount */
db55b927
AM
3894 };
3895
92598135 3896 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
c2806d43
AM
3897diff -urNp -x '*.orig' linux-4.4/fs/namei.c linux-4.4/fs/namei.c
3898--- linux-4.4/fs/namei.c 2021-02-24 16:56:11.772419221 +0100
3899+++ linux-4.4/fs/namei.c 2021-02-24 16:56:24.566155465 +0100
bb20add7 3900@@ -34,10 +34,20 @@
2380c486 3901 #include <linux/device_cgroup.h>
ec22aa5c 3902 #include <linux/fs_struct.h>
a168f21d 3903 #include <linux/posix_acl.h>
d337f35e 3904+#include <linux/proc_fs.h>
09be7631 3905+#include <linux/magic.h>
d337f35e
JR
3906+#include <linux/vserver/inode.h>
3907+#include <linux/vs_base.h>
3908+#include <linux/vs_tag.h>
3909+#include <linux/vs_cowbl.h>
2380c486
JR
3910+#include <linux/vs_device.h>
3911+#include <linux/vs_context.h>
3912+#include <linux/pid_namespace.h>
bb20add7 3913 #include <linux/hash.h>
d337f35e
JR
3914 #include <asm/uaccess.h>
3915
2bf5ad28 3916 #include "internal.h"
09be7631
JR
3917+#include "proc/internal.h"
3918 #include "mount.h"
3919
3920 /* [Feb-1997 T. Schoebel-Theuer]
8931d859 3921@@ -284,6 +294,93 @@ static int check_acl(struct inode *inode
a168f21d
AM
3922 return -EAGAIN;
3923 }
d337f35e 3924
7e46296a 3925+static inline int dx_barrier(const struct inode *inode)
d337f35e 3926+{
2380c486
JR
3927+ if (IS_BARRIER(inode) && !vx_check(0, VS_ADMIN | VS_WATCH)) {
3928+ vxwprintk_task(1, "did hit the barrier.");
d337f35e
JR
3929+ return 1;
3930+ }
3931+ return 0;
3932+}
3933+
7e46296a 3934+static int __dx_permission(const struct inode *inode, int mask)
d337f35e
JR
3935+{
3936+ if (dx_barrier(inode))
3937+ return -EACCES;
d337f35e 3938+
2380c486
JR
3939+ if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) {
3940+ /* devpts is xid tagged */
3941+ if (S_ISDIR(inode->i_mode) ||
61333608 3942+ vx_check((vxid_t)i_tag_read(inode), VS_IDENT | VS_WATCH_P))
2380c486 3943+ return 0;
ba86f833 3944+
adc1caaa 3945+ /* just pretend we didn't find anything */
ba86f833 3946+ return -ENOENT;
2380c486
JR
3947+ }
3948+ else if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
3949+ struct proc_dir_entry *de = PDE(inode);
3950+
bb20add7
AM
3951+ if (de && !vx_hide_check(0, de->vx_flags)) {
3952+ vxdprintk(VXD_CBIT(misc, 9),
3953+ VS_Q("%*s") " hidden by _dx_permission",
3954+ de->namelen, de->name);
2380c486 3955+ goto out;
bb20add7 3956+ }
2380c486
JR
3957+
3958+ if ((mask & (MAY_WRITE | MAY_APPEND))) {
3959+ struct pid *pid;
3960+ struct task_struct *tsk;
3961+
3962+ if (vx_check(0, VS_ADMIN | VS_WATCH_P) ||
3963+ vx_flags(VXF_STATE_SETUP, 0))
3964+ return 0;
3965+
3966+ pid = PROC_I(inode)->pid;
3967+ if (!pid)
3968+ goto out;
3969+
c6ceaf95 3970+ rcu_read_lock();
2380c486
JR
3971+ tsk = pid_task(pid, PIDTYPE_PID);
3972+ vxdprintk(VXD_CBIT(tag, 0), "accessing %p[#%u]",
3973+ tsk, (tsk ? vx_task_xid(tsk) : 0));
c6ceaf95
AM
3974+ if (tsk &&
3975+ vx_check(vx_task_xid(tsk), VS_IDENT | VS_WATCH_P)) {
3976+ rcu_read_unlock();
2380c486 3977+ return 0;
c6ceaf95
AM
3978+ }
3979+ rcu_read_unlock();
2380c486
JR
3980+ }
3981+ else {
3982+ /* FIXME: Should we block some entries here? */
3983+ return 0;
3984+ }
3985+ }
3986+ else {
3987+ if (dx_notagcheck(inode->i_sb) ||
61333608 3988+ dx_check((vxid_t)i_tag_read(inode),
537831f9 3989+ DX_HOSTID | DX_ADMIN | DX_WATCH | DX_IDENT))
2380c486
JR
3990+ return 0;
3991+ }
3992+
3993+out:
d337f35e
JR
3994+ return -EACCES;
3995+}
3996+
7e46296a 3997+int dx_permission(const struct inode *inode, int mask)
2380c486
JR
3998+{
3999+ int ret = __dx_permission(inode, mask);
4000+ if (unlikely(ret)) {
ba86f833
AM
4001+#ifndef CONFIG_VSERVER_WARN_DEVPTS
4002+ if (inode->i_sb->s_magic != DEVPTS_SUPER_MAGIC)
4003+#endif
4004+ vxwprintk_task(1,
4005+ "denied [0x%x] access to inode %s:%p[#%d,%lu]",
8ce283e1
AM
4006+ mask, inode->i_sb->s_id, inode,
4007+ i_tag_read(inode), inode->i_ino);
2380c486
JR
4008+ }
4009+ return ret;
4010+}
4011+
7e46296a 4012 /*
f6c5ef8b 4013 * This does the basic permission checking
7e46296a 4014 */
8931d859 4015@@ -408,10 +505,14 @@ int __inode_permission(struct inode *ino
d337f35e
JR
4016 /*
4017 * Nobody gets write access to an immutable file.
4018 */
4019- if (IS_IMMUTABLE(inode))
4020+ if (IS_IMMUTABLE(inode) && !IS_COW(inode))
4021 return -EACCES;
4022 }
4023
2380c486
JR
4024+ retval = dx_permission(inode, mask);
4025+ if (retval)
d337f35e 4026+ return retval;
2380c486 4027+
a168f21d
AM
4028 retval = do_inode_permission(inode, mask);
4029 if (retval)
4030 return retval;
48cb6a3c 4031@@ -1627,6 +1728,9 @@ static int lookup_fast(struct nameidata
927ca606
AM
4032 */
4033 if (negative)
4034 return -ENOENT;
be261992
AM
4035+
4036+ /* FIXME: check dx permission */
4037+
4038 path->mnt = mnt;
4039 path->dentry = dentry;
927ca606 4040 if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
48cb6a3c 4041@@ -1657,6 +1761,8 @@ unlazy:
927ca606
AM
4042 dput(dentry);
4043 return -ENOENT;
be261992 4044 }
be261992 4045+
927ca606 4046+ /* FIXME: check dx permission */
be261992
AM
4047 path->mnt = mnt;
4048 path->dentry = dentry;
927ca606 4049 err = follow_managed(path, nd);
48cb6a3c 4050@@ -2618,7 +2724,7 @@ static int may_delete(struct inode *dir,
d337f35e 4051 return -EPERM;
c2e5f7c8
JR
4052
4053 if (check_sticky(dir, inode) || IS_APPEND(inode) ||
4054- IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
4055+ IS_IXORUNLINK(inode) || IS_SWAPFILE(inode))
d337f35e
JR
4056 return -EPERM;
4057 if (isdir) {
bb20add7 4058 if (!d_is_dir(victim))
48cb6a3c 4059@@ -2700,19 +2806,25 @@ int vfs_create(struct inode *dir, struct
92598135 4060 bool want_excl)
a168f21d
AM
4061 {
4062 int error = may_create(dir, dentry);
a168f21d
AM
4063- if (error)
4064+ if (error) {
4065+ vxdprintk(VXD_CBIT(misc, 3), "may_create failed with %d", error);
537831f9 4066 return error;
a168f21d
AM
4067+ }
4068
4069 if (!dir->i_op->create)
4070 return -EACCES; /* shouldn't it be ENOSYS? */
4071 mode &= S_IALLUGO;
4072 mode |= S_IFREG;
4073 error = security_inode_create(dir, dentry, mode);
4074- if (error)
4075+ if (error) {
4076+ vxdprintk(VXD_CBIT(misc, 3), "security_inode_create failed with %d", error);
537831f9 4077 return error;
a168f21d 4078+ }
92598135 4079 error = dir->i_op->create(dir, dentry, mode, want_excl);
a168f21d
AM
4080 if (!error)
4081 fsnotify_create(dir, dentry);
4082+ else
4083+ vxdprintk(VXD_CBIT(misc, 3), "i_op->create failed with %d", error);
4084 return error;
4085 }
bb20add7 4086 EXPORT_SYMBOL(vfs_create);
48cb6a3c 4087@@ -2748,6 +2860,15 @@ static int may_open(struct path *path, i
ec22aa5c 4088 break;
2380c486 4089 }
d337f35e
JR
4090
4091+#ifdef CONFIG_VSERVER_COWBL
763640ca
JR
4092+ if (IS_COW(inode) &&
4093+ ((flag & O_ACCMODE) != O_RDONLY)) {
d337f35e
JR
4094+ if (IS_COW_LINK(inode))
4095+ return -EMLINK;
2380c486 4096+ inode->i_flags &= ~(S_IXUNLINK|S_IMMUTABLE);
d337f35e
JR
4097+ mark_inode_dirty(inode);
4098+ }
4099+#endif
ec22aa5c 4100 error = inode_permission(inode, acc_mode);
d337f35e
JR
4101 if (error)
4102 return error;
48cb6a3c 4103@@ -3233,6 +3354,16 @@ finish_open:
7b17263b 4104 }
92598135 4105 finish_open_created:
7b17263b
AM
4106 error = may_open(&nd->path, acc_mode, open_flag);
4107+#ifdef CONFIG_VSERVER_COWBL
4108+ if (error == -EMLINK) {
4109+ struct dentry *dentry;
f19bd705 4110+ dentry = cow_break_link(nd->name->name);
7b17263b
AM
4111+ if (IS_ERR(dentry))
4112+ error = PTR_ERR(dentry);
4113+ else
4114+ dput(dentry);
4115+ }
4116+#endif
4117 if (error)
92598135 4118 goto out;
bb20add7 4119
48cb6a3c 4120@@ -3357,6 +3488,9 @@ static struct file *path_openat(struct n
92598135 4121 int opened = 0;
7b17263b
AM
4122 int error;
4123
927ca606 4124+#ifdef CONFIG_VSERVER_COWBL
7b17263b 4125+restart:
927ca606 4126+#endif
92598135 4127 file = get_empty_filp();
b00e13aa
AM
4128 if (IS_ERR(file))
4129 return file;
48cb6a3c 4130@@ -3383,6 +3517,12 @@ static struct file *path_openat(struct n
f19bd705
AM
4131 }
4132 }
4133 terminate_walk(nd);
4134+#ifdef CONFIG_VSERVER_COWBL
4135+ if (error == -EMLINK) {
4136+ // path_cleanup(nd);
4137+ goto restart;
4138+ }
4139+#endif
4140 out2:
4141 if (!(opened & FILE_OPENED)) {
4142 BUG_ON(!error);
48cb6a3c 4143@@ -3503,6 +3643,11 @@ static struct dentry *filename_create(in
a168f21d
AM
4144 goto fail;
4145 }
927ca606
AM
4146 putname(name);
4147+ vxdprintk(VXD_CBIT(misc, 3), "filename_create path.dentry = %p (%.*s), dentry = %p (%.*s), d_inode = %p",
a168f21d
AM
4148+ path->dentry, path->dentry->d_name.len,
4149+ path->dentry->d_name.name, dentry,
4150+ dentry->d_name.len, dentry->d_name.name,
4151+ path->dentry->d_inode);
4152 return dentry;
92598135 4153 fail:
a168f21d 4154 dput(dentry);
48cb6a3c 4155@@ -3619,6 +3764,7 @@ retry:
927ca606
AM
4156 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
4157 break;
4158 }
4159+
927ca606
AM
4160 out:
4161 done_path_create(&path, dentry);
4162 if (retry_estale(error, lookup_flags)) {
48cb6a3c 4163@@ -4065,7 +4211,7 @@ int vfs_link(struct dentry *old_dentry,
d337f35e
JR
4164 /*
4165 * A link to an append-only or immutable file cannot be created.
4166 */
4167- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
4168+ if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
4169 return -EPERM;
ec22aa5c 4170 if (!dir->i_op->link)
d337f35e 4171 return -EPERM;
48cb6a3c 4172@@ -4574,6 +4720,330 @@ int generic_readlink(struct dentry *dent
d337f35e 4173 }
bb20add7 4174 EXPORT_SYMBOL(generic_readlink);
d337f35e
JR
4175
4176+
4177+#ifdef CONFIG_VSERVER_COWBL
4178+
2380c486 4179+static inline
8de2f54c
AM
4180+void dump_path(const char *name, struct path *path)
4181+{
4182+ vxdprintk(VXD_CBIT(misc, 3),
4183+ "%s: path=%p mnt=%p dentry=%p", name, path,
4184+ path ? path->mnt : NULL,
4185+ path ? path->dentry : NULL);
4186+
4187+ if (path && path->mnt)
4188+ vxdprintk(VXD_CBIT(misc, 3),
4189+ "%s: path mnt_sb=%p[#%d,#%d] mnt_root=%p[#%d]", name,
4190+ path->mnt->mnt_sb,
4191+ path->mnt->mnt_sb ? path->mnt->mnt_sb->s_count : -1,
4192+ path->mnt->mnt_sb ? atomic_read(&path->mnt->mnt_sb->s_active) : -1,
4193+ path->mnt->mnt_root,
4194+ path->mnt->mnt_root ? path->mnt->mnt_root->d_lockref.count : -1);
4195+
4196+ if (path && path->dentry)
4197+ vxdprintk(VXD_CBIT(misc, 3),
4198+ "%s: path dentry=%p[#%d]", name,
4199+ path->dentry,
4200+ path->dentry ? path->dentry->d_lockref.count : -1);
4201+}
4202+
4203+static inline
2380c486
JR
4204+long do_cow_splice(struct file *in, struct file *out, size_t len)
4205+{
4206+ loff_t ppos = 0;
09be7631 4207+ loff_t opos = 0;
2380c486 4208+
09be7631 4209+ return do_splice_direct(in, &ppos, out, &opos, len, 0);
2380c486
JR
4210+}
4211+
d337f35e
JR
4212+struct dentry *cow_break_link(const char *pathname)
4213+{
b00e13aa 4214+ int ret, mode, pathlen, redo = 0, drop = 1;
8de2f54c 4215+ struct path old_path = {}, par_path = {}, dir_path = {}, *new_path = NULL;
a168f21d 4216+ struct dentry *dir, *old_dentry, *new_dentry = NULL;
d337f35e
JR
4217+ struct file *old_file;
4218+ struct file *new_file;
8de2f54c
AM
4219+ struct qstr new_qstr;
4220+ int new_type;
d337f35e
JR
4221+ char *to, *path, pad='\251';
4222+ loff_t size;
927ca606
AM
4223+ struct filename *filename = getname_kernel(pathname);
4224+ struct filename *to_filename;
d337f35e 4225+
ba86f833
AM
4226+ vxdprintk(VXD_CBIT(misc, 1),
4227+ "cow_break_link(" VS_Q("%s") ")", pathname);
e915af4e 4228+
d337f35e 4229+ path = kmalloc(PATH_MAX, GFP_KERNEL);
2380c486 4230+ ret = -ENOMEM;
927ca606 4231+ if (!path || IS_ERR(filename))
2380c486 4232+ goto out;
d337f35e 4233+
8de2f54c
AM
4234+ /* old_path will have refs to dentry and mnt */
4235+ ret = filename_lookup(AT_FDCWD, filename, LOOKUP_FOLLOW, &old_path, NULL);
a168f21d 4236+ vxdprintk(VXD_CBIT(misc, 2),
e915af4e 4237+ "do_path_lookup(old): %d", ret);
2380c486
JR
4238+ if (ret < 0)
4239+ goto out_free_path;
d337f35e 4240+
8de2f54c
AM
4241+ dump_path("cow (old)", &old_path);
4242+
e915af4e 4243+ /* no explicit reference for old_dentry here */
8de2f54c
AM
4244+ old_dentry = old_path.dentry;
4245+
4246+ /* speculative put */
4247+ // dput(old_dentry);
2380c486 4248+
e915af4e 4249+ mode = old_dentry->d_inode->i_mode;
8de2f54c 4250+ to = d_path(&old_path, path, PATH_MAX-2);
d337f35e 4251+ pathlen = strlen(to);
ba86f833 4252+ vxdprintk(VXD_CBIT(misc, 2),
a168f21d
AM
4253+ "old path " VS_Q("%s") " [%p:" VS_Q("%.*s") ":%d]", to,
4254+ old_dentry,
4255+ old_dentry->d_name.len, old_dentry->d_name.name,
4256+ old_dentry->d_name.len);
d337f35e 4257+
2380c486 4258+ to[pathlen + 1] = 0;
d337f35e 4259+retry:
a168f21d 4260+ new_dentry = NULL;
d337f35e 4261+ to[pathlen] = pad--;
a168f21d 4262+ ret = -ELOOP;
d337f35e
JR
4263+ if (pad <= '\240')
4264+ goto out_rel_old;
4265+
ba86f833 4266+ vxdprintk(VXD_CBIT(misc, 1), "temp copy " VS_Q("%s"), to);
e915af4e 4267+
8de2f54c 4268+ /* par_path will have refs to dentry and mnt */
927ca606 4269+ to_filename = getname_kernel(to);
8de2f54c
AM
4270+ to_filename = filename_parentat(AT_FDCWD, to_filename,
4271+ LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE, &par_path, &new_qstr, &new_type);
4272+ vxdprintk(VXD_CBIT(misc, 2), "filename_parentat(new): %p", to_filename);
4273+ dump_path("cow (par)", &par_path);
4274+ if (IS_ERR(to_filename))
2380c486
JR
4275+ goto retry;
4276+
8de2f54c
AM
4277+ vxdprintk(VXD_CBIT(misc, 2), "to_filename refcnt=%d", to_filename->refcnt);
4278+ // putname(to_filename);
4279+
e915af4e
AM
4280+ /* filename_create() takes the dir inode mutex if successful.
4281+ dir_path will hold refs to dentry and mnt and
b00e13aa 4282+ we'll have write access to the mnt */
8de2f54c 4283+ new_dentry = filename_create(AT_FDCWD, to_filename, &dir_path, 0);
a168f21d 4284+ if (!new_dentry || IS_ERR(new_dentry)) {
8de2f54c 4285+ path_put(&par_path);
a168f21d 4286+ vxdprintk(VXD_CBIT(misc, 2),
8de2f54c 4287+ "filename_create(new) failed with %ld",
a168f21d 4288+ PTR_ERR(new_dentry));
d337f35e
JR
4289+ goto retry;
4290+ }
2380c486 4291+ vxdprintk(VXD_CBIT(misc, 2),
8de2f54c 4292+ "filename_create(new): %p [" VS_Q("%.*s") ":%d]",
a168f21d
AM
4293+ new_dentry,
4294+ new_dentry->d_name.len, new_dentry->d_name.name,
4295+ new_dentry->d_name.len);
4296+
8de2f54c
AM
4297+ dump_path("cow (dir)", &dir_path);
4298+
e915af4e
AM
4299+ /* take a reference on new_dentry */
4300+ dget(new_dentry);
4301+
4302+ /* dentry/mnt refs handed over to new_path */
4303+ new_path = &dir_path;
4304+
4305+ /* dentry for old/new dir */
8de2f54c 4306+ dir = par_path.dentry;
d337f35e 4307+
e915af4e
AM
4308+ /* give up reference on dir */
4309+ dput(new_path->dentry);
4310+
4311+ /* new_dentry already has a reference */
4312+ new_path->dentry = new_dentry;
4313+
4314+ ret = vfs_create(dir->d_inode, new_dentry, mode, 1);
d337f35e
JR
4315+ vxdprintk(VXD_CBIT(misc, 2),
4316+ "vfs_create(new): %d", ret);
4317+ if (ret == -EEXIST) {
8de2f54c 4318+ path_put(&par_path);
b00e13aa 4319+ mutex_unlock(&dir->d_inode->i_mutex);
e915af4e
AM
4320+ mnt_drop_write(new_path->mnt);
4321+ path_put(new_path);
4322+ new_dentry = NULL;
d337f35e
JR
4323+ goto retry;
4324+ }
2380c486
JR
4325+ else if (ret < 0)
4326+ goto out_unlock_new;
4327+
927ca606 4328+ /* the old file went away */
2380c486 4329+ ret = -ENOENT;
a168f21d 4330+ if ((redo = d_unhashed(old_dentry)))
2380c486
JR
4331+ goto out_unlock_new;
4332+
e915af4e 4333+ /* doesn't change refs for old_path */
8de2f54c 4334+ old_file = dentry_open(&old_path, O_RDONLY, current_cred());
d337f35e
JR
4335+ vxdprintk(VXD_CBIT(misc, 2),
4336+ "dentry_open(old): %p", old_file);
a168f21d
AM
4337+ if (IS_ERR(old_file)) {
4338+ ret = PTR_ERR(old_file);
2380c486
JR
4339+ goto out_unlock_new;
4340+ }
d337f35e 4341+
e915af4e
AM
4342+ /* doesn't change refs for new_path */
4343+ new_file = dentry_open(new_path, O_WRONLY, current_cred());
d337f35e
JR
4344+ vxdprintk(VXD_CBIT(misc, 2),
4345+ "dentry_open(new): %p", new_file);
a168f21d
AM
4346+ if (IS_ERR(new_file)) {
4347+ ret = PTR_ERR(new_file);
d337f35e 4348+ goto out_fput_old;
a168f21d 4349+ }
d337f35e 4350+
8de2f54c 4351+ /* unlock the inode mutex from filename_create() */
b00e13aa
AM
4352+ mutex_unlock(&dir->d_inode->i_mutex);
4353+
4354+ /* drop write access to mnt */
4355+ mnt_drop_write(new_path->mnt);
4356+
4357+ drop = 0;
4358+
927ca606 4359+ size = i_size_read(old_file->f_path.dentry->d_inode);
2380c486
JR
4360+ ret = do_cow_splice(old_file, new_file, size);
4361+ vxdprintk(VXD_CBIT(misc, 2), "do_splice_direct: %d", ret);
4362+ if (ret < 0) {
d337f35e 4363+ goto out_fput_both;
2380c486
JR
4364+ } else if (ret < size) {
4365+ ret = -ENOSPC;
4366+ goto out_fput_both;
4367+ } else {
a168f21d
AM
4368+ struct inode *old_inode = old_dentry->d_inode;
4369+ struct inode *new_inode = new_dentry->d_inode;
2380c486
JR
4370+ struct iattr attr = {
4371+ .ia_uid = old_inode->i_uid,
4372+ .ia_gid = old_inode->i_gid,
4373+ .ia_valid = ATTR_UID | ATTR_GID
4374+ };
4375+
93de0823
AM
4376+ setattr_copy(new_inode, &attr);
4377+ mark_inode_dirty(new_inode);
2380c486 4378+ }
d337f35e 4379+
e915af4e 4380+ /* lock rename mutex */
a168f21d 4381+ mutex_lock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
2380c486
JR
4382+
4383+ /* drop out late */
4384+ ret = -ENOENT;
a168f21d 4385+ if ((redo = d_unhashed(old_dentry)))
2380c486
JR
4386+ goto out_unlock;
4387+
4388+ vxdprintk(VXD_CBIT(misc, 2),
ba86f833 4389+ "vfs_rename: [" VS_Q("%*s") ":%d] -> [" VS_Q("%*s") ":%d]",
a168f21d
AM
4390+ new_dentry->d_name.len, new_dentry->d_name.name,
4391+ new_dentry->d_name.len,
4392+ old_dentry->d_name.len, old_dentry->d_name.name,
4393+ old_dentry->d_name.len);
8de2f54c 4394+ ret = vfs_rename(par_path.dentry->d_inode, new_dentry,
eafa5b1d 4395+ old_dentry->d_parent->d_inode, old_dentry, NULL, 0);
d337f35e 4396+ vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
2380c486
JR
4397+
4398+out_unlock:
a168f21d 4399+ mutex_unlock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
d337f35e
JR
4400+
4401+out_fput_both:
4402+ vxdprintk(VXD_CBIT(misc, 3),
2380c486 4403+ "fput(new_file=%p[#%ld])", new_file,
4a036bed 4404+ atomic_long_read(&new_file->f_count));
d337f35e
JR
4405+ fput(new_file);
4406+
4407+out_fput_old:
4408+ vxdprintk(VXD_CBIT(misc, 3),
2380c486 4409+ "fput(old_file=%p[#%ld])", old_file,
4a036bed 4410+ atomic_long_read(&old_file->f_count));
d337f35e
JR
4411+ fput(old_file);
4412+
2380c486 4413+out_unlock_new:
8de2f54c
AM
4414+ /* drop references from par_path */
4415+ path_put(&par_path);
e915af4e 4416+
b00e13aa 4417+ if (drop) {
8de2f54c 4418+ /* unlock the inode mutex from filename_create() */
b00e13aa
AM
4419+ mutex_unlock(&dir->d_inode->i_mutex);
4420+
4421+ /* drop write access to mnt */
4422+ mnt_drop_write(new_path->mnt);
4423+ }
e915af4e 4424+
2380c486
JR
4425+ if (!ret)
4426+ goto out_redo;
4427+
4428+ /* error path cleanup */
c2e5f7c8 4429+ vfs_unlink(dir->d_inode, new_dentry, NULL);
2380c486
JR
4430+
4431+out_redo:
4432+ if (!redo)
4433+ goto out_rel_both;
e915af4e
AM
4434+
4435+ /* look up the dentry once again;
8de2f54c
AM
4436+ the refreshed old_path is released at out_rel_old */
4437+ ret = filename_lookup(AT_FDCWD, filename, LOOKUP_FOLLOW, &old_path, NULL);
2380c486
JR
4438+ if (ret)
4439+ goto out_rel_both;
d337f35e 4440+
e915af4e 4441+ /* drop reference on new_dentry */
a168f21d 4442+ dput(new_dentry);
8de2f54c 4443+ new_dentry = old_path.dentry;
e915af4e 4444+ dget(new_dentry);
2380c486 4445+ vxdprintk(VXD_CBIT(misc, 2),
763640ca 4446+ "do_path_lookup(redo): %p [" VS_Q("%.*s") ":%d]",
a168f21d
AM
4447+ new_dentry,
4448+ new_dentry->d_name.len, new_dentry->d_name.name,
4449+ new_dentry->d_name.len);
2380c486
JR
4450+
4451+out_rel_both:
8de2f54c 4452+ dump_path("put (new)", new_path);
e915af4e
AM
4453+ if (new_path)
4454+ path_put(new_path);
d337f35e 4455+out_rel_old:
8de2f54c
AM
4456+ dump_path("put (old)", &old_path);
4457+ path_put(&old_path);
2380c486 4458+out_free_path:
d337f35e 4459+ kfree(path);
2380c486 4460+out:
a168f21d
AM
4461+ if (ret) {
4462+ dput(new_dentry);
4463+ new_dentry = ERR_PTR(ret);
4464+ }
8de2f54c
AM
4465+ // if (!IS_ERR(filename))
4466+ // putname(filename);
a168f21d 4467+ vxdprintk(VXD_CBIT(misc, 3),
e915af4e 4468+ "cow_break_link returning with %p", new_dentry);
a168f21d 4469+ return new_dentry;
d337f35e
JR
4470+}
4471+
4472+#endif
1e8b8f9b
AM
4473+
4474+int vx_info_mnt_namespace(struct mnt_namespace *ns, char *buffer)
4475+{
4476+ struct path path;
4477+ struct vfsmount *vmnt;
4478+ char *pstr, *root;
4479+ int length = 0;
4480+
4481+ pstr = kmalloc(PATH_MAX, GFP_KERNEL);
4482+ if (!pstr)
4483+ return 0;
4484+
4485+ vmnt = &ns->root->mnt;
4486+ path.mnt = vmnt;
4487+ path.dentry = vmnt->mnt_root;
4488+ root = d_path(&path, pstr, PATH_MAX - 2);
4489+ length = sprintf(buffer + length,
4490+ "Namespace:\t%p [#%u]\n"
4491+ "RootPath:\t%s\n",
4492+ ns, atomic_read(&ns->count),
4493+ root);
4494+ kfree(pstr);
4495+ return length;
4496+}
bb20add7 4497+
265de2f7 4498+EXPORT_SYMBOL(vx_info_mnt_namespace);
d337f35e
JR
4499+
4500 /* get the link contents into pagecache */
4501 static char *page_getlink(struct dentry * dentry, struct page **ppage)
4502 {
c2806d43
AM
4503diff -urNp -x '*.orig' linux-4.4/fs/namespace.c linux-4.4/fs/namespace.c
4504--- linux-4.4/fs/namespace.c 2021-02-24 16:56:11.772419221 +0100
4505+++ linux-4.4/fs/namespace.c 2021-02-24 16:56:24.566155465 +0100
978063ce 4506@@ -24,6 +24,11 @@
09be7631 4507 #include <linux/magic.h>
52afa9bd 4508 #include <linux/bootmem.h>
bb20add7 4509 #include <linux/task_work.h>
d337f35e 4510+#include <linux/vs_base.h>
d337f35e
JR
4511+#include <linux/vs_context.h>
4512+#include <linux/vs_tag.h>
2380c486
JR
4513+#include <linux/vserver/space.h>
4514+#include <linux/vserver/global.h>
d337f35e 4515 #include "pnode.h"
db55b927
AM
4516 #include "internal.h"
4517
8931d859 4518@@ -980,6 +985,10 @@ vfs_kern_mount(struct file_system_type *
be261992
AM
4519 if (!type)
4520 return ERR_PTR(-ENODEV);
4521
4522+ if ((type->fs_flags & FS_BINARY_MOUNTDATA) &&
4523+ !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT))
4524+ return ERR_PTR(-EPERM);
4525+
4526 mnt = alloc_vfsmnt(name);
4527 if (!mnt)
4528 return ERR_PTR(-ENOMEM);
8931d859 4529@@ -1056,6 +1065,7 @@ static struct mount *clone_mnt(struct mo
92598135
AM
4530 mnt->mnt.mnt_root = dget(root);
4531 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
4532 mnt->mnt_parent = mnt;
c2e5f7c8
JR
4533+ mnt->mnt_tag = old->mnt_tag;
4534 lock_mount_hash();
92598135 4535 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
c2e5f7c8 4536 unlock_mount_hash();
3cc86a71 4537@@ -1651,7 +1661,8 @@ out_unlock:
c2e5f7c8
JR
4538 */
4539 static inline bool may_mount(void)
4540 {
4541- return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
4542+ return vx_ns_capable(current->nsproxy->mnt_ns->user_ns,
4543+ CAP_SYS_ADMIN, VXC_SECURE_MOUNT);
4544 }
4545
4546 /*
3cc86a71 4547@@ -2158,6 +2169,7 @@ static int do_change_type(struct path *p
763640ca
JR
4548 if (err)
4549 goto out_unlock;
4550 }
4551+ // mnt->mnt_flags = mnt_flags;
4552
c2e5f7c8 4553 lock_mount_hash();
763640ca 4554 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
3cc86a71 4555@@ -2186,12 +2198,14 @@ static bool has_locked_children(struct m
ec22aa5c 4556 * do loopback mount.
d337f35e 4557 */
537831f9 4558 static int do_loopback(struct path *path, const char *old_name,
2380c486 4559- int recurse)
61333608 4560+ vtag_t tag, unsigned long flags, int mnt_flags)
d337f35e 4561 {
ec22aa5c 4562 struct path old_path;
09be7631
JR
4563 struct mount *mnt = NULL, *old, *parent;
4564 struct mountpoint *mp;
d337f35e 4565+ int recurse = flags & MS_REC;
b00e13aa 4566 int err;
2380c486 4567+
d337f35e 4568 if (!old_name || !*old_name)
b00e13aa
AM
4569 return -EINVAL;
4570 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
3cc86a71 4571@@ -2271,7 +2285,7 @@ static int change_mount_flags(struct vfs
ec22aa5c 4572 * on it - tough luck.
d337f35e 4573 */
ec22aa5c 4574 static int do_remount(struct path *path, int flags, int mnt_flags,
d337f35e 4575- void *data)
61333608 4576+ void *data, vxid_t xid)
d337f35e
JR
4577 {
4578 int err;
ec22aa5c 4579 struct super_block *sb = path->mnt->mnt_sb;
3cc86a71 4580@@ -2779,6 +2793,7 @@ long do_mount(const char *dev_name, cons
ec22aa5c 4581 struct path path;
d337f35e
JR
4582 int retval = 0;
4583 int mnt_flags = 0;
61333608 4584+ vtag_t tag = 0;
d337f35e
JR
4585
4586 /* Discard magic */
4587 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
3cc86a71 4588@@ -2804,6 +2819,12 @@ long do_mount(const char *dev_name, cons
ec22aa5c
AM
4589 if (!(flags & MS_NOATIME))
4590 mnt_flags |= MNT_RELATIME;
d337f35e 4591
2380c486
JR
4592+ if (dx_parse_tag(data_page, &tag, 1, &mnt_flags, &flags)) {
4593+ /* FIXME: bind and re-mounts get the tag flag? */
d337f35e
JR
4594+ if (flags & (MS_BIND|MS_REMOUNT))
4595+ flags |= MS_TAGID;
4596+ }
d337f35e
JR
4597+
4598 /* Separate the per-mountpoint flags */
d337f35e
JR
4599 if (flags & MS_NOSUID)
4600 mnt_flags |= MNT_NOSUID;
3cc86a71 4601@@ -2828,15 +2849,17 @@ long do_mount(const char *dev_name, cons
bb20add7
AM
4602 mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
4603 }
d337f35e 4604
b00e13aa 4605+ if (!vx_capable(CAP_SYS_ADMIN, VXC_DEV_MOUNT))
d337f35e 4606+ mnt_flags |= MNT_NODEV;
c146dd73 4607 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
ec22aa5c
AM
4608 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
4609 MS_STRICTATIME);
d337f35e
JR
4610
4611 if (flags & MS_REMOUNT)
ec22aa5c 4612 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
d337f35e
JR
4613- data_page);
4614+ data_page, tag);
4615 else if (flags & MS_BIND)
ec22aa5c
AM
4616- retval = do_loopback(&path, dev_name, flags & MS_REC);
4617+ retval = do_loopback(&path, dev_name, tag, flags, mnt_flags);
d337f35e 4618 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
ec22aa5c 4619 retval = do_change_type(&path, flags);
d337f35e 4620 else if (flags & MS_MOVE)
3cc86a71 4621@@ -2956,6 +2979,7 @@ struct mnt_namespace *copy_mnt_ns(unsign
c2e5f7c8 4622 p = next_mnt(p, old);
d337f35e 4623 }
09be7631 4624 namespace_unlock();
2380c486
JR
4625+ atomic_inc(&vs_global_mnt_ns);
4626
4627 if (rootmnt)
4628 mntput(rootmnt);
3cc86a71 4629@@ -3131,9 +3155,10 @@ SYSCALL_DEFINE2(pivot_root, const char _
db55b927
AM
4630 new_mnt = real_mount(new.mnt);
4631 root_mnt = real_mount(root.mnt);
09be7631
JR
4632 old_mnt = real_mount(old.mnt);
4633- if (IS_MNT_SHARED(old_mnt) ||
4634+ if ((IS_MNT_SHARED(old_mnt) ||
db55b927
AM
4635 IS_MNT_SHARED(new_mnt->mnt_parent) ||
4636- IS_MNT_SHARED(root_mnt->mnt_parent))
4637+ IS_MNT_SHARED(root_mnt->mnt_parent)) &&
50e68740 4638+ !vx_flags(VXF_STATE_SETUP, 0))
763640ca 4639 goto out4;
db55b927 4640 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
763640ca 4641 goto out4;
3cc86a71 4642@@ -3271,6 +3296,7 @@ void put_mnt_ns(struct mnt_namespace *ns
c2e5f7c8
JR
4643 if (!atomic_dec_and_test(&ns->count))
4644 return;
4645 drop_collected_mounts(&ns->root->mnt);
2380c486 4646+ atomic_dec(&vs_global_mnt_ns);
b00e13aa 4647 free_mnt_ns(ns);
2380c486 4648 }
db55b927 4649
c2806d43
AM
4650diff -urNp -x '*.orig' linux-4.4/fs/nfs/client.c linux-4.4/fs/nfs/client.c
4651--- linux-4.4/fs/nfs/client.c 2016-01-11 00:01:32.000000000 +0100
4652+++ linux-4.4/fs/nfs/client.c 2021-02-24 16:56:24.566155465 +0100
927ca606 4653@@ -583,6 +583,9 @@ int nfs_init_server_rpcclient(struct nfs
2380c486
JR
4654 if (server->flags & NFS_MOUNT_SOFT)
4655 server->client->cl_softrtry = 1;
d337f35e
JR
4656
4657+ server->client->cl_tag = 0;
4658+ if (server->flags & NFS_MOUNT_TAGGED)
4659+ server->client->cl_tag = 1;
4660 return 0;
4661 }
92598135 4662 EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient);
927ca606 4663@@ -760,6 +763,10 @@ static void nfs_server_set_fsinfo(struct
d337f35e
JR
4664 server->acdirmin = server->acdirmax = 0;
4665 }
4666
4667+ /* FIXME: needs fsinfo
4668+ if (server->flags & NFS_MOUNT_TAGGED)
4669+ sb->s_flags |= MS_TAGGED; */
4670+
4671 server->maxfilesize = fsinfo->maxfilesize;
4672
ab30d09f 4673 server->time_delta = fsinfo->time_delta;
c2806d43
AM
4674diff -urNp -x '*.orig' linux-4.4/fs/nfs/dir.c linux-4.4/fs/nfs/dir.c
4675--- linux-4.4/fs/nfs/dir.c 2021-02-24 16:56:11.775752660 +0100
4676+++ linux-4.4/fs/nfs/dir.c 2021-02-24 16:56:24.566155465 +0100
c2e5f7c8 4677@@ -37,6 +37,7 @@
2380c486 4678 #include <linux/sched.h>
ab30d09f 4679 #include <linux/kmemleak.h>
d33d7b00 4680 #include <linux/xattr.h>
d337f35e
JR
4681+#include <linux/vs_tag.h>
4682
d337f35e 4683 #include "delegation.h"
ab30d09f 4684 #include "iostat.h"
c2806d43 4685@@ -1420,6 +1421,7 @@ struct dentry *nfs_lookup(struct inode *
42bc425c
AM
4686 /* Success: notify readdir to use READDIRPLUS */
4687 nfs_advise_use_readdirplus(dir);
d337f35e
JR
4688
4689+ dx_propagate_tag(nd, inode);
4690 no_entry:
927ca606 4691 res = d_splice_alias(inode, dentry);
d337f35e 4692 if (res != NULL) {
c2806d43
AM
4693diff -urNp -x '*.orig' linux-4.4/fs/nfs/inode.c linux-4.4/fs/nfs/inode.c
4694--- linux-4.4/fs/nfs/inode.c 2021-02-24 16:56:11.779086098 +0100
4695+++ linux-4.4/fs/nfs/inode.c 2021-02-24 16:56:24.566155465 +0100
c2e5f7c8
JR
4696@@ -38,6 +38,7 @@
4697 #include <linux/slab.h>
d33d7b00 4698 #include <linux/compat.h>
db55b927 4699 #include <linux/freezer.h>
d337f35e
JR
4700+#include <linux/vs_tag.h>
4701
d337f35e 4702 #include <asm/uaccess.h>
1e8b8f9b 4703
927ca606 4704@@ -376,6 +377,8 @@ nfs_fhget(struct super_block *sb, struct
ec22aa5c
AM
4705 if (inode->i_state & I_NEW) {
4706 struct nfs_inode *nfsi = NFS_I(inode);
4707 unsigned long now = jiffies;
a4a22af8
AM
4708+ kuid_t kuid;
4709+ kgid_t kgid;
ec22aa5c
AM
4710
4711 /* We set i_ino for the few things that still rely on it,
4712 * such as stat(2) */
927ca606 4713@@ -419,8 +422,8 @@ nfs_fhget(struct super_block *sb, struct
f6c5ef8b 4714 inode->i_version = 0;
ec22aa5c 4715 inode->i_size = 0;
f6c5ef8b 4716 clear_nlink(inode);
b00e13aa
AM
4717- inode->i_uid = make_kuid(&init_user_ns, -2);
4718- inode->i_gid = make_kgid(&init_user_ns, -2);
a4a22af8
AM
4719+ kuid = make_kuid(&init_user_ns, -2);
4720+ kgid = make_kgid(&init_user_ns, -2);
ec22aa5c
AM
4721 inode->i_blocks = 0;
4722 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
42bc425c 4723 nfsi->write_io = 0;
927ca606 4724@@ -455,11 +458,11 @@ nfs_fhget(struct super_block *sb, struct
7e46296a 4725 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
bb20add7 4726 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
ec22aa5c
AM
4727 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
4728- inode->i_uid = fattr->uid;
a4a22af8 4729+ kuid = fattr->uid;
7e46296a 4730 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
bb20add7 4731 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
ec22aa5c
AM
4732 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
4733- inode->i_gid = fattr->gid;
a4a22af8 4734+ kgid = fattr->gid;
7e46296a 4735 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
bb20add7 4736 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
42bc425c 4737 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
927ca606 4738@@ -470,6 +473,10 @@ nfs_fhget(struct super_block *sb, struct
ec22aa5c
AM
4739 */
4740 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
4741 }
a4a22af8
AM
4742+ inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
4743+ inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
4744+ inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, GLOBAL_ROOT_TAG);
ec22aa5c 4745+ /* maybe fattr->xid someday */
c2e5f7c8
JR
4746
4747 nfs_setsecurity(inode, fattr, label);
4748
927ca606 4749@@ -611,6 +618,8 @@ void nfs_setattr_update_inode(struct ino
d337f35e
JR
4750 inode->i_uid = attr->ia_uid;
4751 if ((attr->ia_valid & ATTR_GID) != 0)
4752 inode->i_gid = attr->ia_gid;
4753+ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
4754+ inode->i_tag = attr->ia_tag;
bb20add7
AM
4755 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
4756 | NFS_INO_INVALID_ACL);
927ca606 4757 }
3cc86a71 4758@@ -1236,7 +1245,9 @@ static int nfs_check_inode_attributes(st
d337f35e
JR
4759 struct nfs_inode *nfsi = NFS_I(inode);
4760 loff_t cur_size, new_isize;
2380c486 4761 unsigned long invalid = 0;
a4a22af8 4762-
b00e13aa
AM
4763+ kuid_t kuid;
4764+ kgid_t kgid;
4765+ ktag_t ktag;
d337f35e 4766
42bc425c 4767 if (nfs_have_delegated_attributes(inode))
a4a22af8 4768 return 0;
3cc86a71 4769@@ -1263,13 +1274,18 @@ static int nfs_check_inode_attributes(st
927ca606
AM
4770 if (nfsi->nrequests != 0)
4771 invalid &= ~NFS_INO_REVAL_PAGECACHE;
d337f35e 4772
a4a22af8
AM
4773+ kuid = INOTAG_KUID(DX_TAG(inode), fattr->uid, fattr->gid);
4774+ kgid = INOTAG_KGID(DX_TAG(inode), fattr->uid, fattr->gid);
4775+ ktag = INOTAG_KTAG(DX_TAG(inode), fattr->uid, fattr->gid, GLOBAL_ROOT_TAG);
d337f35e
JR
4776+
4777 /* Have any file permissions changed? */
ec22aa5c 4778 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
9474138d 4779 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
b00e13aa
AM
4780- if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
4781+ if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, kuid))
ec22aa5c 4782 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
b00e13aa
AM
4783- if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
4784+ if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, kgid))
ec22aa5c
AM
4785 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
4786+ /* maybe check for tag too? */
d337f35e
JR
4787
4788 /* Has the link count changed? */
ec22aa5c 4789 if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
3cc86a71 4790@@ -1643,6 +1659,9 @@ static int nfs_update_inode(struct inode
2380c486 4791 unsigned long now = jiffies;
7e46296a 4792 unsigned long save_cache_validity;
927ca606 4793 bool cache_revalidated = true;
a4a22af8
AM
4794+ kuid_t kuid;
4795+ kgid_t kgid;
4796+ ktag_t ktag;
d337f35e 4797
bb20add7 4798 dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
2380c486 4799 __func__, inode->i_sb->s_id, inode->i_ino,
3cc86a71 4800@@ -1753,6 +1772,9 @@ static int nfs_update_inode(struct inode
927ca606
AM
4801 cache_revalidated = false;
4802 }
d337f35e 4803
a4a22af8
AM
4804+ kuid = TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag);
4805+ kgid = TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag);
4806+ ktag = TAGINO_KTAG(DX_TAG(inode), inode->i_tag);
ec22aa5c
AM
4807
4808 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
4809 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
3cc86a71 4810@@ -1807,6 +1829,10 @@ static int nfs_update_inode(struct inode
927ca606
AM
4811 cache_revalidated = false;
4812 }
ec22aa5c 4813
a4a22af8
AM
4814+ inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
4815+ inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
4816+ inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, ktag);
ec22aa5c
AM
4817+
4818 if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
4819 if (inode->i_nlink != fattr->nlink) {
4820 invalid |= NFS_INO_INVALID_ATTR;
c2806d43
AM
4821diff -urNp -x '*.orig' linux-4.4/fs/nfs/nfs3xdr.c linux-4.4/fs/nfs/nfs3xdr.c
4822--- linux-4.4/fs/nfs/nfs3xdr.c 2016-01-11 00:01:32.000000000 +0100
4823+++ linux-4.4/fs/nfs/nfs3xdr.c 2021-02-24 16:56:24.566155465 +0100
78865d5b 4824@@ -20,6 +20,7 @@
d337f35e
JR
4825 #include <linux/nfs3.h>
4826 #include <linux/nfs_fs.h>
4827 #include <linux/nfsacl.h>
4828+#include <linux/vs_tag.h>
4829 #include "internal.h"
4830
4831 #define NFSDBG_FACILITY NFSDBG_XDR
b00e13aa 4832@@ -558,7 +559,8 @@ static __be32 *xdr_decode_nfstime3(__be3
d33d7b00
AM
4833 * set_mtime mtime;
4834 * };
4835 */
4836-static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
4837+static void encode_sattr3(struct xdr_stream *xdr,
4838+ const struct iattr *attr, int tag)
d337f35e 4839 {
d33d7b00
AM
4840 u32 nbytes;
4841 __be32 *p;
b00e13aa 4842@@ -590,15 +592,19 @@ static void encode_sattr3(struct xdr_str
d33d7b00 4843 } else
d337f35e 4844 *p++ = xdr_zero;
d33d7b00 4845
d337f35e
JR
4846- if (attr->ia_valid & ATTR_UID) {
4847+ if (attr->ia_valid & ATTR_UID ||
4848+ (tag && (attr->ia_valid & ATTR_TAG))) {
4849 *p++ = xdr_one;
b00e13aa 4850- *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
a4a22af8
AM
4851+ *p++ = cpu_to_be32(from_kuid(&init_user_ns,
4852+ TAGINO_KUID(tag, attr->ia_uid, attr->ia_tag)));
d33d7b00 4853 } else
d337f35e 4854 *p++ = xdr_zero;
d33d7b00 4855
d337f35e
JR
4856- if (attr->ia_valid & ATTR_GID) {
4857+ if (attr->ia_valid & ATTR_GID ||
4858+ (tag && (attr->ia_valid & ATTR_TAG))) {
4859 *p++ = xdr_one;
b00e13aa 4860- *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
a4a22af8
AM
4861+ *p++ = cpu_to_be32(from_kgid(&init_user_ns,
4862+ TAGINO_KGID(tag, attr->ia_gid, attr->ia_tag)));
d33d7b00 4863 } else
d337f35e 4864 *p++ = xdr_zero;
d33d7b00 4865
b00e13aa 4866@@ -887,7 +893,7 @@ static void nfs3_xdr_enc_setattr3args(st
d33d7b00 4867 const struct nfs3_sattrargs *args)
d337f35e 4868 {
d33d7b00
AM
4869 encode_nfs_fh3(xdr, args->fh);
4870- encode_sattr3(xdr, args->sattr);
4871+ encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag);
4872 encode_sattrguard3(xdr, args);
4873 }
d337f35e 4874
b00e13aa 4875@@ -1037,13 +1043,13 @@ static void nfs3_xdr_enc_write3args(stru
d33d7b00
AM
4876 * };
4877 */
4878 static void encode_createhow3(struct xdr_stream *xdr,
4879- const struct nfs3_createargs *args)
4880+ const struct nfs3_createargs *args, int tag)
d337f35e 4881 {
d33d7b00
AM
4882 encode_uint32(xdr, args->createmode);
4883 switch (args->createmode) {
4884 case NFS3_CREATE_UNCHECKED:
4885 case NFS3_CREATE_GUARDED:
4886- encode_sattr3(xdr, args->sattr);
4887+ encode_sattr3(xdr, args->sattr, tag);
4888 break;
4889 case NFS3_CREATE_EXCLUSIVE:
4890 encode_createverf3(xdr, args->verifier);
b00e13aa 4891@@ -1058,7 +1064,7 @@ static void nfs3_xdr_enc_create3args(str
d33d7b00
AM
4892 const struct nfs3_createargs *args)
4893 {
4894 encode_diropargs3(xdr, args->fh, args->name, args->len);
4895- encode_createhow3(xdr, args);
4896+ encode_createhow3(xdr, args, req->rq_task->tk_client->cl_tag);
4897 }
4898
4899 /*
b00e13aa 4900@@ -1074,7 +1080,7 @@ static void nfs3_xdr_enc_mkdir3args(stru
d33d7b00
AM
4901 const struct nfs3_mkdirargs *args)
4902 {
4903 encode_diropargs3(xdr, args->fh, args->name, args->len);
4904- encode_sattr3(xdr, args->sattr);
4905+ encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag);
d337f35e 4906 }
d33d7b00
AM
4907
4908 /*
b00e13aa 4909@@ -1091,9 +1097,9 @@ static void nfs3_xdr_enc_mkdir3args(stru
d33d7b00
AM
4910 * };
4911 */
4912 static void encode_symlinkdata3(struct xdr_stream *xdr,
4913- const struct nfs3_symlinkargs *args)
4914+ const struct nfs3_symlinkargs *args, int tag)
4915 {
4916- encode_sattr3(xdr, args->sattr);
4917+ encode_sattr3(xdr, args->sattr, tag);
4918 encode_nfspath3(xdr, args->pages, args->pathlen);
4919 }
4920
b00e13aa 4921@@ -1102,7 +1108,7 @@ static void nfs3_xdr_enc_symlink3args(st
d33d7b00
AM
4922 const struct nfs3_symlinkargs *args)
4923 {
4924 encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
4925- encode_symlinkdata3(xdr, args);
4926+ encode_symlinkdata3(xdr, args, req->rq_task->tk_client->cl_tag);
927ca606 4927 xdr->buf->flags |= XDRBUF_WRITE;
d33d7b00
AM
4928 }
4929
927ca606 4930@@ -1131,24 +1137,24 @@ static void nfs3_xdr_enc_symlink3args(st
d33d7b00
AM
4931 * };
4932 */
4933 static void encode_devicedata3(struct xdr_stream *xdr,
4934- const struct nfs3_mknodargs *args)
4935+ const struct nfs3_mknodargs *args, int tag)
4936 {
4937- encode_sattr3(xdr, args->sattr);
4938+ encode_sattr3(xdr, args->sattr, tag);
4939 encode_specdata3(xdr, args->rdev);
4940 }
4941
4942 static void encode_mknoddata3(struct xdr_stream *xdr,
4943- const struct nfs3_mknodargs *args)
4944+ const struct nfs3_mknodargs *args, int tag)
4945 {
4946 encode_ftype3(xdr, args->type);
4947 switch (args->type) {
4948 case NF3CHR:
4949 case NF3BLK:
4950- encode_devicedata3(xdr, args);
4951+ encode_devicedata3(xdr, args, tag);
4952 break;
4953 case NF3SOCK:
4954 case NF3FIFO:
4955- encode_sattr3(xdr, args->sattr);
4956+ encode_sattr3(xdr, args->sattr, tag);
4957 break;
4958 case NF3REG:
4959 case NF3DIR:
927ca606 4960@@ -1163,7 +1169,7 @@ static void nfs3_xdr_enc_mknod3args(stru
d33d7b00 4961 const struct nfs3_mknodargs *args)
d337f35e 4962 {
d33d7b00
AM
4963 encode_diropargs3(xdr, args->fh, args->name, args->len);
4964- encode_mknoddata3(xdr, args);
4965+ encode_mknoddata3(xdr, args, req->rq_task->tk_client->cl_tag);
4966 }
4967
4968 /*
c2806d43
AM
4969diff -urNp -x '*.orig' linux-4.4/fs/nfs/super.c linux-4.4/fs/nfs/super.c
4970--- linux-4.4/fs/nfs/super.c 2021-02-24 16:56:11.785752975 +0100
4971+++ linux-4.4/fs/nfs/super.c 2021-02-24 16:56:24.569488903 +0100
927ca606 4972@@ -54,6 +54,7 @@
b00e13aa 4973 #include <linux/parser.h>
1e8b8f9b
AM
4974 #include <linux/nsproxy.h>
4975 #include <linux/rcupdate.h>
d337f35e
JR
4976+#include <linux/vs_tag.h>
4977
d337f35e 4978 #include <asm/uaccess.h>
1e8b8f9b 4979
927ca606 4980@@ -102,6 +103,7 @@ enum {
1e8b8f9b 4981 Opt_mountport,
ab30d09f 4982 Opt_mountvers,
ab30d09f
AM
4983 Opt_minorversion,
4984+ Opt_tagid,
4985
4986 /* Mount options that take string arguments */
1e8b8f9b 4987 Opt_nfsvers,
927ca606 4988@@ -114,6 +116,9 @@ enum {
537831f9
AM
4989 /* Special mount options */
4990 Opt_userspace, Opt_deprecated, Opt_sloppy,
4991
4992+ /* Linux-VServer tagging options */
4993+ Opt_tag, Opt_notag,
4994+
4995 Opt_err
4996 };
4997
927ca606 4998@@ -183,6 +188,10 @@ static const match_table_t nfs_mount_opt
537831f9
AM
4999 { Opt_fscache_uniq, "fsc=%s" },
5000 { Opt_local_lock, "local_lock=%s" },
ab30d09f
AM
5001
5002+ { Opt_tag, "tag" },
5003+ { Opt_notag, "notag" },
5004+ { Opt_tagid, "tagid=%u" },
5005+
537831f9
AM
5006 /* The following needs to be listed after all other options */
5007 { Opt_nfsvers, "v%s" },
ab30d09f 5008
927ca606 5009@@ -642,6 +651,7 @@ static void nfs_show_mount_options(struc
2380c486 5010 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
ec22aa5c
AM
5011 { NFS_MOUNT_UNSHARED, ",nosharecache", "" },
5012 { NFS_MOUNT_NORESVPORT, ",noresvport", "" },
d337f35e
JR
5013+ { NFS_MOUNT_TAGGED, ",tag", "" },
5014 { 0, NULL, NULL }
5015 };
5016 const struct proc_nfs_info *nfs_infop;
927ca606 5017@@ -1324,6 +1334,14 @@ static int nfs_parse_mount_options(char
537831f9 5018 case Opt_nomigration:
927ca606 5019 mnt->options &= ~NFS_OPTION_MIGRATION;
ab30d09f
AM
5020 break;
5021+#ifndef CONFIG_TAGGING_NONE
5022+ case Opt_tag:
5023+ mnt->flags |= NFS_MOUNT_TAGGED;
5024+ break;
5025+ case Opt_notag:
5026+ mnt->flags &= ~NFS_MOUNT_TAGGED;
5027+ break;
5028+#endif
5029
5030 /*
5031 * options that take numeric values
927ca606 5032@@ -1410,6 +1428,12 @@ static int nfs_parse_mount_options(char
ab30d09f
AM
5033 goto out_invalid_value;
5034 mnt->minorversion = option;
5035 break;
5036+#ifdef CONFIG_PROPAGATE
5037+ case Opt_tagid:
5038+ /* use args[0] */
5039+ nfs_data.flags |= NFS_MOUNT_TAGGED;
5040+ break;
5041+#endif
5042
5043 /*
5044 * options that take text values
c2806d43
AM
5045diff -urNp -x '*.orig' linux-4.4/fs/nfsd/auth.c linux-4.4/fs/nfsd/auth.c
5046--- linux-4.4/fs/nfsd/auth.c 2021-02-24 16:56:11.785752975 +0100
5047+++ linux-4.4/fs/nfsd/auth.c 2021-02-24 16:56:24.569488903 +0100
bb20add7
AM
5048@@ -1,6 +1,7 @@
5049 /* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
2bf5ad28
AM
5050
5051 #include <linux/sched.h>
d337f35e 5052+#include <linux/vs_tag.h>
2bf5ad28 5053 #include "nfsd.h"
2380c486 5054 #include "auth.h"
d337f35e 5055
bb20add7 5056@@ -35,6 +36,9 @@ int nfsd_setuser(struct svc_rqst *rqstp,
d337f35e 5057
ec22aa5c
AM
5058 new->fsuid = rqstp->rq_cred.cr_uid;
5059 new->fsgid = rqstp->rq_cred.cr_gid;
5060+ /* FIXME: this desperately needs a tag :)
61333608 5061+ new->xid = (vxid_t)INOTAG_TAG(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid, 0);
ec22aa5c 5062+ */
d337f35e 5063
ec22aa5c
AM
5064 rqgi = rqstp->rq_cred.cr_group_info;
5065
c2806d43
AM
5066diff -urNp -x '*.orig' linux-4.4/fs/nfsd/nfs3xdr.c linux-4.4/fs/nfsd/nfs3xdr.c
5067--- linux-4.4/fs/nfsd/nfs3xdr.c 2021-02-24 16:56:11.785752975 +0100
5068+++ linux-4.4/fs/nfsd/nfs3xdr.c 2021-02-24 16:56:24.569488903 +0100
b00e13aa 5069@@ -8,6 +8,7 @@
2bf5ad28
AM
5070
5071 #include <linux/namei.h>
b00e13aa 5072 #include <linux/sunrpc/svc_xprt.h>
d337f35e 5073+#include <linux/vs_tag.h>
2bf5ad28 5074 #include "xdr3.h"
2380c486 5075 #include "auth.h"
b00e13aa
AM
5076 #include "netns.h"
5077@@ -98,6 +99,8 @@ static __be32 *
d337f35e
JR
5078 decode_sattr3(__be32 *p, struct iattr *iap)
5079 {
5080 u32 tmp;
a4a22af8
AM
5081+ kuid_t kuid = GLOBAL_ROOT_UID;
5082+ kgid_t kgid = GLOBAL_ROOT_GID;
d337f35e
JR
5083
5084 iap->ia_valid = 0;
5085
b00e13aa
AM
5086@@ -106,15 +109,18 @@ decode_sattr3(__be32 *p, struct iattr *i
5087 iap->ia_mode = ntohl(*p++);
d337f35e
JR
5088 }
5089 if (*p++) {
b00e13aa 5090- iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++));
a4a22af8 5091+ kuid = make_kuid(&init_user_ns, ntohl(*p++));
b00e13aa
AM
5092 if (uid_valid(iap->ia_uid))
5093 iap->ia_valid |= ATTR_UID;
d337f35e
JR
5094 }
5095 if (*p++) {
b00e13aa 5096- iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++));
a4a22af8 5097+ kgid = make_kgid(&init_user_ns, ntohl(*p++));
b00e13aa
AM
5098 if (gid_valid(iap->ia_gid))
5099 iap->ia_valid |= ATTR_GID;
d337f35e 5100 }
a4a22af8
AM
5101+ iap->ia_uid = INOTAG_KUID(DX_TAG_NFSD, kuid, kgid);
5102+ iap->ia_gid = INOTAG_KGID(DX_TAG_NFSD, kuid, kgid);
5103+ iap->ia_tag = INOTAG_KTAG(DX_TAG_NFSD, kuid, kgid, GLOBAL_ROOT_TAG);
d337f35e
JR
5104 if (*p++) {
5105 u64 newsize;
5106
bb20add7 5107@@ -167,8 +173,12 @@ encode_fattr3(struct svc_rqst *rqstp, __
d337f35e 5108 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
bb20add7 5109 *p++ = htonl((u32) (stat->mode & S_IALLUGO));
d337f35e 5110 *p++ = htonl((u32) stat->nlink);
b00e13aa
AM
5111- *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
5112- *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
5113+ *p++ = htonl((u32) from_kuid(&init_user_ns,
a4a22af8 5114+ TAGINO_KUID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
2380c486 5115+ stat->uid, stat->tag)));
b00e13aa 5116+ *p++ = htonl((u32) from_kgid(&init_user_ns,
a4a22af8 5117+ TAGINO_KGID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
2380c486 5118+ stat->gid, stat->tag)));
d337f35e
JR
5119 if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
5120 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
5121 } else {
c2806d43
AM
5122diff -urNp -x '*.orig' linux-4.4/fs/nfsd/nfs4xdr.c linux-4.4/fs/nfsd/nfs4xdr.c
5123--- linux-4.4/fs/nfsd/nfs4xdr.c 2021-02-24 16:56:11.789086413 +0100
5124+++ linux-4.4/fs/nfsd/nfs4xdr.c 2021-02-24 16:56:24.569488903 +0100
927ca606 5125@@ -40,6 +40,7 @@
d33d7b00 5126 #include <linux/utsname.h>
a168f21d 5127 #include <linux/pagemap.h>
2380c486 5128 #include <linux/sunrpc/svcauth_gss.h>
d337f35e
JR
5129+#include <linux/vs_tag.h>
5130
d33d7b00
AM
5131 #include "idmap.h"
5132 #include "acl.h"
8931d859 5133@@ -2639,12 +2640,16 @@ out_acl:
bb20add7 5134 *p++ = cpu_to_be32(stat.nlink);
d337f35e
JR
5135 }
5136 if (bmval1 & FATTR4_WORD1_OWNER) {
bb20add7
AM
5137- status = nfsd4_encode_user(xdr, rqstp, stat.uid);
5138+ status = nfsd4_encode_user(xdr, rqstp,
a4a22af8 5139+ TAGINO_KUID(DX_TAG(dentry->d_inode),
bb20add7 5140+ stat.uid, stat.tag));
d337f35e
JR
5141 if (status)
5142 goto out;
5143 }
5144 if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
bb20add7
AM
5145- status = nfsd4_encode_group(xdr, rqstp, stat.gid);
5146+ status = nfsd4_encode_group(xdr, rqstp,
a4a22af8 5147+ TAGINO_KGID(DX_TAG(dentry->d_inode),
bb20add7 5148+ stat.gid, stat.tag));
d337f35e 5149 if (status)
f15949f2
JR
5150 goto out;
5151 }
c2806d43
AM
5152diff -urNp -x '*.orig' linux-4.4/fs/nfsd/nfsxdr.c linux-4.4/fs/nfsd/nfsxdr.c
5153--- linux-4.4/fs/nfsd/nfsxdr.c 2021-02-24 16:56:11.789086413 +0100
5154+++ linux-4.4/fs/nfsd/nfsxdr.c 2021-02-24 16:56:24.569488903 +0100
b00e13aa
AM
5155@@ -7,6 +7,7 @@
5156 #include "vfs.h"
2bf5ad28 5157 #include "xdr.h"
2380c486 5158 #include "auth.h"
2bf5ad28 5159+#include <linux/vs_tag.h>
d337f35e
JR
5160
5161 #define NFSDDBG_FACILITY NFSDDBG_XDR
2bf5ad28 5162
b00e13aa 5163@@ -89,6 +90,8 @@ static __be32 *
d337f35e
JR
5164 decode_sattr(__be32 *p, struct iattr *iap)
5165 {
5166 u32 tmp, tmp1;
a4a22af8
AM
5167+ kuid_t kuid = GLOBAL_ROOT_UID;
5168+ kgid_t kgid = GLOBAL_ROOT_GID;
d337f35e
JR
5169
5170 iap->ia_valid = 0;
5171
b00e13aa
AM
5172@@ -101,15 +104,18 @@ decode_sattr(__be32 *p, struct iattr *ia
5173 iap->ia_mode = tmp;
d337f35e
JR
5174 }
5175 if ((tmp = ntohl(*p++)) != (u32)-1) {
b00e13aa 5176- iap->ia_uid = make_kuid(&init_user_ns, tmp);
a4a22af8 5177+ kuid = make_kuid(&init_user_ns, tmp);
b00e13aa
AM
5178 if (uid_valid(iap->ia_uid))
5179 iap->ia_valid |= ATTR_UID;
d337f35e
JR
5180 }
5181 if ((tmp = ntohl(*p++)) != (u32)-1) {
b00e13aa 5182- iap->ia_gid = make_kgid(&init_user_ns, tmp);
a4a22af8 5183+ kgid = make_kgid(&init_user_ns, tmp);
b00e13aa
AM
5184 if (gid_valid(iap->ia_gid))
5185 iap->ia_valid |= ATTR_GID;
d337f35e 5186 }
a4a22af8
AM
5187+ iap->ia_uid = INOTAG_KUID(DX_TAG_NFSD, kuid, kgid);
5188+ iap->ia_gid = INOTAG_KGID(DX_TAG_NFSD, kuid, kgid);
5189+ iap->ia_tag = INOTAG_KTAG(DX_TAG_NFSD, kuid, kgid, GLOBAL_ROOT_TAG);
d337f35e
JR
5190 if ((tmp = ntohl(*p++)) != (u32)-1) {
5191 iap->ia_valid |= ATTR_SIZE;
5192 iap->ia_size = tmp;
b00e13aa 5193@@ -154,8 +160,10 @@ encode_fattr(struct svc_rqst *rqstp, __b
d337f35e
JR
5194 *p++ = htonl(nfs_ftypes[type >> 12]);
5195 *p++ = htonl((u32) stat->mode);
5196 *p++ = htonl((u32) stat->nlink);
b00e13aa
AM
5197- *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
5198- *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
5199+ *p++ = htonl((u32) from_kuid(&init_user_ns,
a4a22af8 5200+ TAGINO_KUID(DX_TAG(dentry->d_inode), stat->uid, stat->tag)));
b00e13aa 5201+ *p++ = htonl((u32) from_kgid(&init_user_ns,
a4a22af8 5202+ TAGINO_KGID(DX_TAG(dentry->d_inode), stat->gid, stat->tag)));
d337f35e
JR
5203
5204 if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
5205 *p++ = htonl(NFS_MAXPATHLEN);
c2806d43
AM
5206diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/dlmglue.c linux-4.4/fs/ocfs2/dlmglue.c
5207--- linux-4.4/fs/ocfs2/dlmglue.c 2021-02-24 16:56:11.799086728 +0100
5208+++ linux-4.4/fs/ocfs2/dlmglue.c 2021-02-24 16:56:24.569488903 +0100
927ca606 5209@@ -2128,6 +2128,7 @@ static void __ocfs2_stuff_meta_lvb(struc
d337f35e 5210 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
b00e13aa
AM
5211 lvb->lvb_iuid = cpu_to_be32(i_uid_read(inode));
5212 lvb->lvb_igid = cpu_to_be32(i_gid_read(inode));
a4a22af8 5213+ lvb->lvb_itag = cpu_to_be16(i_tag_read(inode));
d337f35e
JR
5214 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
5215 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
5216 lvb->lvb_iatime_packed =
927ca606 5217@@ -2178,6 +2179,7 @@ static void ocfs2_refresh_inode_from_lvb
d337f35e 5218
b00e13aa
AM
5219 i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
5220 i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
5221+ i_tag_write(inode, be16_to_cpu(lvb->lvb_itag));
d337f35e 5222 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
f6c5ef8b 5223 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
d337f35e 5224 ocfs2_unpack_timespec(&inode->i_atime,
c2806d43
AM
5225diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/dlmglue.h linux-4.4/fs/ocfs2/dlmglue.h
5226--- linux-4.4/fs/ocfs2/dlmglue.h 2021-02-24 16:56:11.799086728 +0100
5227+++ linux-4.4/fs/ocfs2/dlmglue.h 2021-02-24 16:56:24.569488903 +0100
2380c486
JR
5228@@ -46,7 +46,8 @@ struct ocfs2_meta_lvb {
5229 __be16 lvb_inlink;
5230 __be32 lvb_iattr;
5231 __be32 lvb_igeneration;
5232- __be32 lvb_reserved2;
d337f35e 5233+ __be16 lvb_itag;
2380c486
JR
5234+ __be16 lvb_reserved2;
5235 };
5236
ec22aa5c 5237 #define OCFS2_QINFO_LVB_VERSION 1
c2806d43
AM
5238diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/file.c linux-4.4/fs/ocfs2/file.c
5239--- linux-4.4/fs/ocfs2/file.c 2021-02-24 16:56:11.802420166 +0100
5240+++ linux-4.4/fs/ocfs2/file.c 2021-02-24 16:56:24.569488903 +0100
927ca606 5241@@ -1151,7 +1151,7 @@ int ocfs2_setattr(struct dentry *dentry,
763640ca 5242 attr->ia_valid &= ~ATTR_SIZE;
d337f35e
JR
5243
5244 #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
5245- | ATTR_GID | ATTR_UID | ATTR_MODE)
5246+ | ATTR_GID | ATTR_UID | ATTR_TAG | ATTR_MODE)
763640ca 5247 if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
d337f35e 5248 return 0;
763640ca 5249
c2806d43
AM
5250diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/inode.c linux-4.4/fs/ocfs2/inode.c
5251--- linux-4.4/fs/ocfs2/inode.c 2016-01-11 00:01:32.000000000 +0100
5252+++ linux-4.4/fs/ocfs2/inode.c 2021-02-24 16:56:24.569488903 +0100
78865d5b 5253@@ -28,6 +28,7 @@
d337f35e
JR
5254 #include <linux/highmem.h>
5255 #include <linux/pagemap.h>
ec22aa5c 5256 #include <linux/quotaops.h>
d337f35e
JR
5257+#include <linux/vs_tag.h>
5258
5259 #include <asm/byteorder.h>
5260
537831f9 5261@@ -78,11 +79,13 @@ void ocfs2_set_inode_flags(struct inode
2380c486
JR
5262 {
5263 unsigned int flags = OCFS2_I(inode)->ip_attr;
5264
5265- inode->i_flags &= ~(S_IMMUTABLE |
5266+ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
5267 S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
d337f35e
JR
5268
5269 if (flags & OCFS2_IMMUTABLE_FL)
5270 inode->i_flags |= S_IMMUTABLE;
2380c486
JR
5271+ if (flags & OCFS2_IXUNLINK_FL)
5272+ inode->i_flags |= S_IXUNLINK;
d337f35e
JR
5273
5274 if (flags & OCFS2_SYNC_FL)
5275 inode->i_flags |= S_SYNC;
537831f9 5276@@ -92,25 +95,44 @@ void ocfs2_set_inode_flags(struct inode
2380c486
JR
5277 inode->i_flags |= S_NOATIME;
5278 if (flags & OCFS2_DIRSYNC_FL)
d337f35e 5279 inode->i_flags |= S_DIRSYNC;
2380c486
JR
5280+
5281+ inode->i_vflags &= ~(V_BARRIER | V_COW);
5282+
5283+ if (flags & OCFS2_BARRIER_FL)
5284+ inode->i_vflags |= V_BARRIER;
5285+ if (flags & OCFS2_COW_FL)
5286+ inode->i_vflags |= V_COW;
d337f35e
JR
5287 }
5288
2380c486
JR
5289 /* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */
5290 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
5291 {
5292 unsigned int flags = oi->vfs_inode.i_flags;
5293+ unsigned int vflags = oi->vfs_inode.i_vflags;
5294+
5295+ oi->ip_attr &= ~(OCFS2_SYNC_FL | OCFS2_APPEND_FL |
5296+ OCFS2_IMMUTABLE_FL | OCFS2_IXUNLINK_FL |
5297+ OCFS2_NOATIME_FL | OCFS2_DIRSYNC_FL |
5298+ OCFS2_BARRIER_FL | OCFS2_COW_FL);
5299+
5300+ if (flags & S_IMMUTABLE)
5301+ oi->ip_attr |= OCFS2_IMMUTABLE_FL;
5302+ if (flags & S_IXUNLINK)
5303+ oi->ip_attr |= OCFS2_IXUNLINK_FL;
5304
5305- oi->ip_attr &= ~(OCFS2_SYNC_FL|OCFS2_APPEND_FL|
5306- OCFS2_IMMUTABLE_FL|OCFS2_NOATIME_FL|OCFS2_DIRSYNC_FL);
5307 if (flags & S_SYNC)
5308 oi->ip_attr |= OCFS2_SYNC_FL;
5309 if (flags & S_APPEND)
5310 oi->ip_attr |= OCFS2_APPEND_FL;
5311- if (flags & S_IMMUTABLE)
5312- oi->ip_attr |= OCFS2_IMMUTABLE_FL;
5313 if (flags & S_NOATIME)
5314 oi->ip_attr |= OCFS2_NOATIME_FL;
5315 if (flags & S_DIRSYNC)
5316 oi->ip_attr |= OCFS2_DIRSYNC_FL;
5317+
5318+ if (vflags & V_BARRIER)
5319+ oi->ip_attr |= OCFS2_BARRIER_FL;
5320+ if (vflags & V_COW)
5321+ oi->ip_attr |= OCFS2_COW_FL;
2380c486
JR
5322 }
5323
ec22aa5c 5324 struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
bb20add7 5325@@ -268,6 +290,8 @@ void ocfs2_populate_inode(struct inode *
d337f35e
JR
5326 struct super_block *sb;
5327 struct ocfs2_super *osb;
ec22aa5c 5328 int use_plocks = 1;
d337f35e
JR
5329+ uid_t uid;
5330+ gid_t gid;
5331
763640ca
JR
5332 sb = inode->i_sb;
5333 osb = OCFS2_SB(sb);
bb20add7 5334@@ -296,8 +320,12 @@ void ocfs2_populate_inode(struct inode *
d337f35e
JR
5335 inode->i_generation = le32_to_cpu(fe->i_generation);
5336 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
5337 inode->i_mode = le16_to_cpu(fe->i_mode);
b00e13aa
AM
5338- i_uid_write(inode, le32_to_cpu(fe->i_uid));
5339- i_gid_write(inode, le32_to_cpu(fe->i_gid));
d337f35e
JR
5340+ uid = le32_to_cpu(fe->i_uid);
5341+ gid = le32_to_cpu(fe->i_gid);
b00e13aa
AM
5342+ i_uid_write(inode, INOTAG_UID(DX_TAG(inode), uid, gid));
5343+ i_gid_write(inode, INOTAG_GID(DX_TAG(inode), uid, gid));
5344+ i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), uid, gid,
5345+ /* le16_to_cpu(raw_inode->i_raw_tag) */ 0));
d337f35e
JR
5346
5347 /* Fast symlinks will have i_size but no allocated clusters. */
42bc425c 5348 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) {
c2806d43
AM
5349diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/inode.h linux-4.4/fs/ocfs2/inode.h
5350--- linux-4.4/fs/ocfs2/inode.h 2016-01-11 00:01:32.000000000 +0100
5351+++ linux-4.4/fs/ocfs2/inode.h 2021-02-24 16:56:24.569488903 +0100
927ca606 5352@@ -161,6 +161,7 @@ struct buffer_head *ocfs2_bread(struct i
d337f35e
JR
5353
5354 void ocfs2_set_inode_flags(struct inode *inode);
2380c486 5355 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
d4263eb0 5356+int ocfs2_sync_flags(struct inode *inode, int, int);
d337f35e 5357
2380c486
JR
5358 static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
5359 {
c2806d43
AM
5360diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/ioctl.c linux-4.4/fs/ocfs2/ioctl.c
5361--- linux-4.4/fs/ocfs2/ioctl.c 2021-02-24 16:56:11.802420166 +0100
5362+++ linux-4.4/fs/ocfs2/ioctl.c 2021-02-24 16:56:24.572822341 +0100
1e8b8f9b 5363@@ -76,7 +76,41 @@ static int ocfs2_get_inode_attr(struct i
d337f35e
JR
5364 return status;
5365 }
5366
5367-static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
d4263eb0
JR
5368+int ocfs2_sync_flags(struct inode *inode, int flags, int vflags)
5369+{
5370+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5371+ struct buffer_head *bh = NULL;
5372+ handle_t *handle = NULL;
5373+ int status;
5374+
5375+ status = ocfs2_inode_lock(inode, &bh, 1);
5376+ if (status < 0) {
5377+ mlog_errno(status);
5378+ return status;
5379+ }
5380+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
5381+ if (IS_ERR(handle)) {
5382+ status = PTR_ERR(handle);
5383+ mlog_errno(status);
5384+ goto bail_unlock;
5385+ }
5386+
5387+ inode->i_flags = flags;
5388+ inode->i_vflags = vflags;
5389+ ocfs2_get_inode_flags(OCFS2_I(inode));
5390+
5391+ status = ocfs2_mark_inode_dirty(handle, inode, bh);
5392+ if (status < 0)
5393+ mlog_errno(status);
5394+
5395+ ocfs2_commit_trans(osb, handle);
5396+bail_unlock:
5397+ ocfs2_inode_unlock(inode, 1);
5398+ brelse(bh);
5399+ return status;
5400+}
5401+
d337f35e
JR
5402+int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
5403 unsigned mask)
5404 {
5405 struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
09be7631
JR
5406@@ -116,6 +150,11 @@ static int ocfs2_set_inode_attr(struct i
5407 goto bail_unlock;
5408 }
2380c486
JR
5409
5410+ if (IS_BARRIER(inode)) {
5411+ vxwprintk_task(1, "messing with the barrier.");
5412+ goto bail_unlock;
5413+ }
5414+
5415 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
5416 if (IS_ERR(handle)) {
5417 status = PTR_ERR(handle);
bb20add7 5418@@ -841,6 +880,7 @@ bail:
d4263eb0
JR
5419 return status;
5420 }
d337f35e 5421
d337f35e 5422+
d4263eb0
JR
5423 long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
5424 {
b00e13aa 5425 struct inode *inode = file_inode(filp);
c2806d43
AM
5426diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/namei.c linux-4.4/fs/ocfs2/namei.c
5427--- linux-4.4/fs/ocfs2/namei.c 2021-02-24 16:56:11.802420166 +0100
5428+++ linux-4.4/fs/ocfs2/namei.c 2021-02-24 16:56:24.572822341 +0100
ec22aa5c 5429@@ -41,6 +41,7 @@
d337f35e
JR
5430 #include <linux/slab.h>
5431 #include <linux/highmem.h>
ec22aa5c 5432 #include <linux/quotaops.h>
d337f35e
JR
5433+#include <linux/vs_tag.h>
5434
d337f35e 5435 #include <cluster/masklog.h>
763640ca 5436
927ca606 5437@@ -516,6 +517,7 @@ static int __ocfs2_mknod_locked(struct i
93de0823 5438 struct ocfs2_extent_list *fel;
ec22aa5c 5439 u16 feat;
265de2f7 5440 struct ocfs2_inode_info *oi = OCFS2_I(inode);
a4a22af8 5441+ ktag_t ktag;
d337f35e 5442
7e46296a
AM
5443 *new_fe_bh = NULL;
5444
927ca606 5445@@ -553,8 +555,13 @@ static int __ocfs2_mknod_locked(struct i
76514441 5446 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
d337f35e 5447 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
2380c486 5448 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
b00e13aa
AM
5449- fe->i_uid = cpu_to_le32(i_uid_read(inode));
5450- fe->i_gid = cpu_to_le32(i_gid_read(inode));
d337f35e 5451+
a4a22af8
AM
5452+ ktag = make_ktag(&init_user_ns, dx_current_fstag(osb->sb));
5453+ fe->i_uid = cpu_to_le32(from_kuid(&init_user_ns,
5454+ TAGINO_KUID(DX_TAG(inode), inode->i_uid, ktag)));
5455+ fe->i_gid = cpu_to_le32(from_kgid(&init_user_ns,
5456+ TAGINO_KGID(DX_TAG(inode), inode->i_gid, ktag)));
5457+ inode->i_tag = ktag; /* is this correct? */
ec22aa5c
AM
5458 fe->i_mode = cpu_to_le16(inode->i_mode);
5459 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
d337f35e 5460 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
c2806d43
AM
5461diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/ocfs2.h linux-4.4/fs/ocfs2/ocfs2.h
5462--- linux-4.4/fs/ocfs2/ocfs2.h 2021-02-24 16:56:11.802420166 +0100
5463+++ linux-4.4/fs/ocfs2/ocfs2.h 2021-02-24 16:56:24.572822341 +0100
5464@@ -289,6 +289,7 @@ enum ocfs2_mount_options
5465 OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
5466 OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
5467 OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
5468+ OCFS2_MOUNT_TAGGED = 1 << 18, /* use tagging */
5469 };
5470
5471 #define OCFS2_OSB_SOFT_RO 0x0001
5472diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/ocfs2_fs.h linux-4.4/fs/ocfs2/ocfs2_fs.h
5473--- linux-4.4/fs/ocfs2/ocfs2_fs.h 2021-02-24 16:56:11.802420166 +0100
5474+++ linux-4.4/fs/ocfs2/ocfs2_fs.h 2021-02-24 16:56:24.572822341 +0100
927ca606 5475@@ -275,6 +275,11 @@
93de0823
AM
5476 #define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
5477 #define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
2380c486 5478
93de0823
AM
5479+#define OCFS2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */
5480+
5481+#define OCFS2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */
5482+#define OCFS2_COW_FL FS_COW_FL /* Copy on Write marker */
5483+
5484 #define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
5485 #define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
5486
c2806d43
AM
5487diff -urNp -x '*.orig' linux-4.4/fs/ocfs2/super.c linux-4.4/fs/ocfs2/super.c
5488--- linux-4.4/fs/ocfs2/super.c 2021-02-24 16:56:11.805753604 +0100
5489+++ linux-4.4/fs/ocfs2/super.c 2021-02-24 16:56:24.572822341 +0100
927ca606 5490@@ -193,6 +193,7 @@ enum {
76514441 5491 Opt_dir_resv_level,
927ca606
AM
5492 Opt_journal_async_commit,
5493 Opt_err_cont,
d337f35e
JR
5494+ Opt_tag, Opt_notag, Opt_tagid,
5495 Opt_err,
5496 };
5497
927ca606 5498@@ -226,6 +227,9 @@ static const match_table_t tokens = {
76514441 5499 {Opt_dir_resv_level, "dir_resv_level=%u"},
927ca606
AM
5500 {Opt_journal_async_commit, "journal_async_commit"},
5501 {Opt_err_cont, "errors=continue"},
d337f35e 5502+ {Opt_tag, "tag"},
d337f35e
JR
5503+ {Opt_notag, "notag"},
5504+ {Opt_tagid, "tagid=%u"},
5505 {Opt_err, NULL}
5506 };
5507
8931d859 5508@@ -676,6 +680,13 @@ static int ocfs2_remount(struct super_bl
d337f35e
JR
5509 goto out;
5510 }
5511
d4263eb0
JR
5512+ if ((osb->s_mount_opt & OCFS2_MOUNT_TAGGED) !=
5513+ (parsed_options.mount_opt & OCFS2_MOUNT_TAGGED)) {
d337f35e
JR
5514+ ret = -EINVAL;
5515+ mlog(ML_ERROR, "Cannot change tagging on remount\n");
5516+ goto out;
5517+ }
5518+
ab30d09f
AM
5519 /* We're going to/from readonly mode. */
5520 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
5521 /* Disable quota accounting before remounting RO */
8931d859 5522@@ -1165,6 +1176,9 @@ static int ocfs2_fill_super(struct super
d337f35e
JR
5523
5524 ocfs2_complete_mount_recovery(osb);
5525
5526+ if (osb->s_mount_opt & OCFS2_MOUNT_TAGGED)
5527+ sb->s_flags |= MS_TAGGED;
5528+
2380c486
JR
5529 if (ocfs2_mount_local(osb))
5530 snprintf(nodestr, sizeof(nodestr), "local");
5531 else
8931d859 5532@@ -1485,6 +1499,20 @@ static int ocfs2_parse_options(struct su
927ca606
AM
5533 case Opt_journal_async_commit:
5534 mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
d337f35e
JR
5535 break;
5536+#ifndef CONFIG_TAGGING_NONE
5537+ case Opt_tag:
2380c486 5538+ mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
d337f35e
JR
5539+ break;
5540+ case Opt_notag:
2380c486 5541+ mopt->mount_opt &= ~OCFS2_MOUNT_TAGGED;
d337f35e
JR
5542+ break;
5543+#endif
5544+#ifdef CONFIG_PROPAGATE
5545+ case Opt_tagid:
5546+ /* use args[0] */
2380c486 5547+ mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
d337f35e
JR
5548+ break;
5549+#endif
5550 default:
5551 mlog(ML_ERROR,
5552 "Unrecognized mount option \"%s\" "
c2806d43
AM
5553diff -urNp -x '*.orig' linux-4.4/fs/open.c linux-4.4/fs/open.c
5554--- linux-4.4/fs/open.c 2021-02-24 16:56:11.805753604 +0100
5555+++ linux-4.4/fs/open.c 2021-02-24 16:56:24.572822341 +0100
b00e13aa 5556@@ -31,6 +31,11 @@
2bf5ad28 5557 #include <linux/ima.h>
93de0823 5558 #include <linux/dnotify.h>
b00e13aa 5559 #include <linux/compat.h>
d337f35e
JR
5560+#include <linux/vs_base.h>
5561+#include <linux/vs_limit.h>
d337f35e
JR
5562+#include <linux/vs_tag.h>
5563+#include <linux/vs_cowbl.h>
78865d5b 5564+#include <linux/vserver/dlimit.h>
d337f35e 5565
2bf5ad28
AM
5566 #include "internal.h"
5567
927ca606 5568@@ -70,6 +75,11 @@ long vfs_truncate(struct path *path, lof
b00e13aa
AM
5569 struct inode *inode;
5570 long error;
5571
76514441 5572+#ifdef CONFIG_VSERVER_COWBL
b00e13aa 5573+ error = cow_check_and_break(path);
d337f35e 5574+ if (error)
b00e13aa 5575+ goto out;
76514441 5576+#endif
b00e13aa 5577 inode = path->dentry->d_inode;
d337f35e 5578
a168f21d 5579 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
3cc86a71 5580@@ -567,6 +577,13 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, cons
b00e13aa
AM
5581 unsigned int lookup_flags = LOOKUP_FOLLOW;
5582 retry:
5583 error = user_path_at(dfd, filename, lookup_flags, &path);
a168f21d 5584+#ifdef CONFIG_VSERVER_COWBL
b00e13aa 5585+ if (!error) {
a168f21d 5586+ error = cow_check_and_break(&path);
b00e13aa
AM
5587+ if (error)
5588+ path_put(&path);
5589+ }
a168f21d 5590+#endif
b00e13aa 5591 if (!error) {
a168f21d
AM
5592 error = chmod_common(&path, mode);
5593 path_put(&path);
3cc86a71 5594@@ -601,13 +618,15 @@ retry_deleg:
42bc425c
AM
5595 if (!uid_valid(uid))
5596 return -EINVAL;
d337f35e 5597 newattrs.ia_valid |= ATTR_UID;
42bc425c 5598- newattrs.ia_uid = uid;
8ce283e1
AM
5599+ newattrs.ia_uid = make_kuid(&init_user_ns,
5600+ dx_map_uid(user));
d337f35e
JR
5601 }
5602 if (group != (gid_t) -1) {
42bc425c
AM
5603 if (!gid_valid(gid))
5604 return -EINVAL;
d337f35e 5605 newattrs.ia_valid |= ATTR_GID;
42bc425c 5606- newattrs.ia_gid = gid;
8ce283e1
AM
5607+ newattrs.ia_gid = make_kgid(&init_user_ns,
5608+ dx_map_gid(group));
d337f35e
JR
5609 }
5610 if (!S_ISDIR(inode->i_mode))
2380c486 5611 newattrs.ia_valid |=
3cc86a71 5612@@ -645,6 +664,10 @@ retry:
2380c486 5613 error = mnt_want_write(path.mnt);
d337f35e 5614 if (error)
2380c486 5615 goto out_release;
d337f35e 5616+#ifdef CONFIG_VSERVER_COWBL
2380c486 5617+ error = cow_check_and_break(&path);
d337f35e 5618+ if (!error)
d337f35e 5619+#endif
2bf5ad28 5620 error = chown_common(&path, user, group);
2380c486
JR
5621 mnt_drop_write(path.mnt);
5622 out_release:
c2806d43
AM
5623diff -urNp -x '*.orig' linux-4.4/fs/proc/array.c linux-4.4/fs/proc/array.c
5624--- linux-4.4/fs/proc/array.c 2021-02-24 16:56:11.809087043 +0100
5625+++ linux-4.4/fs/proc/array.c 2021-02-24 16:56:24.572822341 +0100
8931d859 5626@@ -84,6 +84,8 @@
2380c486 5627 #include <linux/tracehook.h>
927ca606 5628 #include <linux/string_helpers.h>
42bc425c 5629 #include <linux/user_namespace.h>
d337f35e
JR
5630+#include <linux/vs_context.h>
5631+#include <linux/vs_network.h>
5632
d337f35e 5633 #include <asm/pgtable.h>
2380c486 5634 #include <asm/processor.h>
8931d859 5635@@ -155,6 +157,9 @@ static inline void task_state(struct seq
2380c486
JR
5636 ppid = pid_alive(p) ?
5637 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
927ca606 5638
2380c486
JR
5639+ if (unlikely(vx_current_initpid(p->pid)))
5640+ ppid = 0;
5641+
927ca606
AM
5642 tracer = ptrace_parent(p);
5643 if (tracer)
5644 tpid = task_pid_nr_ns(tracer, ns);
8931d859 5645@@ -293,8 +298,8 @@ static inline void task_sig(struct seq_f
bb20add7 5646 render_sigset_t(m, "SigCgt:\t", &caught);
2380c486
JR
5647 }
5648
bb20add7 5649-static void render_cap_t(struct seq_file *m, const char *header,
2380c486 5650- kernel_cap_t *a)
bb20add7 5651+void render_cap_t(struct seq_file *m, const char *header,
2380c486 5652+ struct vx_info *vxi, kernel_cap_t *a)
d337f35e 5653 {
2380c486
JR
5654 unsigned __capi;
5655
8931d859 5656@@ -321,11 +326,12 @@ static inline void task_cap(struct seq_f
927ca606 5657 cap_ambient = cred->cap_ambient;
bb20add7 5658 rcu_read_unlock();
2380c486 5659
ec22aa5c
AM
5660- render_cap_t(m, "CapInh:\t", &cap_inheritable);
5661- render_cap_t(m, "CapPrm:\t", &cap_permitted);
5662- render_cap_t(m, "CapEff:\t", &cap_effective);
5663- render_cap_t(m, "CapBnd:\t", &cap_bset);
927ca606 5664- render_cap_t(m, "CapAmb:\t", &cap_ambient);
ec22aa5c
AM
5665+ /* FIXME: maybe move the p->vx_info masking to __task_cred() ? */
5666+ render_cap_t(m, "CapInh:\t", p->vx_info, &cap_inheritable);
5667+ render_cap_t(m, "CapPrm:\t", p->vx_info, &cap_permitted);
5668+ render_cap_t(m, "CapEff:\t", p->vx_info, &cap_effective);
5669+ render_cap_t(m, "CapBnd:\t", p->vx_info, &cap_bset);
927ca606 5670+ render_cap_t(m, "CapAmb:\t", p->vx_info, &cap_ambient);
d337f35e
JR
5671 }
5672
b00e13aa 5673 static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
8931d859 5674@@ -377,6 +383,43 @@ static void task_cpus_allowed(struct seq
927ca606 5675 cpumask_pr_args(&task->cpus_allowed));
2380c486
JR
5676 }
5677
5678+int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
5679+ struct pid *pid, struct task_struct *task)
5680+{ /* Print task's nsproxy pointer, its reference count and each namespace pointer to m; 'I' marks a pointer equal to init_task's, '-' any other. ns and pid are unused. Always returns 0. */
5681+ seq_printf(m, "Proxy:\t%p(%c)\n"
5682+ "Count:\t%u\n"
5683+ "uts:\t%p(%c)\n"
5684+ "ipc:\t%p(%c)\n"
5685+ "mnt:\t%p(%c)\n"
5686+ "pid:\t%p(%c)\n"
5687+ "net:\t%p(%c)\n",
5688+ task->nsproxy, /* NOTE(review): task->nsproxy is dereferenced below with no NULL check and no task_lock() - confirm it cannot be NULL for the task passed in here */
5689+ (task->nsproxy == init_task.nsproxy ? 'I' : '-'),
5690+ atomic_read(&task->nsproxy->count),
5691+ task->nsproxy->uts_ns,
5692+ (task->nsproxy->uts_ns == init_task.nsproxy->uts_ns ? 'I' : '-'),
5693+ task->nsproxy->ipc_ns,
5694+ (task->nsproxy->ipc_ns == init_task.nsproxy->ipc_ns ? 'I' : '-'),
5695+ task->nsproxy->mnt_ns,
5696+ (task->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns ? 'I' : '-'),
c2e5f7c8
JR
5697+ task->nsproxy->pid_ns_for_children,
5698+ (task->nsproxy->pid_ns_for_children ==
5699+ init_task.nsproxy->pid_ns_for_children ? 'I' : '-'),
2380c486
JR
5700+ task->nsproxy->net_ns,
5701+ (task->nsproxy->net_ns == init_task.nsproxy->net_ns ? 'I' : '-'));
5702+ return 0;
5703+}
d337f35e 5704+
2380c486
JR
5705+void task_vs_id(struct seq_file *m, struct task_struct *task)
5706+{ /* Append "VxID:" and "NxID:" lines for task to m; prints nothing when task_vx_flags() reports VXF_HIDE_VINFO for the task. */
d337f35e 5707+ if (task_vx_flags(task, VXF_HIDE_VINFO, 0))
2380c486
JR
5708+ return; /* context ids are hidden for this task */
5709+
bb20add7
AM
5710+ seq_printf(m, "VxID:\t%d\n", vx_task_xid(task));
5711+ seq_printf(m, "NxID:\t%d\n", nx_task_nid(task));
2380c486
JR
5712+}
5713+
5714+
5715 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
5716 struct pid *pid, struct task_struct *task)
5717 {
8931d859 5718@@ -394,6 +437,7 @@ int proc_pid_status(struct seq_file *m,
b00e13aa 5719 task_seccomp(m, task);
2bf5ad28 5720 task_cpus_allowed(m, task);
2380c486
JR
5721 cpuset_task_status_allowed(m, task);
5722+ task_vs_id(m, task);
152aeb71
JR
5723 task_context_switch_counts(m, task);
5724 return 0;
5725 }
48cb6a3c 5726@@ -509,6 +553,17 @@ static int do_task_stat(struct seq_file
d337f35e 5727 /* convert nsec -> ticks */
bb20add7 5728 start_time = nsec_to_clock_t(task->real_start_time);
d337f35e
JR
5729
5730+ /* fixup start time for virt uptime */
5731+ if (vx_flags(VXF_VIRT_UPTIME, 0)) {
5732+ unsigned long long bias =
5733+ current->vx_info->cvirt.bias_clock;
5734+
5735+ if (start_time > bias)
5736+ start_time -= bias;
5737+ else
5738+ start_time = 0;
5739+ }
5740+
1e8b8f9b
AM
5741 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
5742 seq_put_decimal_ll(m, ' ', ppid);
5743 seq_put_decimal_ll(m, ' ', pgid);
c2806d43
AM
5744diff -urNp -x '*.orig' linux-4.4/fs/proc/base.c linux-4.4/fs/proc/base.c
5745--- linux-4.4/fs/proc/base.c 2021-02-24 16:56:11.809087043 +0100
5746+++ linux-4.4/fs/proc/base.c 2021-02-24 16:56:24.572822341 +0100
09be7631 5747@@ -87,6 +87,8 @@
78865d5b 5748 #include <linux/slab.h>
db55b927 5749 #include <linux/flex_array.h>
09be7631 5750 #include <linux/posix-timers.h>
d337f35e
JR
5751+#include <linux/vs_context.h>
5752+#include <linux/vs_network.h>
763640ca
JR
5753 #ifdef CONFIG_HARDWALL
5754 #include <asm/hardwall.h>
5755 #endif
3cc86a71 5756@@ -1125,11 +1127,15 @@ static ssize_t oom_adj_write(struct file
537831f9 5757 oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
7e46296a 5758
537831f9
AM
5759 if (oom_adj < task->signal->oom_score_adj &&
5760- !capable(CAP_SYS_RESOURCE)) {
5761+ !vx_capable(CAP_SYS_RESOURCE, VXC_OOM_ADJUST)) {
ab30d09f
AM
5762 err = -EACCES;
5763 goto err_sighand;
4a036bed 5764 }
7e46296a 5765
4a036bed 5766+ /* prevent guest processes from circumventing the oom killer */
537831f9
AM
5767+ if (vx_current_xid() && (oom_adj == OOM_DISABLE))
5768+ oom_adj = OOM_ADJUST_MIN;
7e46296a 5769+
f6c5ef8b 5770 /*
537831f9
AM
5771 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
5772 * /proc/pid/oom_score_adj instead.
3cc86a71 5773@@ -1694,6 +1700,8 @@ struct inode *proc_pid_make_inode(struct
ec22aa5c
AM
5774 inode->i_gid = cred->egid;
5775 rcu_read_unlock();
d337f35e
JR
5776 }
5777+ /* procfs is xid tagged */
61333608 5778+ i_tag_write(inode, (vtag_t)vx_task_xid(task));
d337f35e
JR
5779 security_task_to_inode(task, inode);
5780
5781 out:
3cc86a71 5782@@ -1739,6 +1747,8 @@ int pid_getattr(struct vfsmount *mnt, st
d33d7b00
AM
5783
5784 /* dentry stuff */
5785
bb20add7 5786+// static unsigned name_to_int(struct dentry *dentry);
d33d7b00
AM
5787+
5788 /*
5789 * Exceptional case: normally we are not allowed to unhash a busy
5790 * directory. In this case, however, we can do it - no aliasing problems
3cc86a71 5791@@ -1767,6 +1777,19 @@ int pid_revalidate(struct dentry *dentry
d33d7b00
AM
5792 task = get_proc_task(inode);
5793
5794 if (task) {
bb20add7
AM
5795+ unsigned pid = name_to_int(&dentry->d_name);
5796+
5797+ if (pid != ~0U && pid != vx_map_pid(task->pid) &&
5798+ pid != __task_pid_nr_ns(task, PIDTYPE_PID,
5799+ task_active_pid_ns(task))) {
5800+ vxdprintk(VXD_CBIT(misc, 10),
5801+ VS_Q("%*s") " dropped by pid_revalidate(%d!=%d)",
5802+ dentry->d_name.len, dentry->d_name.name,
5803+ pid, vx_map_pid(task->pid));
d33d7b00 5804+ put_task_struct(task);
bb20add7
AM
5805+ d_drop(dentry);
5806+ return 0;
d33d7b00
AM
5807+ }
5808 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
5809 task_dumpable(task)) {
5810 rcu_read_lock();
3cc86a71 5811@@ -2336,6 +2359,13 @@ static struct dentry *proc_pident_lookup
d337f35e
JR
5812 if (!task)
5813 goto out_no_task;
5814
2380c486 5815+ /* TODO: maybe we can come up with a generic approach? */
d337f35e
JR
5816+ if (task_vx_flags(task, VXF_HIDE_VINFO, 0) &&
5817+ (dentry->d_name.len == 5) &&
5818+ (!memcmp(dentry->d_name.name, "vinfo", 5) ||
5819+ !memcmp(dentry->d_name.name, "ninfo", 5)))
5820+ goto out;
5821+
5822 /*
5823 * Yes, it does not scale. And it should not. Don't add
5824 * new entries into /proc/<tgid>/ without very good reasons.
3cc86a71 5825@@ -2778,6 +2808,11 @@ static int proc_pid_personality(struct s
2380c486
JR
5826 static const struct file_operations proc_task_operations;
5827 static const struct inode_operations proc_task_inode_operations;
d337f35e 5828
bb20add7
AM
5829+extern int proc_pid_vx_info(struct seq_file *,
5830+ struct pid_namespace *, struct pid *, struct task_struct *);
5831+extern int proc_pid_nx_info(struct seq_file *,
5832+ struct pid_namespace *, struct pid *, struct task_struct *);
d337f35e 5833+
2380c486 5834 static const struct pid_entry tgid_base_stuff[] = {
ec22aa5c
AM
5835 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
5836 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3cc86a71 5837@@ -2842,6 +2877,8 @@ static const struct pid_entry tgid_base_
2380c486 5838 #ifdef CONFIG_CGROUPS
bb20add7 5839 ONE("cgroup", S_IRUGO, proc_cgroup_show),
d337f35e 5840 #endif
bb20add7
AM
5841+ ONE("vinfo", S_IRUGO, proc_pid_vx_info),
5842+ ONE("ninfo", S_IRUGO, proc_pid_nx_info),
5843 ONE("oom_score", S_IRUGO, proc_oom_score),
537831f9 5844 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
93de0823 5845 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3cc86a71 5846@@ -3056,7 +3093,7 @@ retry:
2380c486
JR
5847 iter.task = NULL;
5848 pid = find_ge_pid(iter.tgid, ns);
5849 if (pid) {
5850- iter.tgid = pid_nr_ns(pid, ns);
5851+ iter.tgid = pid_unmapped_nr_ns(pid, ns);
5852 iter.task = pid_task(pid, PIDTYPE_PID);
5853 /* What we to know is if the pid we have find is the
5854 * pid of a thread_group_leader. Testing for task
3cc86a71 5855@@ -3116,8 +3153,10 @@ int proc_pid_readdir(struct file *file,
c2e5f7c8
JR
5856 if (!has_pid_permissions(ns, iter.task, 2))
5857 continue;
db55b927 5858
c2e5f7c8
JR
5859- len = snprintf(name, sizeof(name), "%d", iter.tgid);
5860+ len = snprintf(name, sizeof(name), "%d", vx_map_tgid(iter.tgid));
5861 ctx->pos = iter.tgid + TGID_OFFSET;
2380c486 5862+ if (!vx_proc_task_visible(iter.task))
d337f35e 5863+ continue;
c2e5f7c8
JR
5864 if (!proc_fill_cache(file, ctx, name, len,
5865 proc_pid_instantiate, iter.task, NULL)) {
2380c486 5866 put_task_struct(iter.task);
3cc86a71 5867@@ -3254,6 +3293,7 @@ static const struct pid_entry tid_base_s
09be7631 5868 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
bb20add7 5869 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
09be7631
JR
5870 #endif
5871+ ONE("nsproxy", S_IRUGO, proc_pid_nsproxy),
5872 };
5873
c2e5f7c8 5874 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
3cc86a71 5875@@ -3320,6 +3360,8 @@ static struct dentry *proc_task_lookup(s
bb20add7 5876 tid = name_to_int(&dentry->d_name);
d337f35e
JR
5877 if (tid == ~0U)
5878 goto out;
5879+ if (vx_current_initpid(tid))
5880+ goto out;
5881
2380c486 5882 ns = dentry->d_sb->s_fs_info;
d337f35e 5883 rcu_read_lock();
c2806d43
AM
5884diff -urNp -x '*.orig' linux-4.4/fs/proc/generic.c linux-4.4/fs/proc/generic.c
5885--- linux-4.4/fs/proc/generic.c 2021-02-24 16:56:11.809087043 +0100
5886+++ linux-4.4/fs/proc/generic.c 2021-02-24 16:56:24.572822341 +0100
927ca606 5887@@ -22,6 +22,7 @@
d337f35e
JR
5888 #include <linux/bitops.h>
5889 #include <linux/spinlock.h>
2380c486 5890 #include <linux/completion.h>
d337f35e
JR
5891+#include <linux/vserver/inode.h>
5892 #include <asm/uaccess.h>
5893
5894 #include "internal.h"
927ca606
AM
5895@@ -66,8 +67,16 @@ static struct proc_dir_entry *pde_subdir
5896 node = node->rb_left;
5897 else if (result > 0)
5898 node = node->rb_right;
5899- else
5900+ else {
5901+ if (!vx_hide_check(0, de->vx_flags)) {
5902+ vxdprintk(VXD_CBIT(misc, 9),
5903+ VS_Q("%*s")
5904+ " hidden in pde_subdir_find()",
5905+ de->namelen, de->name);
5906+ return 0;
5907+ }
5908 return de;
bb20add7 5909+ }
927ca606
AM
5910 }
5911 return NULL;
5912 }
5913@@ -241,6 +250,8 @@ struct dentry *proc_lookup_de(struct pro
5914 return ERR_PTR(-ENOMEM);
5915 d_set_d_op(dentry, &simple_dentry_operations);
5916 d_add(dentry, inode);
ba86f833 5917+ /* generic proc entries belong to the host */
537831f9 5918+ i_tag_write(inode, 0);
927ca606 5919 return NULL;
2380c486 5920 }
927ca606
AM
5921 read_unlock(&proc_subdir_lock);
5922@@ -287,6 +298,12 @@ int proc_readdir_de(struct proc_dir_entr
c2e5f7c8
JR
5923 do {
5924 struct proc_dir_entry *next;
5925 pde_get(de);
bb20add7
AM
5926+ if (!vx_hide_check(0, de->vx_flags)) {
5927+ vxdprintk(VXD_CBIT(misc, 9),
5928+ VS_Q("%*s") " hidden in proc_readdir_de()",
5929+ de->namelen, de->name);
c2e5f7c8 5930+ goto skip;
bb20add7 5931+ }
927ca606 5932 read_unlock(&proc_subdir_lock);
c2e5f7c8
JR
5933 if (!dir_emit(ctx, de->name, de->namelen,
5934 de->low_ino, de->mode >> 12)) {
927ca606 5935@@ -294,6 +311,7 @@ int proc_readdir_de(struct proc_dir_entr
c2e5f7c8
JR
5936 return 0;
5937 }
927ca606 5938 read_lock(&proc_subdir_lock);
c2e5f7c8
JR
5939+ skip:
5940 ctx->pos++;
927ca606 5941 next = pde_subdir_next(de);
c2e5f7c8 5942 pde_put(de);
927ca606 5943@@ -387,6 +405,7 @@ static struct proc_dir_entry *__proc_cre
537831f9 5944 ent->mode = mode;
d337f35e 5945 ent->nlink = nlink;
927ca606 5946 ent->subdir = RB_ROOT;
d337f35e 5947+ ent->vx_flags = IATTR_PROC_DEFAULT;
537831f9 5948 atomic_set(&ent->count, 1);
2380c486 5949 spin_lock_init(&ent->pde_unload_lock);
2380c486 5950 INIT_LIST_HEAD(&ent->pde_openers);
927ca606 5951@@ -411,7 +430,8 @@ struct proc_dir_entry *proc_symlink(cons
d337f35e
JR
5952 kfree(ent->data);
5953 kfree(ent);
5954 ent = NULL;
5955- }
5956+ } else
5957+ ent->vx_flags = IATTR_PROC_SYMLINK;
5958 } else {
5959 kfree(ent);
5960 ent = NULL;
c2806d43
AM
5961diff -urNp -x '*.orig' linux-4.4/fs/proc/inode.c linux-4.4/fs/proc/inode.c
5962--- linux-4.4/fs/proc/inode.c 2021-02-24 16:56:11.809087043 +0100
5963+++ linux-4.4/fs/proc/inode.c 2021-02-24 16:56:24.576155779 +0100
927ca606 5964@@ -431,6 +431,8 @@ struct inode *proc_get_inode(struct supe
d337f35e
JR
5965 inode->i_uid = de->uid;
5966 inode->i_gid = de->gid;
5967 }
5968+ if (de->vx_flags)
5969+ PROC_I(inode)->vx_flags = de->vx_flags;
5970 if (de->size)
5971 inode->i_size = de->size;
5972 if (de->nlink)
c2806d43
AM
5973diff -urNp -x '*.orig' linux-4.4/fs/proc/internal.h linux-4.4/fs/proc/internal.h
5974--- linux-4.4/fs/proc/internal.h 2016-01-11 00:01:32.000000000 +0100
5975+++ linux-4.4/fs/proc/internal.h 2021-02-24 16:56:24.576155779 +0100
09be7631
JR
5976@@ -14,6 +14,7 @@
5977 #include <linux/spinlock.h>
5978 #include <linux/atomic.h>
b00e13aa 5979 #include <linux/binfmts.h>
d337f35e
JR
5980+#include <linux/vs_pid.h>
5981
09be7631
JR
5982 struct ctl_table_header;
5983 struct mempolicy;
927ca606 5984@@ -34,6 +35,7 @@ struct proc_dir_entry {
09be7631
JR
5985 nlink_t nlink;
5986 kuid_t uid;
5987 kgid_t gid;
5988+ int vx_flags;
5989 loff_t size;
5990 const struct inode_operations *proc_iops;
5991 const struct file_operations *proc_fops;
927ca606 5992@@ -51,15 +53,22 @@ struct proc_dir_entry {
09be7631
JR
5993 char name[];
5994 };
5995
5996+struct vx_info;
5997+struct nx_info;
2380c486 5998+
09be7631
JR
5999 union proc_op {
6000 int (*proc_get_link)(struct dentry *, struct path *);
09be7631
JR
6001 int (*proc_show)(struct seq_file *m,
6002 struct pid_namespace *ns, struct pid *pid,
6003 struct task_struct *task);
6004+ int (*proc_vs_read)(char *page);
6005+ int (*proc_vxi_read)(struct vx_info *vxi, char *page);
6006+ int (*proc_nxi_read)(struct nx_info *nxi, char *page);
6007 };
2380c486 6008
09be7631
JR
6009 struct proc_inode {
6010 struct pid *pid;
6011+ int vx_flags;
6012 int fd;
6013 union proc_op op;
6014 struct proc_dir_entry *pde;
927ca606 6015@@ -92,11 +101,16 @@ static inline struct pid *proc_pid(struc
d337f35e
JR
6016 return PROC_I(inode)->pid;
6017 }
6018
6019-static inline struct task_struct *get_proc_task(struct inode *inode)
6020+static inline struct task_struct *get_proc_task_real(struct inode *inode)
6021 {
6022 return get_pid_task(proc_pid(inode), PIDTYPE_PID);
6023 }
6024
6025+static inline struct task_struct *get_proc_task(struct inode *inode)
6026+{
6027+ return vx_get_proc_task(inode, proc_pid(inode));
6028+}
6029+
09be7631 6030 static inline int task_dumpable(struct task_struct *task)
d337f35e 6031 {
09be7631 6032 int dumpable = 0;
927ca606 6033@@ -155,6 +169,8 @@ extern int proc_pid_status(struct seq_fi
09be7631
JR
6034 struct pid *, struct task_struct *);
6035 extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
6036 struct pid *, struct task_struct *);
6037+extern int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
6038+ struct pid *pid, struct task_struct *task);
6039
6040 /*
6041 * base.c
c2806d43
AM
6042diff -urNp -x '*.orig' linux-4.4/fs/proc/loadavg.c linux-4.4/fs/proc/loadavg.c
6043--- linux-4.4/fs/proc/loadavg.c 2016-01-11 00:01:32.000000000 +0100
6044+++ linux-4.4/fs/proc/loadavg.c 2021-02-24 16:56:24.576155779 +0100
ec22aa5c 6045@@ -12,15 +12,27 @@
1bc743c0 6046
ec22aa5c 6047 static int loadavg_proc_show(struct seq_file *m, void *v)
1bc743c0
JR
6048 {
6049+ unsigned long running;
6050+ unsigned int threads;
ec22aa5c 6051 unsigned long avnrun[3];
1bc743c0 6052
ec22aa5c 6053 get_avenrun(avnrun, FIXED_1/200, 0);
bd427b06 6054
ec22aa5c 6055+ if (vx_flags(VXF_VIRT_LOAD, 0)) {
eab5a9a6 6056+ struct vx_info *vxi = current_vx_info();
ec22aa5c
AM
6057+
6058+ running = atomic_read(&vxi->cvirt.nr_running);
6059+ threads = atomic_read(&vxi->cvirt.nr_threads);
6060+ } else {
6061+ running = nr_running();
6062+ threads = nr_threads;
6063+ }
6064+
6065 seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
6066 LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
6067 LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
6068 LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
1bc743c0
JR
6069- nr_running(), nr_threads,
6070+ running, threads,
6071 task_active_pid_ns(current)->last_pid);
ec22aa5c 6072 return 0;
1bc743c0 6073 }
c2806d43
AM
6074diff -urNp -x '*.orig' linux-4.4/fs/proc/meminfo.c linux-4.4/fs/proc/meminfo.c
6075--- linux-4.4/fs/proc/meminfo.c 2021-02-24 16:56:11.809087043 +0100
6076+++ linux-4.4/fs/proc/meminfo.c 2021-02-24 16:56:24.576155779 +0100
48cb6a3c 6077@@ -40,7 +40,8 @@ static int meminfo_proc_show(struct seq_
c2e5f7c8
JR
6078 si_swapinfo(&i);
6079 committed = percpu_counter_read_positive(&vm_committed_as);
e3afe727
AM
6080
6081- cached = global_page_state(NR_FILE_PAGES) -
6082+ cached = vx_flags(VXF_VIRT_MEM, 0) ?
6083+ vx_vsi_cached(&i) : global_page_state(NR_FILE_PAGES) -
b00e13aa 6084 total_swapcache_pages() - i.bufferram;
e3afe727 6085 if (cached < 0)
d337f35e 6086 cached = 0;
c2806d43
AM
6087diff -urNp -x '*.orig' linux-4.4/fs/proc/root.c linux-4.4/fs/proc/root.c
6088--- linux-4.4/fs/proc/root.c 2021-02-24 16:56:11.809087043 +0100
6089+++ linux-4.4/fs/proc/root.c 2021-02-24 16:56:24.576155779 +0100
b00e13aa 6090@@ -20,9 +20,14 @@
2380c486
JR
6091 #include <linux/mount.h>
6092 #include <linux/pid_namespace.h>
db55b927 6093 #include <linux/parser.h>
2380c486 6094+#include <linux/vserver/inode.h>
d337f35e 6095
2380c486 6096 #include "internal.h"
d337f35e 6097
d337f35e
JR
6098+struct proc_dir_entry *proc_virtual;
6099+
6100+extern void proc_vx_init(void);
2380c486
JR
6101+
6102 static int proc_test_super(struct super_block *sb, void *data)
6103 {
6104 return sb->s_fs_info == data;
927ca606
AM
6105@@ -113,7 +118,8 @@ static struct dentry *proc_mount(struct
6106 options = data;
c2e5f7c8
JR
6107
6108 /* Does the mounter have privilege over the pid namespace? */
6109- if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
6110+ if (!vx_ns_capable(ns->user_ns,
6111+ CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
6112 return ERR_PTR(-EPERM);
6113 }
6114
927ca606 6115@@ -196,6 +202,7 @@ void __init proc_root_init(void)
bb20add7 6116 proc_tty_init();
2380c486
JR
6117 proc_mkdir("bus", NULL);
6118 proc_sys_init();
d337f35e
JR
6119+ proc_vx_init();
6120 }
6121
6122 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
927ca606 6123@@ -257,6 +264,7 @@ struct proc_dir_entry proc_root = {
2380c486
JR
6124 .proc_iops = &proc_root_inode_operations,
6125 .proc_fops = &proc_root_operations,
6126 .parent = &proc_root,
6127+ .vx_flags = IATTR_ADMIN | IATTR_WATCH,
927ca606 6128 .subdir = RB_ROOT,
a168f21d 6129 .name = "/proc",
2380c486 6130 };
c2806d43
AM
6131diff -urNp -x '*.orig' linux-4.4/fs/proc/self.c linux-4.4/fs/proc/self.c
6132--- linux-4.4/fs/proc/self.c 2021-02-24 16:56:11.809087043 +0100
6133+++ linux-4.4/fs/proc/self.c 2021-02-24 16:56:24.576155779 +0100
927ca606
AM
6134@@ -1,6 +1,7 @@
6135 #include <linux/sched.h>
09be7631
JR
6136 #include <linux/slab.h>
6137 #include <linux/pid_namespace.h>
b00e13aa 6138+#include <linux/vserver/inode.h>
09be7631 6139 #include "internal.h"
b00e13aa
AM
6140
6141 /*
c2806d43 6142@@ -59,6 +60,8 @@ int proc_setup_self(struct super_block *
09be7631
JR
6143 self = d_alloc_name(s->s_root, "self");
6144 if (self) {
48cb6a3c 6145 struct inode *inode = new_inode(s);
09be7631
JR
6146+
6147+ // self->vx_flags = IATTR_PROC_SYMLINK;
6148 if (inode) {
6149 inode->i_ino = self_inum;
6150 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
c2806d43
AM
6151diff -urNp -x '*.orig' linux-4.4/fs/proc/stat.c linux-4.4/fs/proc/stat.c
6152--- linux-4.4/fs/proc/stat.c 2016-01-11 00:01:32.000000000 +0100
6153+++ linux-4.4/fs/proc/stat.c 2021-02-24 16:56:24.576155779 +0100
537831f9 6154@@ -9,8 +9,10 @@
1e8b8f9b
AM
6155 #include <linux/slab.h>
6156 #include <linux/time.h>
6157 #include <linux/irqnr.h>
6158+#include <linux/vserver/cvirt.h>
265de2f7 6159 #include <linux/cputime.h>
1e8b8f9b 6160 #include <linux/tick.h>
537831f9
AM
6161+#include <linux/cpuset.h>
6162
6163 #ifndef arch_irq_stat_cpu
6164 #define arch_irq_stat_cpu(cpu) 0
6165@@ -87,14 +89,26 @@ static int show_stat(struct seq_file *p,
6166 u64 sum_softirq = 0;
6167 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
6168 struct timespec boottime;
6169+ cpumask_var_t cpus_allowed;
6170+ bool virt_cpu = vx_flags(VXF_VIRT_CPU, 0);
6171
6172 user = nice = system = idle = iowait =
1e8b8f9b
AM
6173 irq = softirq = steal = 0;
6174 guest = guest_nice = 0;
6175 getboottime(&boottime);
6176+
6177+ if (vx_flags(VXF_VIRT_UPTIME, 0))
6178+ vx_vsi_boottime(&boottime);
537831f9
AM
6179+
6180+ if (virt_cpu)
6181+ cpuset_cpus_allowed(current, cpus_allowed);
1e8b8f9b
AM
6182+
6183 jif = boottime.tv_sec;
6184
6185 for_each_possible_cpu(i) {
537831f9
AM
6186+ if (virt_cpu && !cpumask_test_cpu(i, cpus_allowed))
6187+ continue;
6188+
6189 user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
6190 nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
6191 system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
6192@@ -131,6 +145,9 @@ static int show_stat(struct seq_file *p,
6193 seq_putc(p, '\n');
6194
6195 for_each_online_cpu(i) {
6196+ if (virt_cpu && !cpumask_test_cpu(i, cpus_allowed))
6197+ continue;
6198+
6199 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
6200 user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
6201 nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
c2806d43
AM
6202diff -urNp -x '*.orig' linux-4.4/fs/proc/uptime.c linux-4.4/fs/proc/uptime.c
6203--- linux-4.4/fs/proc/uptime.c 2016-01-11 00:01:32.000000000 +0100
6204+++ linux-4.4/fs/proc/uptime.c 2021-02-24 16:56:24.576155779 +0100
f6c5ef8b 6205@@ -5,6 +5,7 @@
ec22aa5c
AM
6206 #include <linux/seq_file.h>
6207 #include <linux/time.h>
f6c5ef8b 6208 #include <linux/kernel_stat.h>
ec22aa5c 6209+#include <linux/vserver/cvirt.h>
265de2f7 6210 #include <linux/cputime.h>
ec22aa5c
AM
6211
6212 static int uptime_proc_show(struct seq_file *m, void *v)
c2e5f7c8 6213@@ -24,6 +25,10 @@ static int uptime_proc_show(struct seq_f
f6c5ef8b
AM
6214 nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
6215 idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
6216 idle.tv_nsec = rem;
ec22aa5c
AM
6217+
6218+ if (vx_flags(VXF_VIRT_UPTIME, 0))
6219+ vx_vsi_uptime(&uptime, &idle);
6220+
6221 seq_printf(m, "%lu.%02lu %lu.%02lu\n",
6222 (unsigned long) uptime.tv_sec,
6223 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
c2806d43
AM
6224diff -urNp -x '*.orig' linux-4.4/fs/proc_namespace.c linux-4.4/fs/proc_namespace.c
6225--- linux-4.4/fs/proc_namespace.c 2021-02-24 16:56:11.812420481 +0100
6226+++ linux-4.4/fs/proc_namespace.c 2021-02-24 16:56:24.576155779 +0100
927ca606 6227@@ -46,6 +46,8 @@ static int show_sb_opts(struct seq_file
db55b927
AM
6228 { MS_DIRSYNC, ",dirsync" },
6229 { MS_MANDLOCK, ",mand" },
927ca606 6230 { MS_LAZYTIME, ",lazytime" },
db55b927
AM
6231+ { MS_TAGGED, ",tag" },
6232+ { MS_NOTAGCHECK, ",notagcheck" },
6233 { 0, NULL }
6234 };
6235 const struct proc_fs_info *fs_infop;
927ca606 6236@@ -82,6 +84,38 @@ static inline void mangle(struct seq_fil
db55b927
AM
6237 seq_escape(m, s, " \t\n\\");
6238 }
6239
61b0c03f
JR
6240+#ifdef CONFIG_VSERVER_EXTRA_MNT_CHECK
6241+
db55b927
AM
6242+static int mnt_is_reachable(struct vfsmount *vfsmnt)
6243+{ /* Return 1 if vfsmnt is its namespace's root mount or sits at/below the current task's fs root, 0 otherwise. */
6244+ struct path root;
6245+ struct dentry *point;
6246+ struct mount *mnt = real_mount(vfsmnt);
6247+ struct mount *root_mnt;
6248+ int ret;
6249+
6250+ if (mnt == mnt->mnt_ns->root) /* the namespace root is always reported as reachable */
6251+ return 1;
6252+
98d9a5b1 6253+ rcu_read_lock();
db55b927
AM
6254+ root = current->fs->root; /* NOTE(review): struct copy taken without fs->lock or path_get() - confirm this is safe against a concurrent chroot */
6255+ root_mnt = real_mount(root.mnt);
6256+ point = root.dentry; /* if mnt already is root_mnt the loop is skipped and point stays the root dentry */
6257+
6258+ while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) { /* climb parent mounts until the top of the tree or the task's root mount */
6259+ point = mnt->mnt_mountpoint;
6260+ mnt = mnt->mnt_parent;
6261+ }
98d9a5b1 6262+ rcu_read_unlock();
db55b927
AM
6263+
6264+ ret = (mnt == root_mnt) && is_subdir(point, root.dentry); /* the climb must end on the task's root mount, with the last mountpoint under the task's root dentry */
db55b927
AM
6265+ return ret;
6266+}
61b0c03f
JR
6267+
6268+#else
6269+#define mnt_is_reachable(v) (1)
6270+#endif
db55b927
AM
6271+
6272 static void show_type(struct seq_file *m, struct super_block *sb)
6273 {
6274 mangle(m, sb->s_type->name);
927ca606 6275@@ -99,6 +133,17 @@ static int show_vfsmnt(struct seq_file *
db55b927
AM
6276 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
6277 struct super_block *sb = mnt_path.dentry->d_sb;
6278
6279+ if (vx_flags(VXF_HIDE_MOUNT, 0))
6280+ return SEQ_SKIP;
6281+ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6282+ return SEQ_SKIP;
6283+
6284+ if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
6285+ mnt == current->fs->root.mnt) {
6286+ seq_puts(m, "/dev/root / ");
6287+ goto type;
6288+ }
6289+
6290 if (sb->s_op->show_devname) {
6291 err = sb->s_op->show_devname(m, mnt_path.dentry);
6292 if (err)
927ca606
AM
6293@@ -112,6 +157,7 @@ static int show_vfsmnt(struct seq_file *
6294 if (err)
6295 goto out;
db55b927
AM
6296 seq_putc(m, ' ');
6297+type:
6298 show_type(m, sb);
6299 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
6300 err = show_sb_opts(m, sb);
927ca606
AM
6301@@ -133,6 +179,11 @@ static int show_mountinfo(struct seq_fil
6302 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
db55b927
AM
6303 int err = 0;
6304
6305+ if (vx_flags(VXF_HIDE_MOUNT, 0))
6306+ return SEQ_SKIP;
6307+ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6308+ return SEQ_SKIP;
6309+
6310 seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
6311 MAJOR(sb->s_dev), MINOR(sb->s_dev));
6312 if (sb->s_op->show_path)
927ca606 6313@@ -193,6 +244,17 @@ static int show_vfsstat(struct seq_file
db55b927
AM
6314 struct super_block *sb = mnt_path.dentry->d_sb;
6315 int err = 0;
6316
6317+ if (vx_flags(VXF_HIDE_MOUNT, 0))
6318+ return SEQ_SKIP;
6319+ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6320+ return SEQ_SKIP;
6321+
6322+ if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
6323+ mnt == current->fs->root.mnt) {
6324+ seq_puts(m, "device /dev/root mounted on / ");
6325+ goto type;
6326+ }
6327+
6328 /* device */
6329 if (sb->s_op->show_devname) {
6330 seq_puts(m, "device ");
927ca606
AM
6331@@ -214,7 +276,7 @@ static int show_vfsstat(struct seq_file
6332 if (err)
6333 goto out;
db55b927
AM
6334 seq_putc(m, ' ');
6335-
6336+type:
6337 /* file system type */
6338 seq_puts(m, "with fstype ");
6339 show_type(m, sb);
c2806d43
AM
6340diff -urNp -x '*.orig' linux-4.4/fs/quota/dquot.c linux-4.4/fs/quota/dquot.c
6341--- linux-4.4/fs/quota/dquot.c 2021-02-24 16:56:11.812420481 +0100
6342+++ linux-4.4/fs/quota/dquot.c 2021-02-24 16:56:24.576155779 +0100
48cb6a3c 6343@@ -1644,6 +1644,9 @@ int __dquot_alloc_space(struct inode *in
76514441 6344 int reserve = flags & DQUOT_SPACE_RESERVE;
927ca606 6345 struct dquot **dquots;
76514441
AM
6346
6347+ if ((ret = dl_alloc_space(inode, number)))
6348+ return ret;
6349+
bb20add7
AM
6350 if (!dquot_active(inode)) {
6351 inode_incr_space(inode, number, reserve);
6352 goto out;
48cb6a3c 6353@@ -1696,6 +1699,9 @@ int dquot_alloc_inode(struct inode *inod
1e8b8f9b 6354 struct dquot_warn warn[MAXQUOTAS];
927ca606 6355 struct dquot * const *dquots;
76514441
AM
6356
6357+ if ((ret = dl_alloc_inode(inode)))
6358+ return ret;
6359+
93de0823 6360 if (!dquot_active(inode))
bb20add7
AM
6361 return 0;
6362 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
48cb6a3c 6363@@ -1798,6 +1804,8 @@ void __dquot_free_space(struct inode *in
927ca606 6364 struct dquot **dquots;
bb20add7 6365 int reserve = flags & DQUOT_SPACE_RESERVE, index;
76514441
AM
6366
6367+ dl_free_space(inode, number);
6368+
93de0823 6369 if (!dquot_active(inode)) {
bb20add7
AM
6370 inode_decr_space(inode, number, reserve);
6371 return;
48cb6a3c 6372@@ -1842,6 +1850,8 @@ void dquot_free_inode(struct inode *inod
927ca606 6373 struct dquot * const *dquots;
bb20add7 6374 int index;
76514441
AM
6375
6376+ dl_free_inode(inode);
6377+
93de0823 6378 if (!dquot_active(inode))
bb20add7
AM
6379 return;
6380
c2806d43
AM
6381diff -urNp -x '*.orig' linux-4.4/fs/quota/quota.c linux-4.4/fs/quota/quota.c
6382--- linux-4.4/fs/quota/quota.c 2021-02-24 16:56:11.812420481 +0100
6383+++ linux-4.4/fs/quota/quota.c 2021-02-24 16:56:24.576155779 +0100
78865d5b
AM
6384@@ -8,6 +8,7 @@
6385 #include <linux/fs.h>
6386 #include <linux/namei.h>
6387 #include <linux/slab.h>
d337f35e 6388+#include <linux/vs_context.h>
78865d5b 6389 #include <asm/current.h>
92598135 6390 #include <linux/uaccess.h>
78865d5b 6391 #include <linux/kernel.h>
8931d859 6392@@ -39,7 +40,7 @@ static int check_quotactl_permission(str
78865d5b
AM
6393 break;
6394 /*FALLTHROUGH*/
6395 default:
d337f35e
JR
6396- if (!capable(CAP_SYS_ADMIN))
6397+ if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
6398 return -EPERM;
6399 }
6400
8931d859 6401@@ -704,6 +705,46 @@ static int do_quotactl(struct super_bloc
b00e13aa
AM
6402
6403 #ifdef CONFIG_BLOCK
d337f35e 6404
d337f35e
JR
6405+#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6406+
6407+#include <linux/vroot.h>
2380c486
JR
6408+#include <linux/major.h>
6409+#include <linux/module.h>
d337f35e 6410+#include <linux/kallsyms.h>
2380c486 6411+#include <linux/vserver/debug.h>
d337f35e
JR
6412+
6413+static vroot_grb_func *vroot_get_real_bdev = NULL;
6414+
763640ca 6415+static DEFINE_SPINLOCK(vroot_grb_lock);
d337f35e
JR
6416+
6417+int register_vroot_grb(vroot_grb_func *func) { /* Install func as the vroot_get_real_bdev hook: returns 0 on success, -EBUSY if a hook is already installed. */
6418+ int ret = -EBUSY;
6419+
6420+ spin_lock(&vroot_grb_lock); /* serializes against unregister_vroot_grb() */
6421+ if (!vroot_get_real_bdev) { /* first registration wins; an existing hook is never replaced */
6422+ vroot_get_real_bdev = func;
6423+ ret = 0;
6424+ }
6425+ spin_unlock(&vroot_grb_lock);
6426+ return ret;
6427+}
6428+EXPORT_SYMBOL(register_vroot_grb);
6429+
6430+int unregister_vroot_grb(vroot_grb_func *func) { /* Remove func as the vroot_get_real_bdev hook: returns 0 on success, -EINVAL if func is not the hook currently installed. */
6431+ int ret = -EINVAL;
6432+
6433+ spin_lock(&vroot_grb_lock); /* serializes against register_vroot_grb() */
6434+ if (vroot_get_real_bdev && vroot_get_real_bdev == func) { /* only the owner of the installed hook may clear it */
6435+ vroot_get_real_bdev = NULL;
6436+ ret = 0;
6437+ }
6438+ spin_unlock(&vroot_grb_lock);
6439+ return ret;
6440+}
6441+EXPORT_SYMBOL(unregister_vroot_grb);
6442+
6443+#endif
6444+
db55b927
AM
6445 /* Return 1 if 'cmd' will block on frozen filesystem */
6446 static int quotactl_cmd_write(int cmd)
6447 {
8931d859 6448@@ -739,6 +780,22 @@ static struct super_block *quotactl_bloc
2380c486
JR
6449 putname(tmp);
6450 if (IS_ERR(bdev))
6451 return ERR_CAST(bdev);
6452+#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6453+ if (bdev && bdev->bd_inode &&
537831f9 6454+ imajor(bdev->bd_inode) == VROOT_MAJOR) { /* a vroot device: redirect the quota call to the device the hook returns */
2380c486
JR
6455+ struct block_device *bdnew = ERR_PTR(-EINVAL); /* stays -EINVAL when no hook is registered */
6456+
6457+ if (vroot_get_real_bdev) /* NOTE(review): hook pointer is read without vroot_grb_lock - confirm it cannot be cleared between this test and the call */
6458+ bdnew = vroot_get_real_bdev(bdev);
6459+ else
6460+ vxdprintk(VXD_CBIT(misc, 0),
6461+ "vroot_get_real_bdev not set");
6462+ bdput(bdev); /* the vroot device itself is no longer needed on either path */
6463+ if (IS_ERR(bdnew))
6464+ return ERR_CAST(bdnew);
6465+ bdev = bdnew;
6466+ }
6467+#endif
db55b927
AM
6468 if (quotactl_cmd_write(cmd))
6469 sb = get_super_thawed(bdev);
6470 else
c2806d43
AM
6471diff -urNp -x '*.orig' linux-4.4/fs/stat.c linux-4.4/fs/stat.c
6472--- linux-4.4/fs/stat.c 2021-02-24 16:56:11.822420796 +0100
6473+++ linux-4.4/fs/stat.c 2021-02-24 16:56:24.576155779 +0100
2380c486 6474@@ -26,6 +26,7 @@ void generic_fillattr(struct inode *inod
d337f35e
JR
6475 stat->nlink = inode->i_nlink;
6476 stat->uid = inode->i_uid;
6477 stat->gid = inode->i_gid;
6478+ stat->tag = inode->i_tag;
6479 stat->rdev = inode->i_rdev;
a168f21d 6480 stat->size = i_size_read(inode);
d337f35e 6481 stat->atime = inode->i_atime;
c2806d43
AM
6482diff -urNp -x '*.orig' linux-4.4/fs/statfs.c linux-4.4/fs/statfs.c
6483--- linux-4.4/fs/statfs.c 2016-01-11 00:01:32.000000000 +0100
6484+++ linux-4.4/fs/statfs.c 2021-02-24 16:56:24.576155779 +0100
93de0823 6485@@ -7,6 +7,8 @@
76514441
AM
6486 #include <linux/statfs.h>
6487 #include <linux/security.h>
6488 #include <linux/uaccess.h>
6489+#include <linux/vs_base.h>
6490+#include <linux/vs_dlimit.h>
db55b927 6491 #include "internal.h"
76514441 6492
93de0823 6493 static int flags_by_mnt(int mnt_flags)
db55b927 6494@@ -60,6 +62,8 @@ static int statfs_by_dentry(struct dentr
93de0823
AM
6495 retval = dentry->d_sb->s_op->statfs(dentry, buf);
6496 if (retval == 0 && buf->f_frsize == 0)
6497 buf->f_frsize = buf->f_bsize;
6498+ if (!vx_check(0, VS_ADMIN|VS_WATCH))
6499+ vx_vsi_statfs(dentry->d_sb, buf);
76514441
AM
6500 return retval;
6501 }
93de0823 6502
c2806d43
AM
6503diff -urNp -x '*.orig' linux-4.4/fs/super.c linux-4.4/fs/super.c
6504--- linux-4.4/fs/super.c 2021-02-24 16:56:11.822420796 +0100
6505+++ linux-4.4/fs/super.c 2021-02-24 16:56:24.576155779 +0100
bb20add7 6506@@ -33,6 +33,8 @@
be261992 6507 #include <linux/cleancache.h>
1e8b8f9b 6508 #include <linux/fsnotify.h>
92598135 6509 #include <linux/lockdep.h>
1e8b8f9b 6510+#include <linux/magic.h>
be261992
AM
6511+#include <linux/vs_context.h>
6512 #include "internal.h"
6513
6514
3cc86a71
AM
6515@@ -1153,6 +1155,13 @@ mount_fs(struct file_system_type *type,
6516 smp_wmb();
be261992
AM
6517 sb->s_flags |= MS_BORN;
6518
6519+ error = -EPERM;
6520+ if (!vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT) &&
6521+ !sb->s_bdev &&
6522+ (sb->s_magic != PROC_SUPER_MAGIC) &&
6523+ (sb->s_magic != DEVPTS_SUPER_MAGIC))
6524+ goto out_sb;
6525+
6526 error = security_sb_kern_mount(sb, flags, secdata);
6527 if (error)
6528 goto out_sb;
c2806d43
AM
6529diff -urNp -x '*.orig' linux-4.4/fs/utimes.c linux-4.4/fs/utimes.c
6530--- linux-4.4/fs/utimes.c 2021-02-24 16:56:11.829087673 +0100
6531+++ linux-4.4/fs/utimes.c 2021-02-24 16:56:24.576155779 +0100
2380c486
JR
6532@@ -8,6 +8,8 @@
6533 #include <linux/stat.h>
d337f35e 6534 #include <linux/utime.h>
2380c486 6535 #include <linux/syscalls.h>
d337f35e
JR
6536+#include <linux/mount.h>
6537+#include <linux/vs_cowbl.h>
6538 #include <asm/uaccess.h>
6539 #include <asm/unistd.h>
6540
c2e5f7c8 6541@@ -52,13 +54,19 @@ static int utimes_common(struct path *pa
76514441
AM
6542 {
6543 int error;
6544 struct iattr newattrs;
6545- struct inode *inode = path->dentry->d_inode;
c2e5f7c8 6546 struct inode *delegated_inode = NULL;
76514441 6547+ struct inode *inode;
b00e13aa
AM
6548+
6549+ error = cow_check_and_break(path);
6550+ if (error)
6551+ goto out;
76514441
AM
6552
6553 error = mnt_want_write(path->mnt);
6554 if (error)
6555 goto out;
6556
76514441
AM
6557+ inode = path->dentry->d_inode;
6558+
6559 if (times && times[0].tv_nsec == UTIME_NOW &&
6560 times[1].tv_nsec == UTIME_NOW)
6561 times = NULL;
c2806d43
AM
6562diff -urNp -x '*.orig' linux-4.4/fs/xattr.c linux-4.4/fs/xattr.c
6563--- linux-4.4/fs/xattr.c 2021-02-24 16:56:11.829087673 +0100
6564+++ linux-4.4/fs/xattr.c 2021-02-24 16:56:24.579489218 +0100
537831f9 6565@@ -21,6 +21,7 @@
d337f35e 6566 #include <linux/audit.h>
1e8b8f9b 6567 #include <linux/vmalloc.h>
537831f9 6568 #include <linux/posix_acl_xattr.h>
d337f35e 6569+#include <linux/mount.h>
d337f35e 6570
1e8b8f9b 6571 #include <asm/uaccess.h>
d337f35e 6572
537831f9 6573@@ -52,7 +53,7 @@ xattr_permission(struct inode *inode, co
763640ca 6574 * The trusted.* namespace can only be accessed by privileged users.
e03b8c3c 6575 */
763640ca
JR
6576 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
6577- if (!capable(CAP_SYS_ADMIN))
a168f21d
AM
6578+ if (!vx_capable(CAP_SYS_ADMIN, VXC_FS_TRUSTED))
6579 return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
6580 return 0;
6581 }
c2806d43
AM
6582diff -urNp -x '*.orig' linux-4.4/include/linux/capability.h linux-4.4/include/linux/capability.h
6583--- linux-4.4/include/linux/capability.h 2021-02-24 16:56:11.855755179 +0100
6584+++ linux-4.4/include/linux/capability.h 2021-02-24 16:56:24.579489218 +0100
927ca606 6585@@ -77,7 +77,8 @@ extern const kernel_cap_t __cap_init_eff
bb20add7
AM
6586 #else /* HAND-CODED capability initializers */
6587
6588 #define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1)
6589-#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1)
6590+#define CAP_LAST_U32_VALID_MASK ((CAP_TO_MASK(CAP_LAST_CAP + 1) -1) \
6591+ | CAP_TO_MASK(CAP_CONTEXT))
6592
6593 # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }})
6594 # define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }})
c2806d43
AM
6595diff -urNp -x '*.orig' linux-4.4/include/linux/cred.h linux-4.4/include/linux/cred.h
6596--- linux-4.4/include/linux/cred.h 2021-02-24 16:56:11.859088617 +0100
6597+++ linux-4.4/include/linux/cred.h 2021-02-24 16:56:24.579489218 +0100
3cc86a71 6598@@ -165,6 +165,7 @@ extern void exit_creds(struct task_struc
1163e6ab
AM
6599 extern int copy_creds(struct task_struct *, unsigned long);
6600 extern const struct cred *get_task_cred(struct task_struct *);
6601 extern struct cred *cred_alloc_blank(void);
6602+extern struct cred *__prepare_creds(const struct cred *);
6603 extern struct cred *prepare_creds(void);
6604 extern struct cred *prepare_exec_creds(void);
6605 extern int commit_creds(struct cred *);
3cc86a71 6606@@ -225,6 +226,31 @@ static inline bool cap_ambient_invariant
927ca606 6607 cred->cap_inheritable));
3bac966d 6608 }
3bac966d
AM
6609
6610+static inline void set_cred_subscribers(struct cred *cred, int n)
6611+{
6612+#ifdef CONFIG_DEBUG_CREDENTIALS
6613+ atomic_set(&cred->subscribers, n);
6614+#endif
6615+}
6616+
6617+static inline int read_cred_subscribers(const struct cred *cred)
6618+{
6619+#ifdef CONFIG_DEBUG_CREDENTIALS
6620+ return atomic_read(&cred->subscribers);
6621+#else
6622+ return 0;
6623+#endif
6624+}
6625+
6626+static inline void alter_cred_subscribers(const struct cred *_cred, int n)
6627+{
6628+#ifdef CONFIG_DEBUG_CREDENTIALS
6629+ struct cred *cred = (struct cred *) _cred;
6630+
6631+ atomic_add(n, &cred->subscribers);
6632+#endif
6633+}
6634+
6635 /**
6636 * get_new_cred - Get a reference on a new set of credentials
6637 * @cred: The new credentials to reference
c2806d43
AM
6638diff -urNp -x '*.orig' linux-4.4/include/linux/dcache.h linux-4.4/include/linux/dcache.h
6639--- linux-4.4/include/linux/dcache.h 2021-02-24 16:56:11.859088617 +0100
6640+++ linux-4.4/include/linux/dcache.h 2021-02-24 16:56:24.579489218 +0100
927ca606
AM
6641@@ -10,6 +10,7 @@
6642 #include <linux/cache.h>
6643 #include <linux/rcupdate.h>
6644 #include <linux/lockref.h>
6645+// #include <linux/vs_limit.h>
6646
6647 struct path;
6648 struct vfsmount;
8931d859 6649@@ -352,8 +353,10 @@ extern char *dentry_path(struct dentry *
927ca606
AM
6650 */
6651 static inline struct dentry *dget_dlock(struct dentry *dentry)
6652 {
6653- if (dentry)
6654+ if (dentry) {
6655 dentry->d_lockref.count++;
6656+ // vx_dentry_inc(dentry);
6657+ }
6658 return dentry;
6659 }
6660
c2806d43
AM
6661diff -urNp -x '*.orig' linux-4.4/include/linux/devpts_fs.h linux-4.4/include/linux/devpts_fs.h
6662--- linux-4.4/include/linux/devpts_fs.h 2021-02-24 16:56:11.859088617 +0100
6663+++ linux-4.4/include/linux/devpts_fs.h 2021-02-24 16:56:24.579489218 +0100
927ca606 6664@@ -35,5 +35,4 @@ void devpts_pty_kill(struct inode *inode
2380c486
JR
6665
6666 #endif
d337f35e 6667
2380c486 6668-
d337f35e 6669 #endif /* _LINUX_DEVPTS_FS_H */
c2806d43
AM
6670diff -urNp -x '*.orig' linux-4.4/include/linux/fs.h linux-4.4/include/linux/fs.h
6671--- linux-4.4/include/linux/fs.h 2021-02-24 16:56:11.862422056 +0100
6672+++ linux-4.4/include/linux/fs.h 2021-02-24 16:56:24.579489218 +0100
3cc86a71 6673@@ -232,6 +232,7 @@ typedef void (dax_iodone_t)(struct buffe
2380c486
JR
6674 #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */
6675 #define ATTR_TIMES_SET (1 << 16)
927ca606
AM
6676 #define ATTR_TOUCH (1 << 17)
6677+#define ATTR_TAG (1 << 18)
d337f35e
JR
6678
6679 /*
bb20add7 6680 * Whiteout is represented by a char device. The following constants define the
3cc86a71 6681@@ -254,6 +255,7 @@ struct iattr {
d337f35e 6682 umode_t ia_mode;
42bc425c
AM
6683 kuid_t ia_uid;
6684 kgid_t ia_gid;
537831f9 6685+ ktag_t ia_tag;
d337f35e
JR
6686 loff_t ia_size;
6687 struct timespec ia_atime;
6688 struct timespec ia_mtime;
3cc86a71 6689@@ -592,7 +594,9 @@ struct inode {
a168f21d 6690 unsigned short i_opflags;
42bc425c
AM
6691 kuid_t i_uid;
6692 kgid_t i_gid;
2380c486 6693- unsigned int i_flags;
537831f9 6694+ ktag_t i_tag;
2380c486
JR
6695+ unsigned short i_flags;
6696+ unsigned short i_vflags;
a168f21d
AM
6697
6698 #ifdef CONFIG_FS_POSIX_ACL
6699 struct posix_acl *i_acl;
3cc86a71 6700@@ -621,6 +625,7 @@ struct inode {
f6c5ef8b
AM
6701 unsigned int __i_nlink;
6702 };
d33d7b00
AM
6703 dev_t i_rdev;
6704+ dev_t i_mdev;
42bc425c 6705 loff_t i_size;
a168f21d
AM
6706 struct timespec i_atime;
6707 struct timespec i_mtime;
c2806d43 6708@@ -820,6 +825,11 @@ static inline gid_t i_gid_read(const str
537831f9
AM
6709 return from_kgid(&init_user_ns, inode->i_gid);
6710 }
6711
61333608 6712+static inline vtag_t i_tag_read(const struct inode *inode)
537831f9
AM
6713+{
6714+ return from_ktag(&init_user_ns, inode->i_tag);
6715+}
6716+
6717 static inline void i_uid_write(struct inode *inode, uid_t uid)
6718 {
6719 inode->i_uid = make_kuid(&init_user_ns, uid);
c2806d43 6720@@ -830,14 +840,19 @@ static inline void i_gid_write(struct in
537831f9
AM
6721 inode->i_gid = make_kgid(&init_user_ns, gid);
6722 }
2380c486 6723
61333608 6724+static inline void i_tag_write(struct inode *inode, vtag_t tag)
537831f9
AM
6725+{
6726+ inode->i_tag = make_ktag(&init_user_ns, tag);
6727+}
6728+
2380c486
JR
6729 static inline unsigned iminor(const struct inode *inode)
6730 {
6731- return MINOR(inode->i_rdev);
6732+ return MINOR(inode->i_mdev);
6733 }
6734
6735 static inline unsigned imajor(const struct inode *inode)
6736 {
6737- return MAJOR(inode->i_rdev);
6738+ return MAJOR(inode->i_mdev);
6739 }
6740
6741 extern struct block_device *I_BDEV(struct inode *inode);
c2806d43 6742@@ -894,6 +909,7 @@ struct file {
d337f35e
JR
6743 loff_t f_pos;
6744 struct fown_struct f_owner;
ec22aa5c 6745 const struct cred *f_cred;
61333608 6746+ vxid_t f_xid;
d337f35e
JR
6747 struct file_ra_state f_ra;
6748
2380c486 6749 u64 f_version;
c2806d43 6750@@ -1028,6 +1044,7 @@ struct file_lock {
2380c486 6751 struct file *fl_file;
d337f35e
JR
6752 loff_t fl_start;
6753 loff_t fl_end;
61333608 6754+ vxid_t fl_xid;
d337f35e
JR
6755
6756 struct fasync_struct * fl_fasync; /* for lease break notifications */
f6c5ef8b 6757 /* for lease breaks: */
c2806d43 6758@@ -1705,6 +1722,7 @@ struct inode_operations {
d4263eb0
JR
6759 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
6760 ssize_t (*listxattr) (struct dentry *, char *, size_t);
6761 int (*removexattr) (struct dentry *, const char *);
6762+ int (*sync_flags) (struct inode *, int, int);
d33d7b00
AM
6763 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
6764 u64 len);
42bc425c 6765 int (*update_time)(struct inode *, struct timespec *, int);
c2806d43 6766@@ -1719,6 +1737,7 @@ ssize_t rw_copy_check_uvector(int type,
537831f9
AM
6767 unsigned long nr_segs, unsigned long fast_segs,
6768 struct iovec *fast_pointer,
6769 struct iovec **ret_pointer);
d337f35e
JR
6770+ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t);
6771
927ca606
AM
6772 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
6773 extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *);
c2806d43 6774@@ -1784,6 +1803,14 @@ struct super_operations {
927ca606
AM
6775 #else
6776 #define S_DAX 0 /* Make all the DAX code disappear */
6777 #endif
6778+#define S_IXUNLINK 16384 /* Immutable Invert on unlink */
537831f9
AM
6779+
6780+/* Linux-VServer related Inode flags */
6781+
6782+#define V_VALID 1
6783+#define V_XATTR 2
6784+#define V_BARRIER 4 /* Barrier for chroot() */
6785+#define V_COW 8 /* Copy on Write */
6786
6787 /*
6788 * Note that nosuid etc flags are inode-specific: setting some file-system
c2806d43 6789@@ -1808,10 +1835,13 @@ struct super_operations {
537831f9
AM
6790 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
6791 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
6792 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
6793+#define IS_TAGGED(inode) __IS_FLG(inode, MS_TAGGED)
6794
6795 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
6796 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
6797 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
6798+#define IS_IXUNLINK(inode) ((inode)->i_flags & S_IXUNLINK)
6799+#define IS_IXORUNLINK(inode) ((IS_IXUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode))
6800 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL)
6801
6802 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
c2806d43 6803@@ -1826,6 +1856,16 @@ struct super_operations {
bb20add7
AM
6804 #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
6805 (inode)->i_rdev == WHITEOUT_DEV)
537831f9
AM
6806
6807+#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_vflags & V_BARRIER))
6808+
6809+#ifdef CONFIG_VSERVER_COWBL
6810+# define IS_COW(inode) (IS_IXUNLINK(inode) && IS_IMMUTABLE(inode))
6811+# define IS_COW_LINK(inode) (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1))
6812+#else
6813+# define IS_COW(inode) (0)
6814+# define IS_COW_LINK(inode) (0)
6815+#endif
6816+
6817 /*
6818 * Inode state bits. Protected by inode->i_lock
6819 *
c2806d43 6820@@ -2086,6 +2126,9 @@ extern struct kobject *fs_kobj;
bb20add7 6821 extern int locks_mandatory_locked(struct file *);
537831f9
AM
6822 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
6823
6824+#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */
6825+#define ATTR_FLAG_IXUNLINK 1024 /* Immutable invert on unlink */
6826+
6827 /*
6828 * Candidates for mandatory locking have the setgid bit set
6829 * but no group execute bit - an otherwise meaningless combination.
c2806d43 6830@@ -2842,6 +2885,7 @@ extern int dcache_dir_open(struct inode
d337f35e
JR
6831 extern int dcache_dir_close(struct inode *, struct file *);
6832 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
c2e5f7c8
JR
6833 extern int dcache_readdir(struct file *, struct dir_context *);
6834+extern int dcache_readdir_filter(struct file *, struct dir_context *, int (*)(struct dentry *));
76514441 6835 extern int simple_setattr(struct dentry *, struct iattr *);
d337f35e
JR
6836 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
6837 extern int simple_statfs(struct dentry *, struct kstatfs *);
c2806d43
AM
6838diff -urNp -x '*.orig' linux-4.4/include/linux/init_task.h linux-4.4/include/linux/init_task.h
6839--- linux-4.4/include/linux/init_task.h 2021-02-24 16:56:11.865755494 +0100
6840+++ linux-4.4/include/linux/init_task.h 2021-02-24 16:56:24.579489218 +0100
48cb6a3c 6841@@ -269,6 +269,10 @@ extern struct task_group root_task_group
b00e13aa 6842 INIT_VTIME(tsk) \
927ca606
AM
6843 INIT_NUMA_BALANCING(tsk) \
6844 INIT_KASAN(tsk) \
d337f35e
JR
6845+ .xid = 0, \
6846+ .vx_info = NULL, \
6847+ .nid = 0, \
6848+ .nx_info = NULL, \
6849 }
6850
6851
c2806d43
AM
6852diff -urNp -x '*.orig' linux-4.4/include/linux/ipc.h linux-4.4/include/linux/ipc.h
6853--- linux-4.4/include/linux/ipc.h 2016-01-11 00:01:32.000000000 +0100
6854+++ linux-4.4/include/linux/ipc.h 2021-02-24 16:56:24.579489218 +0100
537831f9 6855@@ -16,6 +16,7 @@ struct kern_ipc_perm
d337f35e 6856 key_t key;
537831f9
AM
6857 kuid_t uid;
6858 kgid_t gid;
61333608 6859+ vxid_t xid;
537831f9
AM
6860 kuid_t cuid;
6861 kgid_t cgid;
db55b927 6862 umode_t mode;
c2806d43
AM
6863diff -urNp -x '*.orig' linux-4.4/include/linux/memcontrol.h linux-4.4/include/linux/memcontrol.h
6864--- linux-4.4/include/linux/memcontrol.h 2021-02-24 16:56:11.872422371 +0100
6865+++ linux-4.4/include/linux/memcontrol.h 2021-02-24 16:56:24.579489218 +0100
927ca606
AM
6866@@ -113,6 +113,7 @@ struct cg_proto {
6867 struct mem_cgroup *memcg;
6868 };
6869
6870+
6871 #ifdef CONFIG_MEMCG
6872 struct mem_cgroup_stat_cpu {
6873 long count[MEM_CGROUP_STAT_NSTATS];
1d9ad342 6874@@ -338,6 +339,12 @@ static inline bool mem_cgroup_is_descend
927ca606
AM
6875 return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
6876 }
6877
1d9ad342
AM
6878+extern unsigned long mem_cgroup_mem_usage_pages(struct mem_cgroup *memcg);
6879+extern unsigned long mem_cgroup_mem_limit_pages(struct mem_cgroup *memcg);
6880+extern unsigned long mem_cgroup_memsw_usage_pages(struct mem_cgroup *memcg);
6881+extern unsigned long mem_cgroup_memsw_limit_pages(struct mem_cgroup *memcg);
6882+extern void dump_mem_cgroup(struct mem_cgroup *memcg);
927ca606
AM
6883+
6884 static inline bool mm_match_cgroup(struct mm_struct *mm,
6885 struct mem_cgroup *memcg)
e3afe727 6886 {
c2806d43
AM
6887diff -urNp -x '*.orig' linux-4.4/include/linux/mount.h linux-4.4/include/linux/mount.h
6888--- linux-4.4/include/linux/mount.h 2021-02-24 16:56:11.875755809 +0100
6889+++ linux-4.4/include/linux/mount.h 2021-02-24 16:56:24.579489218 +0100
927ca606 6890@@ -63,6 +63,9 @@ struct mnt_namespace;
bb20add7 6891 #define MNT_MARKED 0x4000000
927ca606 6892 #define MNT_UMOUNT 0x8000000
d337f35e 6893
2380c486
JR
6894+#define MNT_TAGID 0x10000
6895+#define MNT_NOTAG 0x20000
6896+
d337f35e 6897 struct vfsmount {
db55b927
AM
6898 struct dentry *mnt_root; /* root of the mounted tree */
6899 struct super_block *mnt_sb; /* pointer to superblock */
c2806d43
AM
6900diff -urNp -x '*.orig' linux-4.4/include/linux/net.h linux-4.4/include/linux/net.h
6901--- linux-4.4/include/linux/net.h 2021-02-24 16:56:11.879089247 +0100
6902+++ linux-4.4/include/linux/net.h 2021-02-24 16:56:24.582822656 +0100
927ca606
AM
6903@@ -43,6 +43,7 @@ struct net;
6904 #define SOCK_NOSPACE 2
d337f35e
JR
6905 #define SOCK_PASSCRED 3
6906 #define SOCK_PASSSEC 4
927ca606 6907+#define SOCK_USER_SOCKET 5
d337f35e
JR
6908
6909 #ifndef ARCH_HAS_SOCKET_TYPES
6910 /**
c2806d43
AM
6911diff -urNp -x '*.orig' linux-4.4/include/linux/netdevice.h linux-4.4/include/linux/netdevice.h
6912--- linux-4.4/include/linux/netdevice.h 2021-02-24 16:56:12.542443470 +0100
6913+++ linux-4.4/include/linux/netdevice.h 2021-02-24 16:56:24.579489218 +0100
6914@@ -2311,6 +2311,7 @@ static inline int dev_recursion_level(vo
6915
6916 struct net_device *dev_get_by_index(struct net *net, int ifindex);
6917 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
6918+struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex);
6919 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
6920 int netdev_get_name(struct net *net, char *name, int ifindex);
6921 int dev_restart(struct net_device *dev);
6922diff -urNp -x '*.orig' linux-4.4/include/linux/nsproxy.h linux-4.4/include/linux/nsproxy.h
6923--- linux-4.4/include/linux/nsproxy.h 2016-01-11 00:01:32.000000000 +0100
6924+++ linux-4.4/include/linux/nsproxy.h 2021-02-24 16:56:24.582822656 +0100
2380c486 6925@@ -3,6 +3,7 @@
d337f35e 6926
2380c486
JR
6927 #include <linux/spinlock.h>
6928 #include <linux/sched.h>
6929+#include <linux/vserver/debug.h>
6930
6931 struct mnt_namespace;
6932 struct uts_namespace;
bb20add7
AM
6933@@ -63,6 +64,7 @@ extern struct nsproxy init_nsproxy;
6934 */
2380c486
JR
6935
6936 int copy_namespaces(unsigned long flags, struct task_struct *tsk);
6937+struct nsproxy *copy_nsproxy(struct nsproxy *orig);
6938 void exit_task_namespaces(struct task_struct *tsk);
6939 void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
6940 void free_nsproxy(struct nsproxy *ns);
bb20add7 6941@@ -70,16 +72,26 @@ int unshare_nsproxy_namespaces(unsigned
b00e13aa 6942 struct cred *, struct fs_struct *);
a168f21d 6943 int __init nsproxy_cache_init(void);
2380c486
JR
6944
6945-static inline void put_nsproxy(struct nsproxy *ns)
6946+#define get_nsproxy(n) __get_nsproxy(n, __FILE__, __LINE__)
d337f35e 6947+
2380c486
JR
6948+static inline void __get_nsproxy(struct nsproxy *ns,
6949+ const char *_file, int _line)
6950 {
6951- if (atomic_dec_and_test(&ns->count)) {
6952- free_nsproxy(ns);
6953- }
6954+ vxlprintk(VXD_CBIT(space, 0), "get_nsproxy(%p[%u])",
6955+ ns, atomic_read(&ns->count), _file, _line);
d337f35e 6956+ atomic_inc(&ns->count);
2380c486
JR
6957 }
6958
6959-static inline void get_nsproxy(struct nsproxy *ns)
6960+#define put_nsproxy(n) __put_nsproxy(n, __FILE__, __LINE__)
d337f35e 6961+
2380c486
JR
6962+static inline void __put_nsproxy(struct nsproxy *ns,
6963+ const char *_file, int _line)
6964 {
6965- atomic_inc(&ns->count);
6966+ vxlprintk(VXD_CBIT(space, 0), "put_nsproxy(%p[%u])",
6967+ ns, atomic_read(&ns->count), _file, _line);
6968+ if (atomic_dec_and_test(&ns->count)) {
6969+ free_nsproxy(ns);
6970+ }
6971 }
d337f35e 6972
763640ca 6973 #endif
c2806d43
AM
6974diff -urNp -x '*.orig' linux-4.4/include/linux/pid.h linux-4.4/include/linux/pid.h
6975--- linux-4.4/include/linux/pid.h 2021-02-24 16:56:11.882422685 +0100
6976+++ linux-4.4/include/linux/pid.h 2021-02-24 16:56:24.582822656 +0100
927ca606 6977@@ -10,7 +10,8 @@ enum pid_type
d337f35e 6978 PIDTYPE_SID,
927ca606
AM
6979 PIDTYPE_MAX,
6980 /* only valid to __task_pid_nr_ns() */
6981- __PIDTYPE_TGID
6982+ __PIDTYPE_TGID,
6983+ __PIDTYPE_REALPID
d337f35e
JR
6984 };
6985
6986 /*
927ca606 6987@@ -172,6 +173,7 @@ static inline pid_t pid_nr(struct pid *p
2380c486
JR
6988 }
6989
6990 pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
6991+pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns);
6992 pid_t pid_vnr(struct pid *pid);
6993
6994 #define do_each_pid_task(pid, type, task) \
c2806d43
AM
6995diff -urNp -x '*.orig' linux-4.4/include/linux/quotaops.h linux-4.4/include/linux/quotaops.h
6996--- linux-4.4/include/linux/quotaops.h 2021-02-24 16:56:11.885756124 +0100
6997+++ linux-4.4/include/linux/quotaops.h 2021-02-24 16:56:24.582822656 +0100
e22b5178
AM
6998@@ -8,6 +8,7 @@
6999 #define _LINUX_QUOTAOPS_
7000
7001 #include <linux/fs.h>
7002+#include <linux/vs_dlimit.h>
7003
76514441
AM
7004 #define DQUOT_SPACE_WARN 0x1
7005 #define DQUOT_SPACE_RESERVE 0x2
48cb6a3c 7006@@ -221,11 +222,12 @@ static inline void dquot_drop(struct ino
76514441 7007
927ca606 7008 static inline int dquot_alloc_inode(struct inode *inode)
76514441
AM
7009 {
7010- return 0;
7011+ return dl_alloc_inode(inode);
7012 }
7013
927ca606 7014 static inline void dquot_free_inode(struct inode *inode)
e22b5178 7015 {
76514441
AM
7016+ dl_free_inode(inode);
7017 }
7018
7019 static inline int dquot_transfer(struct inode *inode, struct iattr *iattr)
48cb6a3c 7020@@ -236,6 +238,10 @@ static inline int dquot_transfer(struct
76514441
AM
7021 static inline int __dquot_alloc_space(struct inode *inode, qsize_t number,
7022 int flags)
7023 {
7024+ int ret = 0;
7025+
7026+ if ((ret = dl_alloc_space(inode, number)))
7027+ return ret;
7028 if (!(flags & DQUOT_SPACE_RESERVE))
7029 inode_add_bytes(inode, number);
7030 return 0;
48cb6a3c 7031@@ -246,6 +252,7 @@ static inline void __dquot_free_space(st
76514441
AM
7032 {
7033 if (!(flags & DQUOT_SPACE_RESERVE))
7034 inode_sub_bytes(inode, number);
7035+ dl_free_space(inode, number);
7036 }
7037
7038 static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
c2806d43
AM
7039diff -urNp -x '*.orig' linux-4.4/include/linux/sched.h linux-4.4/include/linux/sched.h
7040--- linux-4.4/include/linux/sched.h 2021-02-24 16:56:11.889089562 +0100
7041+++ linux-4.4/include/linux/sched.h 2021-02-24 16:56:24.582822656 +0100
48cb6a3c 7042@@ -1611,6 +1611,14 @@ struct task_struct {
2380c486 7043 #endif
42bc425c 7044 struct seccomp seccomp;
2380c486
JR
7045
7046+/* vserver context data */
7047+ struct vx_info *vx_info;
7048+ struct nx_info *nx_info;
d337f35e 7049+
61333608
AM
7050+ vxid_t xid;
7051+ vnid_t nid;
7052+ vtag_t tag;
2380c486
JR
7053+
7054 /* Thread group tracking */
c2806d43
AM
7055 u64 parent_exec_id;
7056 u64 self_exec_id;
7057@@ -1940,6 +1948,11 @@ struct pid_namespace;
ec22aa5c
AM
7058 pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
7059 struct pid_namespace *ns);
d337f35e 7060
2380c486
JR
7061+#include <linux/vserver/base.h>
7062+#include <linux/vserver/context.h>
7063+#include <linux/vserver/debug.h>
7064+#include <linux/vserver/pid.h>
7065+
7066 static inline pid_t task_pid_nr(struct task_struct *tsk)
7067 {
7068 return tsk->pid;
c2806d43 7069@@ -1953,7 +1966,8 @@ static inline pid_t task_pid_nr_ns(struc
d337f35e 7070
2380c486
JR
7071 static inline pid_t task_pid_vnr(struct task_struct *tsk)
7072 {
ec22aa5c
AM
7073- return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
7074+ // return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
7075+ return vx_map_pid(__task_pid_nr_ns(tsk, PIDTYPE_PID, NULL));
2380c486 7076 }
d337f35e 7077
d337f35e 7078
c2806d43
AM
7079diff -urNp -x '*.orig' linux-4.4/include/linux/shmem_fs.h linux-4.4/include/linux/shmem_fs.h
7080--- linux-4.4/include/linux/shmem_fs.h 2021-02-24 16:56:11.889089562 +0100
7081+++ linux-4.4/include/linux/shmem_fs.h 2021-02-24 16:56:24.582822656 +0100
bb20add7 7082@@ -10,6 +10,9 @@
2380c486 7083
a168f21d 7084 /* inode in-kernel data */
2380c486
JR
7085
7086+#define TMPFS_SUPER_MAGIC 0x01021994
7087+
7088+
7089 struct shmem_inode_info {
7090 spinlock_t lock;
bb20add7 7091 unsigned int seals; /* shmem seals */
c2806d43
AM
7092diff -urNp -x '*.orig' linux-4.4/include/linux/stat.h linux-4.4/include/linux/stat.h
7093--- linux-4.4/include/linux/stat.h 2016-01-11 00:01:32.000000000 +0100
7094+++ linux-4.4/include/linux/stat.h 2021-02-24 16:56:24.582822656 +0100
537831f9 7095@@ -25,6 +25,7 @@ struct kstat {
2380c486 7096 unsigned int nlink;
42bc425c
AM
7097 kuid_t uid;
7098 kgid_t gid;
8ce283e1 7099+ ktag_t tag;
2380c486
JR
7100 dev_t rdev;
7101 loff_t size;
7102 struct timespec atime;
c2806d43
AM
7103diff -urNp -x '*.orig' linux-4.4/include/linux/sunrpc/auth.h linux-4.4/include/linux/sunrpc/auth.h
7104--- linux-4.4/include/linux/sunrpc/auth.h 2016-01-11 00:01:32.000000000 +0100
7105+++ linux-4.4/include/linux/sunrpc/auth.h 2021-02-24 16:56:24.582822656 +0100
927ca606 7106@@ -40,6 +40,7 @@ enum {
2380c486 7107 struct auth_cred {
b00e13aa
AM
7108 kuid_t uid;
7109 kgid_t gid;
7110+ ktag_t tag;
2380c486 7111 struct group_info *group_info;
db55b927 7112 const char *principal;
c2e5f7c8 7113 unsigned long ac_flags;
c2806d43
AM
7114diff -urNp -x '*.orig' linux-4.4/include/linux/sunrpc/clnt.h linux-4.4/include/linux/sunrpc/clnt.h
7115--- linux-4.4/include/linux/sunrpc/clnt.h 2021-02-24 16:56:11.892423000 +0100
7116+++ linux-4.4/include/linux/sunrpc/clnt.h 2021-02-24 16:56:24.582822656 +0100
c2e5f7c8 7117@@ -51,7 +51,8 @@ struct rpc_clnt {
2380c486 7118 cl_discrtry : 1,/* disconnect before retry */
c2e5f7c8 7119 cl_noretranstimeo: 1,/* No retransmit timeouts */
2380c486
JR
7120 cl_autobind : 1,/* use getport() */
7121- cl_chatty : 1;/* be verbose */
7122+ cl_chatty : 1,/* be verbose */
7123+ cl_tag : 1;/* context tagging */
d337f35e 7124
2380c486
JR
7125 struct rpc_rtt * cl_rtt; /* RTO estimator data */
7126 const struct rpc_timeout *cl_timeout; /* Timeout strategy */
c2806d43
AM
7127diff -urNp -x '*.orig' linux-4.4/include/linux/types.h linux-4.4/include/linux/types.h
7128--- linux-4.4/include/linux/types.h 2016-01-11 00:01:32.000000000 +0100
7129+++ linux-4.4/include/linux/types.h 2021-02-24 16:56:24.582822656 +0100
537831f9 7130@@ -32,6 +32,9 @@ typedef __kernel_uid32_t uid_t;
2380c486
JR
7131 typedef __kernel_gid32_t gid_t;
7132 typedef __kernel_uid16_t uid16_t;
7133 typedef __kernel_gid16_t gid16_t;
61333608
AM
7134+typedef unsigned int vxid_t;
7135+typedef unsigned int vnid_t;
7136+typedef unsigned int vtag_t;
2380c486
JR
7137
7138 typedef unsigned long uintptr_t;
7139
c2806d43
AM
7140diff -urNp -x '*.orig' linux-4.4/include/linux/uidgid.h linux-4.4/include/linux/uidgid.h
7141--- linux-4.4/include/linux/uidgid.h 2016-01-11 00:01:32.000000000 +0100
7142+++ linux-4.4/include/linux/uidgid.h 2021-02-24 16:56:24.582822656 +0100
bb20add7 7143@@ -21,13 +21,17 @@ typedef struct {
537831f9
AM
7144 uid_t val;
7145 } kuid_t;
7146
7147-
7148 typedef struct {
7149 gid_t val;
7150 } kgid_t;
7151
7152+typedef struct {
61333608 7153+ vtag_t val;
537831f9
AM
7154+} ktag_t;
7155+
7156 #define KUIDT_INIT(value) (kuid_t){ value }
7157 #define KGIDT_INIT(value) (kgid_t){ value }
7158+#define KTAGT_INIT(value) (ktag_t){ value }
7159
927ca606 7160 #ifdef CONFIG_MULTIUSER
537831f9 7161 static inline uid_t __kuid_val(kuid_t uid)
927ca606 7162@@ -51,11 +55,18 @@ static inline gid_t __kgid_val(kgid_t gi
537831f9 7163 }
927ca606 7164 #endif
537831f9 7165
61333608 7166+static inline vtag_t __ktag_val(ktag_t tag)
537831f9
AM
7167+{
7168+ return tag.val;
7169+}
7170+
537831f9
AM
7171 #define GLOBAL_ROOT_UID KUIDT_INIT(0)
7172 #define GLOBAL_ROOT_GID KGIDT_INIT(0)
7173+#define GLOBAL_ROOT_TAG KTAGT_INIT(0)
7174
7175 #define INVALID_UID KUIDT_INIT(-1)
7176 #define INVALID_GID KGIDT_INIT(-1)
7177+#define INVALID_TAG KTAGT_INIT(-1)
7178
7179 static inline bool uid_eq(kuid_t left, kuid_t right)
7180 {
927ca606 7181@@ -67,6 +78,11 @@ static inline bool gid_eq(kgid_t left, k
537831f9
AM
7182 return __kgid_val(left) == __kgid_val(right);
7183 }
7184
7185+static inline bool tag_eq(ktag_t left, ktag_t right)
7186+{
7187+ return __ktag_val(left) == __ktag_val(right);
7188+}
7189+
7190 static inline bool uid_gt(kuid_t left, kuid_t right)
7191 {
7192 return __kuid_val(left) > __kuid_val(right);
927ca606
AM
7193@@ -117,13 +133,21 @@ static inline bool gid_valid(kgid_t gid)
7194 return __kgid_val(gid) != (gid_t) -1;
537831f9
AM
7195 }
7196
7197+static inline bool tag_valid(ktag_t tag)
7198+{
7199+ return !tag_eq(tag, INVALID_TAG);
7200+}
7201+
7202 #ifdef CONFIG_USER_NS
7203
7204 extern kuid_t make_kuid(struct user_namespace *from, uid_t uid);
7205 extern kgid_t make_kgid(struct user_namespace *from, gid_t gid);
c90fe048 7206+extern ktag_t make_ktag(struct user_namespace *from, gid_t gid);
537831f9
AM
7207
7208 extern uid_t from_kuid(struct user_namespace *to, kuid_t uid);
7209 extern gid_t from_kgid(struct user_namespace *to, kgid_t gid);
61333608 7210+extern vtag_t from_ktag(struct user_namespace *to, ktag_t tag);
537831f9
AM
7211+
7212 extern uid_t from_kuid_munged(struct user_namespace *to, kuid_t uid);
7213 extern gid_t from_kgid_munged(struct user_namespace *to, kgid_t gid);
7214
927ca606 7215@@ -149,6 +173,11 @@ static inline kgid_t make_kgid(struct us
537831f9
AM
7216 return KGIDT_INIT(gid);
7217 }
7218
61333608 7219+static inline ktag_t make_ktag(struct user_namespace *from, vtag_t tag)
537831f9
AM
7220+{
7221+ return KTAGT_INIT(tag);
7222+}
7223+
7224 static inline uid_t from_kuid(struct user_namespace *to, kuid_t kuid)
7225 {
7226 return __kuid_val(kuid);
927ca606 7227@@ -159,6 +188,11 @@ static inline gid_t from_kgid(struct use
537831f9
AM
7228 return __kgid_val(kgid);
7229 }
7230
61333608 7231+static inline vtag_t from_ktag(struct user_namespace *to, ktag_t ktag)
537831f9
AM
7232+{
7233+ return __ktag_val(ktag);
7234+}
7235+
7236 static inline uid_t from_kuid_munged(struct user_namespace *to, kuid_t kuid)
7237 {
7238 uid_t uid = from_kuid(to, kuid);
c2806d43
AM
7239diff -urNp -x '*.orig' linux-4.4/include/linux/vroot.h linux-4.4/include/linux/vroot.h
7240--- linux-4.4/include/linux/vroot.h 1970-01-01 01:00:00.000000000 +0100
7241+++ linux-4.4/include/linux/vroot.h 2021-02-24 16:56:24.582822656 +0100
2380c486
JR
7242@@ -0,0 +1,51 @@
7243+
7244+/*
7245+ * include/linux/vroot.h
7246+ *
c2806d43
AM
7247+ * written by Herbert Pötzl, 9/11/2002
7248+ * ported to 2.6 by Herbert Pötzl, 30/12/2004
2380c486 7249+ *
c2806d43 7250+ * Copyright (C) 2002-2007 by Herbert Pötzl.
2380c486
JR
7251+ * Redistribution of this file is permitted under the
7252+ * GNU General Public License.
7253+ */
7254+
7255+#ifndef _LINUX_VROOT_H
7256+#define _LINUX_VROOT_H
7257+
7258+
7259+#ifdef __KERNEL__
7260+
7261+/* Possible states of device */
7262+enum {
7263+ Vr_unbound,
7264+ Vr_bound,
7265+};
7266+
7267+struct vroot_device {
7268+ int vr_number;
7269+ int vr_refcnt;
7270+
7271+ struct semaphore vr_ctl_mutex;
7272+ struct block_device *vr_device;
7273+ int vr_state;
7274+};
7275+
7276+
7277+typedef struct block_device *(vroot_grb_func)(struct block_device *);
7278+
7279+extern int register_vroot_grb(vroot_grb_func *);
7280+extern int unregister_vroot_grb(vroot_grb_func *);
7281+
7282+#endif /* __KERNEL__ */
7283+
7284+#define MAX_VROOT_DEFAULT 8
7285+
7286+/*
7287+ * IOCTL commands --- we will commandeer 0x56 ('V')
7288+ */
7289+
7290+#define VROOT_SET_DEV 0x5600
7291+#define VROOT_CLR_DEV 0x5601
7292+
7293+#endif /* _LINUX_VROOT_H */
c2806d43
AM
7294diff -urNp -x '*.orig' linux-4.4/include/linux/vs_base.h linux-4.4/include/linux/vs_base.h
7295--- linux-4.4/include/linux/vs_base.h 1970-01-01 01:00:00.000000000 +0100
7296+++ linux-4.4/include/linux/vs_base.h 2021-02-24 16:56:24.582822656 +0100
2380c486
JR
7297@@ -0,0 +1,10 @@
7298+#ifndef _VS_BASE_H
7299+#define _VS_BASE_H
7300+
7301+#include "vserver/base.h"
7302+#include "vserver/check.h"
7303+#include "vserver/debug.h"
7304+
7305+#else
7306+#warning duplicate inclusion
7307+#endif
c2806d43
AM
7308diff -urNp -x '*.orig' linux-4.4/include/linux/vs_context.h linux-4.4/include/linux/vs_context.h
7309--- linux-4.4/include/linux/vs_context.h 1970-01-01 01:00:00.000000000 +0100
7310+++ linux-4.4/include/linux/vs_context.h 2021-02-24 16:56:24.582822656 +0100
4a036bed 7311@@ -0,0 +1,242 @@
2380c486
JR
7312+#ifndef _VS_CONTEXT_H
7313+#define _VS_CONTEXT_H
7314+
7315+#include "vserver/base.h"
7316+#include "vserver/check.h"
7317+#include "vserver/context.h"
7318+#include "vserver/history.h"
7319+#include "vserver/debug.h"
7320+
7321+#include <linux/sched.h>
7322+
7323+
7324+#define get_vx_info(i) __get_vx_info(i, __FILE__, __LINE__, __HERE__)
7325+
7326+static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
7327+ const char *_file, int _line, void *_here)
7328+{
7329+ if (!vxi)
7330+ return NULL;
7331+
7332+ vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])",
7333+ vxi, vxi ? vxi->vx_id : 0,
7334+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7335+ _file, _line);
7336+ __vxh_get_vx_info(vxi, _here);
7337+
7338+ atomic_inc(&vxi->vx_usecnt);
7339+ return vxi;
7340+}
7341+
7342+
7343+extern void free_vx_info(struct vx_info *);
7344+
7345+#define put_vx_info(i) __put_vx_info(i, __FILE__, __LINE__, __HERE__)
7346+
7347+static inline void __put_vx_info(struct vx_info *vxi,
7348+ const char *_file, int _line, void *_here)
7349+{
7350+ if (!vxi)
7351+ return;
7352+
7353+ vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])",
7354+ vxi, vxi ? vxi->vx_id : 0,
7355+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7356+ _file, _line);
7357+ __vxh_put_vx_info(vxi, _here);
7358+
7359+ if (atomic_dec_and_test(&vxi->vx_usecnt))
7360+ free_vx_info(vxi);
7361+}
7362+
7363+
7364+#define init_vx_info(p, i) \
7365+ __init_vx_info(p, i, __FILE__, __LINE__, __HERE__)
7366+
7367+static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi,
7368+ const char *_file, int _line, void *_here)
7369+{
7370+ if (vxi) {
7371+ vxlprintk(VXD_CBIT(xid, 3),
7372+ "init_vx_info(%p[#%d.%d])",
7373+ vxi, vxi ? vxi->vx_id : 0,
7374+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7375+ _file, _line);
7376+ __vxh_init_vx_info(vxi, vxp, _here);
7377+
7378+ atomic_inc(&vxi->vx_usecnt);
7379+ }
7380+ *vxp = vxi;
7381+}
7382+
7383+
7384+#define set_vx_info(p, i) \
7385+ __set_vx_info(p, i, __FILE__, __LINE__, __HERE__)
7386+
7387+static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
7388+ const char *_file, int _line, void *_here)
7389+{
7390+ struct vx_info *vxo;
7391+
7392+ if (!vxi)
7393+ return;
7394+
7395+ vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])",
7396+ vxi, vxi ? vxi->vx_id : 0,
7397+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7398+ _file, _line);
7399+ __vxh_set_vx_info(vxi, vxp, _here);
7400+
7401+ atomic_inc(&vxi->vx_usecnt);
7402+ vxo = xchg(vxp, vxi);
7403+ BUG_ON(vxo);
7404+}
7405+
7406+
7407+#define clr_vx_info(p) __clr_vx_info(p, __FILE__, __LINE__, __HERE__)
7408+
7409+static inline void __clr_vx_info(struct vx_info **vxp,
7410+ const char *_file, int _line, void *_here)
7411+{
7412+ struct vx_info *vxo;
7413+
7414+ vxo = xchg(vxp, NULL);
7415+ if (!vxo)
7416+ return;
7417+
7418+ vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])",
7419+ vxo, vxo ? vxo->vx_id : 0,
7420+ vxo ? atomic_read(&vxo->vx_usecnt) : 0,
7421+ _file, _line);
7422+ __vxh_clr_vx_info(vxo, vxp, _here);
7423+
7424+ if (atomic_dec_and_test(&vxo->vx_usecnt))
7425+ free_vx_info(vxo);
7426+}
7427+
7428+
7429+#define claim_vx_info(v, p) \
7430+ __claim_vx_info(v, p, __FILE__, __LINE__, __HERE__)
7431+
7432+static inline void __claim_vx_info(struct vx_info *vxi,
7433+ struct task_struct *task,
7434+ const char *_file, int _line, void *_here)
7435+{
7436+ vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p",
7437+ vxi, vxi ? vxi->vx_id : 0,
7438+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7439+ vxi ? atomic_read(&vxi->vx_tasks) : 0,
7440+ task, _file, _line);
7441+ __vxh_claim_vx_info(vxi, task, _here);
7442+
7443+ atomic_inc(&vxi->vx_tasks);
7444+}
7445+
7446+
7447+extern void unhash_vx_info(struct vx_info *);
7448+
7449+#define release_vx_info(v, p) \
7450+ __release_vx_info(v, p, __FILE__, __LINE__, __HERE__)
7451+
7452+static inline void __release_vx_info(struct vx_info *vxi,
7453+ struct task_struct *task,
7454+ const char *_file, int _line, void *_here)
7455+{
7456+ vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p",
7457+ vxi, vxi ? vxi->vx_id : 0,
7458+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7459+ vxi ? atomic_read(&vxi->vx_tasks) : 0,
7460+ task, _file, _line);
7461+ __vxh_release_vx_info(vxi, task, _here);
7462+
7463+ might_sleep();
7464+
7465+ if (atomic_dec_and_test(&vxi->vx_tasks))
7466+ unhash_vx_info(vxi);
7467+}
7468+
7469+
7470+#define task_get_vx_info(p) \
7471+ __task_get_vx_info(p, __FILE__, __LINE__, __HERE__)
7472+
7473+static inline struct vx_info *__task_get_vx_info(struct task_struct *p,
7474+ const char *_file, int _line, void *_here)
7475+{
7476+ struct vx_info *vxi;
7477+
7478+ task_lock(p);
7479+ vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)",
7480+ p, _file, _line);
7481+ vxi = __get_vx_info(p->vx_info, _file, _line, _here);
7482+ task_unlock(p);
7483+ return vxi;
7484+}
7485+
7486+
7487+static inline void __wakeup_vx_info(struct vx_info *vxi)
7488+{
7489+ if (waitqueue_active(&vxi->vx_wait))
7490+ wake_up_interruptible(&vxi->vx_wait);
7491+}
7492+
7493+
7494+#define enter_vx_info(v, s) __enter_vx_info(v, s, __FILE__, __LINE__)
7495+
7496+static inline void __enter_vx_info(struct vx_info *vxi,
7497+ struct vx_info_save *vxis, const char *_file, int _line)
7498+{
7499+ vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]",
7500+ vxi, vxi ? vxi->vx_id : 0, vxis, current,
7501+ current->xid, current->vx_info, _file, _line);
7502+ vxis->vxi = xchg(&current->vx_info, vxi);
7503+ vxis->xid = current->xid;
7504+ current->xid = vxi ? vxi->vx_id : 0;
7505+}
7506+
7507+#define leave_vx_info(s) __leave_vx_info(s, __FILE__, __LINE__)
7508+
7509+static inline void __leave_vx_info(struct vx_info_save *vxis,
7510+ const char *_file, int _line)
7511+{
7512+ vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]",
7513+ vxis, vxis->xid, vxis->vxi, current,
7514+ current->xid, current->vx_info, _file, _line);
7515+ (void)xchg(&current->vx_info, vxis->vxi);
7516+ current->xid = vxis->xid;
7517+}
7518+
7519+
7520+static inline void __enter_vx_admin(struct vx_info_save *vxis)
7521+{
7522+ vxis->vxi = xchg(&current->vx_info, NULL);
61333608 7523+ vxis->xid = xchg(&current->xid, (vxid_t)0);
2380c486
JR
7524+}
7525+
7526+static inline void __leave_vx_admin(struct vx_info_save *vxis)
7527+{
7528+ (void)xchg(&current->xid, vxis->xid);
7529+ (void)xchg(&current->vx_info, vxis->vxi);
7530+}
7531+
4a036bed
AM
7532+#define task_is_init(p) \
7533+ __task_is_init(p, __FILE__, __LINE__, __HERE__)
7534+
7535+static inline int __task_is_init(struct task_struct *p,
7536+ const char *_file, int _line, void *_here)
7537+{
7538+ int is_init = is_global_init(p);
7539+
7540+ task_lock(p);
7541+ if (p->vx_info)
7542+ is_init = p->vx_info->vx_initpid == p->pid;
7543+ task_unlock(p);
7544+ return is_init;
7545+}
7546+
2380c486
JR
7547+extern void exit_vx_info(struct task_struct *, int);
7548+extern void exit_vx_info_early(struct task_struct *, int);
7549+
7550+
7551+#else
7552+#warning duplicate inclusion
7553+#endif
c2806d43
AM
7554diff -urNp -x '*.orig' linux-4.4/include/linux/vs_cowbl.h linux-4.4/include/linux/vs_cowbl.h
7555--- linux-4.4/include/linux/vs_cowbl.h 1970-01-01 01:00:00.000000000 +0100
7556+++ linux-4.4/include/linux/vs_cowbl.h 2021-02-24 16:56:24.582822656 +0100
78865d5b 7557@@ -0,0 +1,48 @@
2380c486
JR
7558+#ifndef _VS_COWBL_H
7559+#define _VS_COWBL_H
7560+
7561+#include <linux/fs.h>
7562+#include <linux/dcache.h>
7563+#include <linux/namei.h>
78865d5b 7564+#include <linux/slab.h>
2380c486
JR
7565+
7566+extern struct dentry *cow_break_link(const char *pathname);
7567+
7568+static inline int cow_check_and_break(struct path *path)
7569+{
7570+ struct inode *inode = path->dentry->d_inode;
7571+ int error = 0;
7572+
7573+ /* do we need this check? */
7574+ if (IS_RDONLY(inode))
7575+ return -EROFS;
7576+
7577+ if (IS_COW(inode)) {
7578+ if (IS_COW_LINK(inode)) {
7579+ struct dentry *new_dentry, *old_dentry = path->dentry;
7580+ char *pp, *buf;
7581+
7582+ buf = kmalloc(PATH_MAX, GFP_KERNEL);
7583+ if (!buf) {
7584+ return -ENOMEM;
7585+ }
7586+ pp = d_path(path, buf, PATH_MAX);
7587+ new_dentry = cow_break_link(pp);
7588+ kfree(buf);
7589+ if (!IS_ERR(new_dentry)) {
7590+ path->dentry = new_dentry;
7591+ dput(old_dentry);
7592+ } else
7593+ error = PTR_ERR(new_dentry);
7594+ } else {
7595+ inode->i_flags &= ~(S_IXUNLINK | S_IMMUTABLE);
7596+ inode->i_ctime = CURRENT_TIME;
7597+ mark_inode_dirty(inode);
7598+ }
7599+ }
7600+ return error;
7601+}
7602+
7603+#else
7604+#warning duplicate inclusion
7605+#endif
c2806d43
AM
7606diff -urNp -x '*.orig' linux-4.4/include/linux/vs_cvirt.h linux-4.4/include/linux/vs_cvirt.h
7607--- linux-4.4/include/linux/vs_cvirt.h 1970-01-01 01:00:00.000000000 +0100
7608+++ linux-4.4/include/linux/vs_cvirt.h 2021-02-24 16:56:24.582822656 +0100
2380c486
JR
7609@@ -0,0 +1,50 @@
7610+#ifndef _VS_CVIRT_H
7611+#define _VS_CVIRT_H
7612+
7613+#include "vserver/cvirt.h"
7614+#include "vserver/context.h"
7615+#include "vserver/base.h"
7616+#include "vserver/check.h"
7617+#include "vserver/debug.h"
7618+
7619+
7620+static inline void vx_activate_task(struct task_struct *p)
7621+{
7622+ struct vx_info *vxi;
7623+
7624+ if ((vxi = p->vx_info)) {
7625+ vx_update_load(vxi);
7626+ atomic_inc(&vxi->cvirt.nr_running);
7627+ }
7628+}
7629+
7630+static inline void vx_deactivate_task(struct task_struct *p)
7631+{
7632+ struct vx_info *vxi;
7633+
7634+ if ((vxi = p->vx_info)) {
7635+ vx_update_load(vxi);
7636+ atomic_dec(&vxi->cvirt.nr_running);
7637+ }
7638+}
7639+
7640+static inline void vx_uninterruptible_inc(struct task_struct *p)
7641+{
7642+ struct vx_info *vxi;
7643+
7644+ if ((vxi = p->vx_info))
7645+ atomic_inc(&vxi->cvirt.nr_uninterruptible);
7646+}
7647+
7648+static inline void vx_uninterruptible_dec(struct task_struct *p)
7649+{
7650+ struct vx_info *vxi;
7651+
7652+ if ((vxi = p->vx_info))
7653+ atomic_dec(&vxi->cvirt.nr_uninterruptible);
7654+}
7655+
7656+
7657+#else
7658+#warning duplicate inclusion
7659+#endif
c2806d43
AM
7660diff -urNp -x '*.orig' linux-4.4/include/linux/vs_device.h linux-4.4/include/linux/vs_device.h
7661--- linux-4.4/include/linux/vs_device.h 1970-01-01 01:00:00.000000000 +0100
7662+++ linux-4.4/include/linux/vs_device.h 2021-02-24 16:56:24.582822656 +0100
2380c486
JR
7663@@ -0,0 +1,45 @@
7664+#ifndef _VS_DEVICE_H
7665+#define _VS_DEVICE_H
7666+
7667+#include "vserver/base.h"
7668+#include "vserver/device.h"
7669+#include "vserver/debug.h"
7670+
7671+
7672+#ifdef CONFIG_VSERVER_DEVICE
7673+
7674+int vs_map_device(struct vx_info *, dev_t, dev_t *, umode_t);
7675+
7676+#define vs_device_perm(v, d, m, p) \
7677+ ((vs_map_device(current_vx_info(), d, NULL, m) & (p)) == (p))
7678+
7679+#else
7680+
7681+static inline
7682+int vs_map_device(struct vx_info *vxi,
7683+ dev_t device, dev_t *target, umode_t mode)
7684+{
7685+ if (target)
7686+ *target = device;
7687+ return ~0;
7688+}
7689+
7690+#define vs_device_perm(v, d, m, p) ((p) == (p))
7691+
7692+#endif
7693+
7694+
/*
 * Map device d for the current context, storing the result through t,
 * and evaluate to true when all permission bits in p are granted.
 */
#define vs_map_chrdev(d, t, p) \
	((vs_map_device(current_vx_info(), d, t, S_IFCHR) & (p)) == (p))
#define vs_map_blkdev(d, t, p) \
	((vs_map_device(current_vx_info(), d, t, S_IFBLK) & (p)) == (p))

/* Permission-only checks for character and block devices. */
#define vs_chrdev_perm(d, p) \
	vs_device_perm(current_vx_info(), d, S_IFCHR, p)
#define vs_blkdev_perm(d, p) \
	vs_device_perm(current_vx_info(), d, S_IFBLK, p)
7704+
7705+
7706+#else
7707+#warning duplicate inclusion
7708+#endif
c2806d43
AM
7709diff -urNp -x '*.orig' linux-4.4/include/linux/vs_dlimit.h linux-4.4/include/linux/vs_dlimit.h
7710--- linux-4.4/include/linux/vs_dlimit.h 1970-01-01 01:00:00.000000000 +0100
7711+++ linux-4.4/include/linux/vs_dlimit.h 2021-02-24 16:56:24.582822656 +0100
2c8c5bc5 7712@@ -0,0 +1,215 @@
2380c486
JR
7713+#ifndef _VS_DLIMIT_H
7714+#define _VS_DLIMIT_H
7715+
7716+#include <linux/fs.h>
7717+
7718+#include "vserver/dlimit.h"
7719+#include "vserver/base.h"
7720+#include "vserver/debug.h"
7721+
7722+
7723+#define get_dl_info(i) __get_dl_info(i, __FILE__, __LINE__)
7724+
7725+static inline struct dl_info *__get_dl_info(struct dl_info *dli,
7726+ const char *_file, int _line)
7727+{
7728+ if (!dli)
7729+ return NULL;
7730+ vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])",
7731+ dli, dli ? dli->dl_tag : 0,
7732+ dli ? atomic_read(&dli->dl_usecnt) : 0,
7733+ _file, _line);
7734+ atomic_inc(&dli->dl_usecnt);
7735+ return dli;
7736+}
7737+
7738+
7739+#define free_dl_info(i) \
7740+ call_rcu(&(i)->dl_rcu, rcu_free_dl_info)
7741+
7742+#define put_dl_info(i) __put_dl_info(i, __FILE__, __LINE__)
7743+
7744+static inline void __put_dl_info(struct dl_info *dli,
7745+ const char *_file, int _line)
7746+{
7747+ if (!dli)
7748+ return;
7749+ vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])",
7750+ dli, dli ? dli->dl_tag : 0,
7751+ dli ? atomic_read(&dli->dl_usecnt) : 0,
7752+ _file, _line);
7753+ if (atomic_dec_and_test(&dli->dl_usecnt))
7754+ free_dl_info(dli);
7755+}
7756+
7757+
7758+#define __dlimit_char(d) ((d) ? '*' : ' ')
7759+
7760+static inline int __dl_alloc_space(struct super_block *sb,
61333608 7761+ vtag_t tag, dlsize_t nr, const char *file, int line)
2380c486
JR
7762+{
7763+ struct dl_info *dli = NULL;
7764+ int ret = 0;
7765+
7766+ if (nr == 0)
7767+ goto out;
7768+ dli = locate_dl_info(sb, tag);
7769+ if (!dli)
7770+ goto out;
7771+
7772+ spin_lock(&dli->dl_lock);
7773+ ret = (dli->dl_space_used + nr > dli->dl_space_total);
7774+ if (!ret)
7775+ dli->dl_space_used += nr;
7776+ spin_unlock(&dli->dl_lock);
7777+ put_dl_info(dli);
7778+out:
7779+ vxlprintk(VXD_CBIT(dlim, 1),
7780+ "ALLOC (%p,#%d)%c %lld bytes (%d)",
7781+ sb, tag, __dlimit_char(dli), (long long)nr,
7782+ ret, file, line);
76514441 7783+ return ret ? -ENOSPC : 0;
2380c486
JR
7784+}
7785+
7786+static inline void __dl_free_space(struct super_block *sb,
61333608 7787+ vtag_t tag, dlsize_t nr, const char *_file, int _line)
2380c486
JR
7788+{
7789+ struct dl_info *dli = NULL;
7790+
7791+ if (nr == 0)
7792+ goto out;
7793+ dli = locate_dl_info(sb, tag);
7794+ if (!dli)
7795+ goto out;
7796+
7797+ spin_lock(&dli->dl_lock);
7798+ if (dli->dl_space_used > nr)
7799+ dli->dl_space_used -= nr;
7800+ else
7801+ dli->dl_space_used = 0;
7802+ spin_unlock(&dli->dl_lock);
7803+ put_dl_info(dli);
7804+out:
7805+ vxlprintk(VXD_CBIT(dlim, 1),
7806+ "FREE (%p,#%d)%c %lld bytes",
7807+ sb, tag, __dlimit_char(dli), (long long)nr,
7808+ _file, _line);
7809+}
7810+
7811+static inline int __dl_alloc_inode(struct super_block *sb,
61333608 7812+ vtag_t tag, const char *_file, int _line)
2380c486
JR
7813+{
7814+ struct dl_info *dli;
7815+ int ret = 0;
d337f35e 7816+
2380c486
JR
7817+ dli = locate_dl_info(sb, tag);
7818+ if (!dli)
7819+ goto out;
d337f35e 7820+
2380c486 7821+ spin_lock(&dli->dl_lock);
2c8c5bc5
AM
7822+ dli->dl_inodes_used++;
7823+ ret = (dli->dl_inodes_used > dli->dl_inodes_total);
2380c486
JR
7824+ spin_unlock(&dli->dl_lock);
7825+ put_dl_info(dli);
7826+out:
7827+ vxlprintk(VXD_CBIT(dlim, 0),
7828+ "ALLOC (%p,#%d)%c inode (%d)",
7829+ sb, tag, __dlimit_char(dli), ret, _file, _line);
76514441 7830+ return ret ? -ENOSPC : 0;
2380c486 7831+}
d337f35e 7832+
2380c486 7833+static inline void __dl_free_inode(struct super_block *sb,
61333608 7834+ vtag_t tag, const char *_file, int _line)
d337f35e 7835+{
2380c486
JR
7836+ struct dl_info *dli;
7837+
7838+ dli = locate_dl_info(sb, tag);
7839+ if (!dli)
7840+ goto out;
7841+
7842+ spin_lock(&dli->dl_lock);
7843+ if (dli->dl_inodes_used > 1)
7844+ dli->dl_inodes_used--;
7845+ else
7846+ dli->dl_inodes_used = 0;
7847+ spin_unlock(&dli->dl_lock);
7848+ put_dl_info(dli);
7849+out:
7850+ vxlprintk(VXD_CBIT(dlim, 0),
7851+ "FREE (%p,#%d)%c inode",
7852+ sb, tag, __dlimit_char(dli), _file, _line);
d337f35e
JR
7853+}
7854+
61333608 7855+static inline void __dl_adjust_block(struct super_block *sb, vtag_t tag,
2380c486
JR
7856+ unsigned long long *free_blocks, unsigned long long *root_blocks,
7857+ const char *_file, int _line)
d337f35e 7858+{
2380c486
JR
7859+ struct dl_info *dli;
7860+ uint64_t broot, bfree;
7861+
7862+ dli = locate_dl_info(sb, tag);
7863+ if (!dli)
7864+ return;
7865+
7866+ spin_lock(&dli->dl_lock);
7867+ broot = (dli->dl_space_total -
7868+ (dli->dl_space_total >> 10) * dli->dl_nrlmult)
7869+ >> sb->s_blocksize_bits;
7870+ bfree = (dli->dl_space_total - dli->dl_space_used)
7871+ >> sb->s_blocksize_bits;
7872+ spin_unlock(&dli->dl_lock);
7873+
7874+ vxlprintk(VXD_CBIT(dlim, 2),
7875+ "ADJUST: %lld,%lld on %lld,%lld [mult=%d]",
7876+ (long long)bfree, (long long)broot,
7877+ *free_blocks, *root_blocks, dli->dl_nrlmult,
7878+ _file, _line);
7879+ if (free_blocks) {
7880+ if (*free_blocks > bfree)
7881+ *free_blocks = bfree;
7882+ }
7883+ if (root_blocks) {
7884+ if (*root_blocks > broot)
7885+ *root_blocks = broot;
7886+ }
7887+ put_dl_info(dli);
d337f35e
JR
7888+}
7889+
/*
 * Disk-limit accounting entry points.  Each macro takes the super
 * block and tag from the inode (or directly, for dl_adjust_block())
 * and forwards to the matching __dl_*() helper together with the call
 * site for the debug trace.
 *
 * dl_prealloc_space(), dl_alloc_space() and dl_reserve_space() expand
 * to the same __dl_alloc_space() call; dl_release_space() and
 * dl_free_space() both expand to __dl_free_space(); dl_claim_space()
 * is a constant 0.
 */
#define dl_prealloc_space(in, bytes) \
	__dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
		__FILE__, __LINE__)

#define dl_alloc_space(in, bytes) \
	__dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
		__FILE__, __LINE__)

#define dl_reserve_space(in, bytes) \
	__dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
		__FILE__, __LINE__)

#define dl_claim_space(in, bytes)	(0)

#define dl_release_space(in, bytes) \
	__dl_free_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
		__FILE__, __LINE__)

#define dl_free_space(in, bytes) \
	__dl_free_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
		__FILE__, __LINE__)


#define dl_alloc_inode(in) \
	__dl_alloc_inode((in)->i_sb, i_tag_read(in), __FILE__, __LINE__)

#define dl_free_inode(in) \
	__dl_free_inode((in)->i_sb, i_tag_read(in), __FILE__, __LINE__)


#define dl_adjust_block(sb, tag, fb, rb) \
	__dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__)
d337f35e 7923+
d337f35e 7924+
2380c486
JR
7925+#else
7926+#warning duplicate inclusion
7927+#endif
c2806d43
AM
7928diff -urNp -x '*.orig' linux-4.4/include/linux/vs_inet.h linux-4.4/include/linux/vs_inet.h
7929--- linux-4.4/include/linux/vs_inet.h 1970-01-01 01:00:00.000000000 +0100
7930+++ linux-4.4/include/linux/vs_inet.h 2021-02-24 16:56:24.589489533 +0100
7931@@ -0,0 +1,364 @@
7932+#ifndef _VS_INET_H
7933+#define _VS_INET_H
d337f35e 7934+
c2806d43
AM
7935+#include "vserver/base.h"
7936+#include "vserver/network.h"
7937+#include "vserver/debug.h"
d337f35e 7938+
c2806d43 7939+#define IPI_LOOPBACK htonl(INADDR_LOOPBACK)
d337f35e 7940+
c2806d43
AM
7941+#define NXAV4(a) NIPQUAD((a)->ip[0]), NIPQUAD((a)->ip[1]), \
7942+ NIPQUAD((a)->mask), (a)->type
7943+#define NXAV4_FMT "[" NIPQUAD_FMT "-" NIPQUAD_FMT "/" NIPQUAD_FMT ":%04x]"
d337f35e 7944+
c2806d43
AM
7945+#define NIPQUAD(addr) \
7946+ ((unsigned char *)&addr)[0], \
7947+ ((unsigned char *)&addr)[1], \
7948+ ((unsigned char *)&addr)[2], \
7949+ ((unsigned char *)&addr)[3]
d337f35e 7950+
c2806d43 7951+#define NIPQUAD_FMT "%u.%u.%u.%u"
d337f35e 7952+
d337f35e 7953+
c2806d43
AM
7954+static inline
7955+int v4_addr_match(struct nx_addr_v4 *nxa, __be32 addr, uint16_t tmask)
7956+{
7957+ __be32 ip = nxa->ip[0].s_addr;
7958+ __be32 mask = nxa->mask.s_addr;
7959+ __be32 bcast = ip | ~mask;
7960+ int ret = 0;
d337f35e 7961+
c2806d43
AM
7962+ switch (nxa->type & tmask) {
7963+ case NXA_TYPE_MASK:
7964+ ret = (ip == (addr & mask));
7965+ break;
7966+ case NXA_TYPE_ADDR:
7967+ ret = 3;
7968+ if (addr == ip)
7969+ break;
7970+ /* fall through to broadcast */
7971+ case NXA_MOD_BCAST:
7972+ ret = ((tmask & NXA_MOD_BCAST) && (addr == bcast));
7973+ break;
7974+ case NXA_TYPE_RANGE:
7975+ ret = ((nxa->ip[0].s_addr <= addr) &&
7976+ (nxa->ip[1].s_addr > addr));
7977+ break;
7978+ case NXA_TYPE_ANY:
7979+ ret = 2;
7980+ break;
7981+ }
d337f35e 7982+
c2806d43
AM
7983+ vxdprintk(VXD_CBIT(net, 0),
7984+ "v4_addr_match(%p" NXAV4_FMT "," NIPQUAD_FMT ",%04x) = %d",
7985+ nxa, NXAV4(nxa), NIPQUAD(addr), tmask, ret);
7986+ return ret;
7987+}
d337f35e 7988+
c2806d43
AM
7989+static inline
7990+int v4_addr_in_nx_info(struct nx_info *nxi, __be32 addr, uint16_t tmask)
7991+{
7992+ struct nx_addr_v4 *nxa;
7993+ unsigned long irqflags;
7994+ int ret = 1;
d337f35e 7995+
c2806d43
AM
7996+ if (!nxi)
7997+ goto out;
4bf69007 7998+
c2806d43
AM
7999+ ret = 2;
8000+ /* allow 127.0.0.1 when remapping lback */
8001+ if ((tmask & NXA_LOOPBACK) &&
8002+ (addr == IPI_LOOPBACK) &&
8003+ nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
8004+ goto out;
8005+ ret = 3;
8006+ /* check for lback address */
8007+ if ((tmask & NXA_MOD_LBACK) &&
8008+ (nxi->v4_lback.s_addr == addr))
8009+ goto out;
8010+ ret = 4;
8011+ /* check for broadcast address */
8012+ if ((tmask & NXA_MOD_BCAST) &&
8013+ (nxi->v4_bcast.s_addr == addr))
8014+ goto out;
8015+ ret = 5;
d337f35e 8016+
c2806d43
AM
8017+ /* check for v4 addresses */
8018+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
8019+ for (nxa = &nxi->v4; nxa; nxa = nxa->next)
8020+ if (v4_addr_match(nxa, addr, tmask))
8021+ goto out_unlock;
8022+ ret = 0;
8023+out_unlock:
8024+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
8025+out:
8026+ vxdprintk(VXD_CBIT(net, 0),
8027+ "v4_addr_in_nx_info(%p[#%u]," NIPQUAD_FMT ",%04x) = %d",
8028+ nxi, nxi ? nxi->nx_id : 0, NIPQUAD(addr), tmask, ret);
8029+ return ret;
8030+}
d337f35e 8031+
c2806d43
AM
8032+static inline
8033+int v4_nx_addr_match(struct nx_addr_v4 *nxa, struct nx_addr_v4 *addr, uint16_t mask)
8034+{
8035+ /* FIXME: needs full range checks */
8036+ return v4_addr_match(nxa, addr->ip[0].s_addr, mask);
8037+}
d337f35e 8038+
c2806d43
AM
8039+static inline
8040+int v4_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v4 *nxa, uint16_t mask)
8041+{
8042+ struct nx_addr_v4 *ptr;
8043+ unsigned long irqflags;
8044+ int ret = 1;
d337f35e 8045+
c2806d43
AM
8046+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
8047+ for (ptr = &nxi->v4; ptr; ptr = ptr->next)
8048+ if (v4_nx_addr_match(ptr, nxa, mask))
8049+ goto out_unlock;
8050+ ret = 0;
8051+out_unlock:
8052+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
8053+ return ret;
8054+}
d337f35e 8055+
c2806d43 8056+#include <net/inet_sock.h>
d337f35e 8057+
c2806d43
AM
8058+/*
8059+ * Check if a given address matches for a socket
8060+ *
8061+ * nxi: the socket's nx_info if any
8062+ * addr: to be verified address
8063+ */
8064+static inline
8065+int v4_sock_addr_match (
8066+ struct nx_info *nxi,
8067+ struct inet_sock *inet,
8068+ __be32 addr)
8069+{
8070+ __be32 saddr = inet->inet_rcv_saddr;
8071+ __be32 bcast = nxi ? nxi->v4_bcast.s_addr : INADDR_BROADCAST;
d337f35e 8072+
c2806d43
AM
8073+ if (addr && (saddr == addr || bcast == addr))
8074+ return 1;
8075+ if (!saddr)
8076+ return v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND);
8077+ return 0;
8078+}
d337f35e
JR
8079+
8080+
c2806d43 8081+/* inet related checks and helpers */
d337f35e 8082+
d337f35e 8083+
c2806d43
AM
8084+struct in_ifaddr;
8085+struct net_device;
8086+struct sock;
d337f35e 8087+
c2806d43 8088+#ifdef CONFIG_INET
d337f35e 8089+
c2806d43
AM
8090+#include <linux/netdevice.h>
8091+#include <linux/inetdevice.h>
8092+#include <net/inet_sock.h>
8093+#include <net/inet_timewait_sock.h>
d337f35e 8094+
d337f35e 8095+
c2806d43
AM
8096+int dev_in_nx_info(struct net_device *, struct nx_info *);
8097+int v4_dev_in_nx_info(struct net_device *, struct nx_info *);
8098+int nx_v4_addr_conflict(struct nx_info *, struct nx_info *);
d337f35e 8099+
d337f35e 8100+
c2806d43
AM
8101+/*
8102+ * check if address is covered by socket
8103+ *
8104+ * sk: the socket to check against
8105+ * addr: the address in question (must be != 0)
8106+ */
d337f35e 8107+
c2806d43
AM
8108+static inline
8109+int __v4_addr_match_socket(const struct sock *sk, struct nx_addr_v4 *nxa)
8110+{
8111+ struct nx_info *nxi = sk->sk_nx_info;
8112+ __be32 saddr = sk->sk_rcv_saddr;
d337f35e 8113+
c2806d43
AM
8114+ vxdprintk(VXD_CBIT(net, 5),
8115+ "__v4_addr_in_socket(%p," NXAV4_FMT ") %p:" NIPQUAD_FMT " %p;%lx",
8116+ sk, NXAV4(nxa), nxi, NIPQUAD(saddr), sk->sk_socket,
8117+ (sk->sk_socket?sk->sk_socket->flags:0));
d337f35e 8118+
c2806d43
AM
8119+ if (saddr) { /* direct address match */
8120+ return v4_addr_match(nxa, saddr, -1);
8121+ } else if (nxi) { /* match against nx_info */
8122+ return v4_nx_addr_in_nx_info(nxi, nxa, -1);
8123+ } else { /* unrestricted any socket */
8124+ return 1;
8125+ }
8126+}
d337f35e 8127+
d337f35e 8128+
d337f35e 8129+
c2806d43
AM
8130+static inline
8131+int nx_dev_visible(struct nx_info *nxi, struct net_device *dev)
8132+{
8133+ vxdprintk(VXD_CBIT(net, 1),
8134+ "nx_dev_visible(%p[#%u],%p " VS_Q("%s") ") %d",
8135+ nxi, nxi ? nxi->nx_id : 0, dev, dev->name,
8136+ nxi ? dev_in_nx_info(dev, nxi) : 0);
d337f35e 8137+
c2806d43
AM
8138+ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
8139+ return 1;
8140+ if (dev_in_nx_info(dev, nxi))
8141+ return 1;
8142+ return 0;
8143+}
d337f35e 8144+
d337f35e 8145+
c2806d43
AM
8146+static inline
8147+int v4_ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
8148+{
8149+ if (!nxi)
8150+ return 1;
8151+ if (!ifa)
8152+ return 0;
8153+ return v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW);
8154+}
d337f35e 8155+
c2806d43
AM
8156+static inline
8157+int nx_v4_ifa_visible(struct nx_info *nxi, struct in_ifaddr *ifa)
8158+{
8159+ vxdprintk(VXD_CBIT(net, 1), "nx_v4_ifa_visible(%p[#%u],%p) %d",
8160+ nxi, nxi ? nxi->nx_id : 0, ifa,
8161+ nxi ? v4_ifa_in_nx_info(ifa, nxi) : 0);
d337f35e 8162+
c2806d43
AM
8163+ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
8164+ return 1;
8165+ if (v4_ifa_in_nx_info(ifa, nxi))
8166+ return 1;
8167+ return 0;
8168+}
d337f35e
JR
8169+
8170+
c2806d43
AM
8171+struct nx_v4_sock_addr {
8172+ __be32 saddr; /* Address used for validation */
8173+ __be32 baddr; /* Address used for socket bind */
8174+};
d337f35e 8175+
c2806d43
AM
8176+static inline
8177+int v4_map_sock_addr(struct inet_sock *inet, struct sockaddr_in *addr,
8178+ struct nx_v4_sock_addr *nsa)
8179+{
8180+ struct sock *sk = &inet->sk;
8181+ struct nx_info *nxi = sk->sk_nx_info;
8182+ __be32 saddr = addr->sin_addr.s_addr;
8183+ __be32 baddr = saddr;
d337f35e 8184+
c2806d43
AM
8185+ vxdprintk(VXD_CBIT(net, 3),
8186+ "inet_bind(%p)* %p,%p;%lx " NIPQUAD_FMT,
8187+ sk, sk->sk_nx_info, sk->sk_socket,
8188+ (sk->sk_socket ? sk->sk_socket->flags : 0),
8189+ NIPQUAD(saddr));
d337f35e 8190+
c2806d43
AM
8191+ if (nxi) {
8192+ if (saddr == INADDR_ANY) {
8193+ if (nx_info_flags(nxi, NXF_SINGLE_IP, 0))
8194+ baddr = nxi->v4.ip[0].s_addr;
8195+ } else if (saddr == IPI_LOOPBACK) {
8196+ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
8197+ baddr = nxi->v4_lback.s_addr;
8198+ } else if (!ipv4_is_multicast(saddr) ||
8199+ !nx_info_ncaps(nxi, NXC_MULTICAST)) {
8200+ /* normal address bind */
8201+ if (!v4_addr_in_nx_info(nxi, saddr, NXA_MASK_BIND))
8202+ return -EADDRNOTAVAIL;
8203+ }
8204+ }
d337f35e 8205+
c2806d43
AM
8206+ vxdprintk(VXD_CBIT(net, 3),
8207+ "inet_bind(%p) " NIPQUAD_FMT ", " NIPQUAD_FMT,
8208+ sk, NIPQUAD(saddr), NIPQUAD(baddr));
d337f35e 8209+
c2806d43
AM
8210+ nsa->saddr = saddr;
8211+ nsa->baddr = baddr;
8212+ return 0;
8213+}
d337f35e 8214+
c2806d43
AM
8215+static inline
8216+void v4_set_sock_addr(struct inet_sock *inet, struct nx_v4_sock_addr *nsa)
8217+{
8218+ inet->inet_saddr = nsa->baddr;
8219+ inet->inet_rcv_saddr = nsa->baddr;
8220+}
d337f35e
JR
8221+
8222+
c2806d43
AM
8223+/*
8224+ * helper to simplify inet_lookup_listener
8225+ *
8226+ * nxi: the socket's nx_info if any
8227+ * addr: to be verified address
8228+ * saddr: socket address
8229+ */
8230+static inline int v4_inet_addr_match (
8231+ struct nx_info *nxi,
8232+ __be32 addr,
8233+ __be32 saddr)
8234+{
8235+ if (addr && (saddr == addr))
8236+ return 1;
8237+ if (!saddr)
8238+ return nxi ? v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND) : 1;
8239+ return 0;
8240+}
d337f35e 8241+
c2806d43
AM
8242+static inline __be32 nx_map_sock_lback(struct nx_info *nxi, __be32 addr)
8243+{
8244+ if (nx_info_flags(nxi, NXF_HIDE_LBACK, 0) &&
8245+ (addr == nxi->v4_lback.s_addr))
8246+ return IPI_LOOPBACK;
8247+ return addr;
8248+}
d337f35e 8249+
c2806d43
AM
8250+static inline
8251+int nx_info_has_v4(struct nx_info *nxi)
8252+{
8253+ if (!nxi)
8254+ return 1;
8255+ if (NX_IPV4(nxi))
8256+ return 1;
8257+ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
8258+ return 1;
8259+ return 0;
8260+}
d337f35e 8261+
c2806d43 8262+#else /* CONFIG_INET */
d337f35e 8263+
c2806d43
AM
/* !CONFIG_INET stub: every device is reported as visible. */
static inline
int nx_dev_visible(struct nx_info *n, struct net_device *d)
{
	return 1;
}
d337f35e 8269+
c2806d43
AM
8270+static inline
8271+int nx_v4_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
8272+{
8273+ return 1;
8274+}
d337f35e 8275+
c2806d43
AM
/* !CONFIG_INET stub: every interface address is reported as belonging to the context. */
static inline
int v4_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
{
	return 1;
}
d337f35e 8281+
c2806d43
AM
/* !CONFIG_INET stub: no context has IPv4. */
static inline
int nx_info_has_v4(struct nx_info *nxi)
{
	return 0;
}
d337f35e 8287+
c2806d43 8288+#endif /* CONFIG_INET */
4a036bed 8289+
c2806d43
AM
8290+#define current_nx_info_has_v4() \
8291+ nx_info_has_v4(current_nx_info())
d337f35e 8292+
c2806d43
AM
8293+#else
8294+// #warning duplicate inclusion
8295+#endif
8296diff -urNp -x '*.orig' linux-4.4/include/linux/vs_inet6.h linux-4.4/include/linux/vs_inet6.h
8297--- linux-4.4/include/linux/vs_inet6.h 1970-01-01 01:00:00.000000000 +0100
8298+++ linux-4.4/include/linux/vs_inet6.h 2021-02-24 16:56:24.589489533 +0100
8299@@ -0,0 +1,257 @@
8300+#ifndef _VS_INET6_H
8301+#define _VS_INET6_H
d337f35e 8302+
c2806d43
AM
8303+#include "vserver/base.h"
8304+#include "vserver/network.h"
8305+#include "vserver/debug.h"
7e46296a 8306+
c2806d43 8307+#include <net/ipv6.h>
7e46296a 8308+
c2806d43
AM
8309+#define NXAV6(a) &(a)->ip, &(a)->mask, (a)->prefix, (a)->type
8310+#define NXAV6_FMT "[%pI6/%pI6/%d:%04x]"
7e46296a 8311+
7e46296a 8312+
c2806d43 8313+#ifdef CONFIG_IPV6
7e46296a 8314+
c2806d43
AM
8315+static inline
8316+int v6_addr_match(struct nx_addr_v6 *nxa,
8317+ const struct in6_addr *addr, uint16_t mask)
8318+{
8319+ int ret = 0;
d337f35e 8320+
c2806d43
AM
8321+ switch (nxa->type & mask) {
8322+ case NXA_TYPE_MASK:
8323+ ret = ipv6_masked_addr_cmp(&nxa->ip, &nxa->mask, addr);
8324+ break;
8325+ case NXA_TYPE_ADDR:
8326+ ret = ipv6_addr_equal(&nxa->ip, addr);
8327+ break;
8328+ case NXA_TYPE_ANY:
8329+ ret = 1;
8330+ break;
8331+ }
8332+ vxdprintk(VXD_CBIT(net, 0),
8333+ "v6_addr_match(%p" NXAV6_FMT ",%pI6,%04x) = %d",
8334+ nxa, NXAV6(nxa), addr, mask, ret);
8335+ return ret;
8336+}
4bf69007 8337+
c2806d43
AM
8338+static inline
8339+int v6_addr_in_nx_info(struct nx_info *nxi,
8340+ const struct in6_addr *addr, uint16_t mask)
8341+{
8342+ struct nx_addr_v6 *nxa;
8343+ unsigned long irqflags;
8344+ int ret = 1;
d337f35e 8345+
c2806d43
AM
8346+ if (!nxi)
8347+ goto out;
d337f35e 8348+
c2806d43
AM
8349+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
8350+ for (nxa = &nxi->v6; nxa; nxa = nxa->next)
8351+ if (v6_addr_match(nxa, addr, mask))
8352+ goto out_unlock;
8353+ ret = 0;
8354+out_unlock:
8355+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
8356+out:
8357+ vxdprintk(VXD_CBIT(net, 0),
8358+ "v6_addr_in_nx_info(%p[#%u],%pI6,%04x) = %d",
8359+ nxi, nxi ? nxi->nx_id : 0, addr, mask, ret);
8360+ return ret;
8361+}
d337f35e 8362+
c2806d43
AM
8363+static inline
8364+int v6_nx_addr_match(struct nx_addr_v6 *nxa, struct nx_addr_v6 *addr, uint16_t mask)
8365+{
8366+ /* FIXME: needs full range checks */
8367+ return v6_addr_match(nxa, &addr->ip, mask);
8368+}
d337f35e 8369+
c2806d43
AM
8370+static inline
8371+int v6_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v6 *nxa, uint16_t mask)
8372+{
8373+ struct nx_addr_v6 *ptr;
8374+ unsigned long irqflags;
8375+ int ret = 1;
d337f35e 8376+
c2806d43
AM
8377+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
8378+ for (ptr = &nxi->v6; ptr; ptr = ptr->next)
8379+ if (v6_nx_addr_match(ptr, nxa, mask))
8380+ goto out_unlock;
8381+ ret = 0;
8382+out_unlock:
8383+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
8384+ return ret;
8385+}
d337f35e 8386+
d337f35e 8387+
c2806d43
AM
8388+/*
8389+ * Check if a given address matches for a socket
8390+ *
8391+ * nxi: the socket's nx_info if any
8392+ * addr: to be verified address
8393+ */
8394+static inline
8395+int v6_sock_addr_match (
8396+ struct nx_info *nxi,
8397+ struct inet_sock *inet,
8398+ struct in6_addr *addr)
8399+{
8400+ struct sock *sk = &inet->sk;
8401+ const struct in6_addr *saddr = inet6_rcv_saddr(sk);
d337f35e 8402+
c2806d43
AM
8403+ if (!ipv6_addr_any(addr) &&
8404+ ipv6_addr_equal(saddr, addr))
8405+ return 1;
8406+ if (ipv6_addr_any(saddr))
8407+ return v6_addr_in_nx_info(nxi, addr, -1);
8408+ return 0;
8409+}
d337f35e 8410+
c2806d43
AM
8411+/*
8412+ * check if address is covered by socket
8413+ *
8414+ * sk: the socket to check against
8415+ * addr: the address in question (must be != 0)
8416+ */
d337f35e 8417+
c2806d43
AM
8418+static inline
8419+int __v6_addr_match_socket(const struct sock *sk, struct nx_addr_v6 *nxa)
8420+{
8421+ struct nx_info *nxi = sk->sk_nx_info;
8422+ const struct in6_addr *saddr = inet6_rcv_saddr(sk);
d337f35e 8423+
c2806d43
AM
8424+ vxdprintk(VXD_CBIT(net, 5),
8425+ "__v6_addr_in_socket(%p," NXAV6_FMT ") %p:%pI6 %p;%lx",
8426+ sk, NXAV6(nxa), nxi, saddr, sk->sk_socket,
8427+ (sk->sk_socket?sk->sk_socket->flags:0));
d337f35e 8428+
c2806d43
AM
8429+ if (!ipv6_addr_any(saddr)) { /* direct address match */
8430+ return v6_addr_match(nxa, saddr, -1);
8431+ } else if (nxi) { /* match against nx_info */
8432+ return v6_nx_addr_in_nx_info(nxi, nxa, -1);
8433+ } else { /* unrestricted any socket */
8434+ return 1;
8435+ }
8436+}
d337f35e 8437+
d337f35e 8438+
c2806d43 8439+/* inet related checks and helpers */
d337f35e
JR
8440+
8441+
c2806d43
AM
8442+struct in_ifaddr;
8443+struct net_device;
8444+struct sock;
d337f35e 8445+
d337f35e 8446+
c2806d43
AM
8447+#include <linux/netdevice.h>
8448+#include <linux/inetdevice.h>
8449+#include <net/inet_timewait_sock.h>
d337f35e
JR
8450+
8451+
c2806d43
AM
8452+int dev_in_nx_info(struct net_device *, struct nx_info *);
8453+int v6_dev_in_nx_info(struct net_device *, struct nx_info *);
8454+int nx_v6_addr_conflict(struct nx_info *, struct nx_info *);
3bac966d 8455+
d337f35e 8456+
d337f35e 8457+
c2806d43
AM
8458+static inline
8459+int v6_ifa_in_nx_info(struct inet6_ifaddr *ifa, struct nx_info *nxi)
8460+{
8461+ if (!nxi)
8462+ return 1;
8463+ if (!ifa)
8464+ return 0;
8465+ return v6_addr_in_nx_info(nxi, &ifa->addr, -1);
8466+}
d337f35e 8467+
c2806d43
AM
8468+static inline
8469+int nx_v6_ifa_visible(struct nx_info *nxi, struct inet6_ifaddr *ifa)
8470+{
8471+ vxdprintk(VXD_CBIT(net, 1), "nx_v6_ifa_visible(%p[#%u],%p) %d",
8472+ nxi, nxi ? nxi->nx_id : 0, ifa,
8473+ nxi ? v6_ifa_in_nx_info(ifa, nxi) : 0);
d337f35e 8474+
c2806d43
AM
8475+ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
8476+ return 1;
8477+ if (v6_ifa_in_nx_info(ifa, nxi))
8478+ return 1;
8479+ return 0;
8480+}
d33d7b00 8481+
3bac966d 8482+
c2806d43
AM
8483+struct nx_v6_sock_addr {
8484+ struct in6_addr saddr; /* Address used for validation */
8485+ struct in6_addr baddr; /* Address used for socket bind */
3cc86a71 8486+};
3bac966d 8487+
c2806d43
AM
8488+static inline
8489+int v6_map_sock_addr(struct inet_sock *inet, struct sockaddr_in6 *addr,
8490+ struct nx_v6_sock_addr *nsa)
d33d7b00 8491+{
c2806d43
AM
8492+ // struct sock *sk = &inet->sk;
8493+ // struct nx_info *nxi = sk->sk_nx_info;
8494+ struct in6_addr saddr = addr->sin6_addr;
8495+ struct in6_addr baddr = saddr;
3bac966d 8496+
c2806d43
AM
8497+ nsa->saddr = saddr;
8498+ nsa->baddr = baddr;
8499+ return 0;
d33d7b00 8500+}
3bac966d 8501+
c2806d43
AM
8502+static inline
8503+void v6_set_sock_addr(struct inet_sock *inet, struct nx_v6_sock_addr *nsa)
8504+{
8505+ // struct sock *sk = &inet->sk;
8506+ // struct in6_addr *saddr = inet6_rcv_saddr(sk);
3cc86a71 8507+
c2806d43
AM
8508+ // *saddr = nsa->baddr;
8509+ // inet->inet_saddr = nsa->baddr;
8510+}
2380c486 8511+
d33d7b00 8512+static inline
c2806d43 8513+int nx_info_has_v6(struct nx_info *nxi)
d33d7b00 8514+{
c2806d43
AM
8515+ if (!nxi)
8516+ return 1;
8517+ if (NX_IPV6(nxi))
8518+ return 1;
8519+ return 0;
d33d7b00
AM
8520+}
8521+
c2806d43 8522+#else /* CONFIG_IPV6 */
3cc86a71 8523+
d33d7b00 8524+static inline
c2806d43 8525+int nx_v6_dev_visible(struct nx_info *n, struct net_device *d)
d33d7b00 8526+{
c2806d43 8527+ return 1;
d33d7b00 8528+}
2380c486 8529+
d337f35e 8530+
c2806d43
AM
8531+static inline
8532+int nx_v6_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
8533+{
8534+ return 1;
8535+}
3bac966d 8536+
c2806d43
AM
8537+static inline
8538+int v6_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
8539+{
8540+ return 1;
8541+}
8542+
8543+static inline
8544+int nx_info_has_v6(struct nx_info *nxi)
8545+{
8546+ return 0;
8547+}
8548+
8549+#endif /* CONFIG_IPV6 */
8550+
8551+#define current_nx_info_has_v6() \
8552+ nx_info_has_v6(current_nx_info())
3cc86a71 8553+
d337f35e 8554+#else
c2806d43 8555+#warning duplicate inclusion
d337f35e 8556+#endif
c2806d43
AM
8557diff -urNp -x '*.orig' linux-4.4/include/linux/vs_limit.h linux-4.4/include/linux/vs_limit.h
8558--- linux-4.4/include/linux/vs_limit.h 1970-01-01 01:00:00.000000000 +0100
8559+++ linux-4.4/include/linux/vs_limit.h 2021-02-24 16:56:24.589489533 +0100
8560@@ -0,0 +1,140 @@
8561+#ifndef _VS_LIMIT_H
8562+#define _VS_LIMIT_H
d337f35e 8563+
c2806d43
AM
8564+#include "vserver/limit.h"
8565+#include "vserver/base.h"
8566+#include "vserver/context.h"
8567+#include "vserver/debug.h"
8568+#include "vserver/context.h"
8569+#include "vserver/limit_int.h"
d337f35e 8570+
d337f35e 8571+
c2806d43
AM
8572+#define vx_acc_cres(v, d, p, r) \
8573+ __vx_acc_cres(v, r, d, p, __FILE__, __LINE__)
d337f35e 8574+
c2806d43
AM
8575+#define vx_acc_cres_cond(x, d, p, r) \
8576+ __vx_acc_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
8577+ r, d, p, __FILE__, __LINE__)
d337f35e 8578+
d337f35e 8579+
c2806d43
AM
8580+#define vx_add_cres(v, a, p, r) \
8581+ __vx_add_cres(v, r, a, p, __FILE__, __LINE__)
8582+#define vx_sub_cres(v, a, p, r) vx_add_cres(v, -(a), p, r)
d337f35e 8583+
c2806d43
AM
8584+#define vx_add_cres_cond(x, a, p, r) \
8585+ __vx_add_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
8586+ r, a, p, __FILE__, __LINE__)
8587+#define vx_sub_cres_cond(x, a, p, r) vx_add_cres_cond(x, -(a), p, r)
d337f35e 8588+
d337f35e 8589+
c2806d43 8590+/* process and file limits */
d337f35e 8591+
c2806d43
AM
8592+#define vx_nproc_inc(p) \
8593+ vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC)
d337f35e 8594+
c2806d43
AM
8595+#define vx_nproc_dec(p) \
8596+ vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC)
d337f35e 8597+
c2806d43
AM
8598+#define vx_files_inc(f) \
8599+ vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE)
d337f35e 8600+
c2806d43
AM
8601+#define vx_files_dec(f) \
8602+ vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE)
d337f35e 8603+
c2806d43
AM
8604+#define vx_locks_inc(l) \
8605+ vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS)
d337f35e 8606+
c2806d43
AM
8607+#define vx_locks_dec(l) \
8608+ vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS)
d337f35e 8609+
c2806d43
AM
8610+#define vx_openfd_inc(f) \
8611+ vx_acc_cres(current_vx_info(), 1, (void *)(long)(f), VLIMIT_OPENFD)
d337f35e 8612+
c2806d43
AM
8613+#define vx_openfd_dec(f) \
8614+ vx_acc_cres(current_vx_info(),-1, (void *)(long)(f), VLIMIT_OPENFD)
d337f35e 8615+
d337f35e 8616+
c2806d43
AM
8617+#define vx_cres_avail(v, n, r) \
8618+ __vx_cres_avail(v, r, n, __FILE__, __LINE__)
d337f35e 8619+
d337f35e 8620+
c2806d43
AM
8621+#define vx_nproc_avail(n) \
8622+ vx_cres_avail(current_vx_info(), n, RLIMIT_NPROC)
d337f35e 8623+
c2806d43
AM
8624+#define vx_files_avail(n) \
8625+ vx_cres_avail(current_vx_info(), n, RLIMIT_NOFILE)
d337f35e 8626+
c2806d43
AM
8627+#define vx_locks_avail(n) \
8628+ vx_cres_avail(current_vx_info(), n, RLIMIT_LOCKS)
d337f35e 8629+
c2806d43
AM
8630+#define vx_openfd_avail(n) \
8631+ vx_cres_avail(current_vx_info(), n, VLIMIT_OPENFD)
d337f35e 8632+
d337f35e 8633+
c2806d43 8634+/* dentry limits */
d337f35e 8635+
c2806d43
AM
8636+#define vx_dentry_inc(d) do { \
8637+ if (d_count(d) == 1) \
8638+ vx_acc_cres(current_vx_info(), 1, d, VLIMIT_DENTRY); \
8639+ } while (0)
d337f35e 8640+
c2806d43
AM
8641+#define vx_dentry_dec(d) do { \
8642+ if (d_count(d) == 0) \
8643+ vx_acc_cres(current_vx_info(),-1, d, VLIMIT_DENTRY); \
8644+ } while (0)
d337f35e 8645+
c2806d43
AM
8646+#define vx_dentry_avail(n) \
8647+ vx_cres_avail(current_vx_info(), n, VLIMIT_DENTRY)
d337f35e
JR
8648+
8649+
c2806d43 8650+/* socket limits */
d337f35e 8651+
c2806d43
AM
8652+#define vx_sock_inc(s) \
8653+ vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK)
d337f35e 8654+
c2806d43
AM
8655+#define vx_sock_dec(s) \
8656+ vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK)
d337f35e 8657+
c2806d43
AM
8658+#define vx_sock_avail(n) \
8659+ vx_cres_avail(current_vx_info(), n, VLIMIT_NSOCK)
d337f35e 8660+
d337f35e 8661+
c2806d43 8662+/* ipc resource limits */
d337f35e 8663+
c2806d43
AM
8664+#define vx_ipcmsg_add(v, u, a) \
8665+ vx_add_cres(v, a, u, RLIMIT_MSGQUEUE)
d337f35e 8666+
c2806d43
AM
8667+#define vx_ipcmsg_sub(v, u, a) \
8668+ vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE)
d337f35e 8669+
c2806d43
AM
8670+#define vx_ipcmsg_avail(v, a) \
8671+ vx_cres_avail(v, a, RLIMIT_MSGQUEUE)
d337f35e
JR
8672+
8673+
c2806d43
AM
8674+#define vx_ipcshm_add(v, k, a) \
8675+ vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
d337f35e 8676+
c2806d43
AM
8677+#define vx_ipcshm_sub(v, k, a) \
8678+ vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
d337f35e 8679+
c2806d43
AM
8680+#define vx_ipcshm_avail(v, a) \
8681+ vx_cres_avail(v, a, VLIMIT_SHMEM)
d337f35e
JR
8682+
8683+
c2806d43
AM
8684+#define vx_semary_inc(a) \
8685+ vx_acc_cres(current_vx_info(), 1, a, VLIMIT_SEMARY)
d337f35e 8686+
c2806d43
AM
8687+#define vx_semary_dec(a) \
8688+ vx_acc_cres(current_vx_info(), -1, a, VLIMIT_SEMARY)
d337f35e 8689+
d337f35e 8690+
c2806d43
AM
8691+#define vx_nsems_add(a,n) \
8692+ vx_add_cres(current_vx_info(), n, a, VLIMIT_NSEMS)
d337f35e 8693+
c2806d43
AM
8694+#define vx_nsems_sub(a,n) \
8695+ vx_sub_cres(current_vx_info(), n, a, VLIMIT_NSEMS)
7e46296a 8696+
2380c486 8697+
c2806d43
AM
8698+#else
8699+#warning duplicate inclusion
8700+#endif
8701diff -urNp -x '*.orig' linux-4.4/include/linux/vs_network.h linux-4.4/include/linux/vs_network.h
8702--- linux-4.4/include/linux/vs_network.h 1970-01-01 01:00:00.000000000 +0100
8703+++ linux-4.4/include/linux/vs_network.h 2021-02-24 16:56:24.589489533 +0100
8704@@ -0,0 +1,169 @@
8705+#ifndef _NX_VS_NETWORK_H
8706+#define _NX_VS_NETWORK_H
2380c486 8707+
c2806d43
AM
8708+#include "vserver/context.h"
8709+#include "vserver/network.h"
8710+#include "vserver/base.h"
8711+#include "vserver/check.h"
8712+#include "vserver/debug.h"
2380c486 8713+
c2806d43 8714+#include <linux/sched.h>
2380c486 8715+
d337f35e 8716+
c2806d43 8717+#define get_nx_info(i) __get_nx_info(i, __FILE__, __LINE__)
d337f35e 8718+
c2806d43
AM
8719+static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
8720+ const char *_file, int _line)
8721+{
8722+ if (!nxi)
8723+ return NULL;
d337f35e 8724+
c2806d43
AM
8725+ vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])",
8726+ nxi, nxi ? nxi->nx_id : 0,
8727+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8728+ _file, _line);
d337f35e 8729+
c2806d43
AM
8730+ atomic_inc(&nxi->nx_usecnt);
8731+ return nxi;
8732+}
d337f35e 8733+
d337f35e 8734+
c2806d43 8735+extern void free_nx_info(struct nx_info *);
d337f35e 8736+
c2806d43 8737+#define put_nx_info(i) __put_nx_info(i, __FILE__, __LINE__)
d337f35e 8738+
c2806d43
AM
8739+static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
8740+{
8741+ if (!nxi)
8742+ return;
3cc86a71 8743+
c2806d43
AM
8744+ vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])",
8745+ nxi, nxi ? nxi->nx_id : 0,
8746+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8747+ _file, _line);
d337f35e 8748+
c2806d43
AM
8749+ if (atomic_dec_and_test(&nxi->nx_usecnt))
8750+ free_nx_info(nxi);
8751+}
d337f35e 8752+
d337f35e 8753+
c2806d43 8754+#define init_nx_info(p, i) __init_nx_info(p, i, __FILE__, __LINE__)
d337f35e 8755+
c2806d43
AM
8756+static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi,
8757+ const char *_file, int _line)
8758+{
8759+ if (nxi) {
8760+ vxlprintk(VXD_CBIT(nid, 3),
8761+ "init_nx_info(%p[#%d.%d])",
8762+ nxi, nxi ? nxi->nx_id : 0,
8763+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8764+ _file, _line);
d337f35e 8765+
c2806d43
AM
8766+ atomic_inc(&nxi->nx_usecnt);
8767+ }
8768+ *nxp = nxi;
8769+}
d337f35e 8770+
d337f35e 8771+
c2806d43 8772+#define set_nx_info(p, i) __set_nx_info(p, i, __FILE__, __LINE__)
d337f35e 8773+
c2806d43
AM
8774+static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
8775+ const char *_file, int _line)
8776+{
8777+ struct nx_info *nxo;
d337f35e 8778+
c2806d43
AM
8779+ if (!nxi)
8780+ return;
d337f35e 8781+
c2806d43
AM
8782+ vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])",
8783+ nxi, nxi ? nxi->nx_id : 0,
8784+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8785+ _file, _line);
d337f35e 8786+
c2806d43
AM
8787+ atomic_inc(&nxi->nx_usecnt);
8788+ nxo = xchg(nxp, nxi);
8789+ BUG_ON(nxo);
8790+}
d337f35e 8791+
c2806d43 8792+#define clr_nx_info(p) __clr_nx_info(p, __FILE__, __LINE__)
d337f35e 8793+
c2806d43
AM
8794+static inline void __clr_nx_info(struct nx_info **nxp,
8795+ const char *_file, int _line)
8796+{
8797+ struct nx_info *nxo;
d337f35e 8798+
c2806d43
AM
8799+ nxo = xchg(nxp, NULL);
8800+ if (!nxo)
8801+ return;
d337f35e 8802+
c2806d43
AM
8803+ vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])",
8804+ nxo, nxo ? nxo->nx_id : 0,
8805+ nxo ? atomic_read(&nxo->nx_usecnt) : 0,
8806+ _file, _line);
d337f35e 8807+
c2806d43
AM
8808+ if (atomic_dec_and_test(&nxo->nx_usecnt))
8809+ free_nx_info(nxo);
8810+}
d337f35e 8811+
d337f35e 8812+
c2806d43
AM
8813+#define claim_nx_info(v, p) __claim_nx_info(v, p, __FILE__, __LINE__)
8814+
8815+static inline void __claim_nx_info(struct nx_info *nxi,
8816+ struct task_struct *task, const char *_file, int _line)
d33d7b00 8817+{
c2806d43
AM
8818+ vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p",
8819+ nxi, nxi ? nxi->nx_id : 0,
8820+ nxi?atomic_read(&nxi->nx_usecnt):0,
8821+ nxi?atomic_read(&nxi->nx_tasks):0,
8822+ task, _file, _line);
8823+
8824+ atomic_inc(&nxi->nx_tasks);
d33d7b00 8825+}
d337f35e 8826+
d337f35e 8827+
c2806d43 8828+extern void unhash_nx_info(struct nx_info *);
d337f35e 8829+
c2806d43 8830+#define release_nx_info(v, p) __release_nx_info(v, p, __FILE__, __LINE__)
d337f35e 8831+
c2806d43
AM
8832+static inline void __release_nx_info(struct nx_info *nxi,
8833+ struct task_struct *task, const char *_file, int _line)
8834+{
8835+ vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p",
8836+ nxi, nxi ? nxi->nx_id : 0,
8837+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8838+ nxi ? atomic_read(&nxi->nx_tasks) : 0,
8839+ task, _file, _line);
ab30d09f 8840+
c2806d43 8841+ might_sleep();
d337f35e 8842+
c2806d43
AM
8843+ if (atomic_dec_and_test(&nxi->nx_tasks))
8844+ unhash_nx_info(nxi);
8845+}
d337f35e 8846+
d337f35e 8847+
c2806d43 8848+#define task_get_nx_info(i) __task_get_nx_info(i, __FILE__, __LINE__)
adc1caaa 8849+
c2806d43
AM
8850+static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
8851+ const char *_file, int _line)
8852+{
8853+ struct nx_info *nxi;
d337f35e 8854+
c2806d43
AM
8855+ task_lock(p);
8856+ vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)",
8857+ p, _file, _line);
8858+ nxi = __get_nx_info(p->nx_info, _file, _line);
8859+ task_unlock(p);
8860+ return nxi;
8861+}
d337f35e 8862+
d337f35e 8863+
c2806d43
AM
8864+static inline void exit_nx_info(struct task_struct *p)
8865+{
8866+ if (p->nx_info)
8867+ release_nx_info(p->nx_info, p);
8868+}
d337f35e 8869+
d337f35e 8870+
c2806d43
AM
8871+#else
8872+#warning duplicate inclusion
8873+#endif
8874diff -urNp -x '*.orig' linux-4.4/include/linux/vs_pid.h linux-4.4/include/linux/vs_pid.h
8875--- linux-4.4/include/linux/vs_pid.h 1970-01-01 01:00:00.000000000 +0100
8876+++ linux-4.4/include/linux/vs_pid.h 2021-02-24 16:56:24.589489533 +0100
8877@@ -0,0 +1,50 @@
8878+#ifndef _VS_PID_H
8879+#define _VS_PID_H
d337f35e 8880+
c2806d43
AM
8881+#include "vserver/base.h"
8882+#include "vserver/check.h"
8883+#include "vserver/context.h"
8884+#include "vserver/debug.h"
8885+#include "vserver/pid.h"
8886+#include <linux/pid_namespace.h>
d337f35e 8887+
d337f35e 8888+
c2806d43 8889+#define VXF_FAKE_INIT (VXF_INFO_INIT | VXF_STATE_INIT)
d337f35e 8890+
c2806d43
AM
8891+static inline
8892+int vx_proc_task_visible(struct task_struct *task)
8893+{
8894+ if ((task->pid == 1) &&
8895+ !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT))
8896+ /* show a blend through init */
8897+ goto visible;
8898+ if (vx_check(vx_task_xid(task), VS_WATCH | VS_IDENT))
8899+ goto visible;
8900+ return 0;
8901+visible:
8902+ return 1;
8903+}
d337f35e 8904+
c2806d43 8905+#define find_task_by_real_pid(pid) find_task_by_pid_ns(pid, &init_pid_ns)
d337f35e 8906+
d337f35e 8907+
c2806d43
AM
8908+static inline
8909+struct task_struct *vx_get_proc_task(struct inode *inode, struct pid *pid)
8910+{
8911+ struct task_struct *task = get_pid_task(pid, PIDTYPE_PID);
d337f35e 8912+
c2806d43
AM
8913+ if (task && !vx_proc_task_visible(task)) {
8914+ vxdprintk(VXD_CBIT(misc, 6),
8915+ "dropping task (get) %p[#%u,%u] for %p[#%u,%u]",
8916+ task, task->xid, task->pid,
8917+ current, current->xid, current->pid);
8918+ put_task_struct(task);
8919+ task = NULL;
8920+ }
8921+ return task;
8922+}
d337f35e 8923+
d337f35e 8924+
c2806d43
AM
8925+#else
8926+#warning duplicate inclusion
8927+#endif
8928diff -urNp -x '*.orig' linux-4.4/include/linux/vs_sched.h linux-4.4/include/linux/vs_sched.h
8929--- linux-4.4/include/linux/vs_sched.h 1970-01-01 01:00:00.000000000 +0100
8930+++ linux-4.4/include/linux/vs_sched.h 2021-02-24 16:56:24.589489533 +0100
8931@@ -0,0 +1,40 @@
8932+#ifndef _VS_SCHED_H
8933+#define _VS_SCHED_H
d337f35e 8934+
c2806d43
AM
8935+#include "vserver/base.h"
8936+#include "vserver/context.h"
8937+#include "vserver/sched.h"
d337f35e 8938+
d337f35e 8939+
c2806d43
AM
8940+#define MAX_PRIO_BIAS 20
8941+#define MIN_PRIO_BIAS -20
8942+
8943+static inline
8944+int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
8945+{
8946+ struct vx_info *vxi = p->vx_info;
8947+
8948+ if (vxi)
8949+ prio += vx_cpu(vxi, sched_pc).prio_bias;
8950+ return prio;
8951+}
8952+
8953+static inline void vx_account_user(struct vx_info *vxi,
8954+ cputime_t cputime, int nice)
8955+{
8956+ if (!vxi)
8957+ return;
8958+ vx_cpu(vxi, sched_pc).user_ticks += cputime;
8959+}
8960+
8961+static inline void vx_account_system(struct vx_info *vxi,
8962+ cputime_t cputime, int idle)
8963+{
8964+ if (!vxi)
8965+ return;
8966+ vx_cpu(vxi, sched_pc).sys_ticks += cputime;
8967+}
d337f35e 8968+
d33d7b00 8969+#else
c2806d43 8970+#warning duplicate inclusion
d33d7b00 8971+#endif
c2806d43
AM
8972diff -urNp -x '*.orig' linux-4.4/include/linux/vs_socket.h linux-4.4/include/linux/vs_socket.h
8973--- linux-4.4/include/linux/vs_socket.h 1970-01-01 01:00:00.000000000 +0100
8974+++ linux-4.4/include/linux/vs_socket.h 2021-02-24 16:56:24.589489533 +0100
8975@@ -0,0 +1,67 @@
8976+#ifndef _VS_SOCKET_H
8977+#define _VS_SOCKET_H
d337f35e 8978+
c2806d43
AM
8979+#include "vserver/debug.h"
8980+#include "vserver/base.h"
8981+#include "vserver/cacct.h"
8982+#include "vserver/context.h"
8983+#include "vserver/tag.h"
d337f35e
JR
8984+
8985+
c2806d43 8986+/* socket accounting */
d337f35e 8987+
c2806d43 8988+#include <linux/socket.h>
d337f35e 8989+
c2806d43
AM
8990+static inline int vx_sock_type(int family)
8991+{
8992+ switch (family) {
8993+ case PF_UNSPEC:
8994+ return VXA_SOCK_UNSPEC;
8995+ case PF_UNIX:
8996+ return VXA_SOCK_UNIX;
8997+ case PF_INET:
8998+ return VXA_SOCK_INET;
8999+ case PF_INET6:
9000+ return VXA_SOCK_INET6;
9001+ case PF_PACKET:
9002+ return VXA_SOCK_PACKET;
9003+ default:
9004+ return VXA_SOCK_OTHER;
9005+ }
9006+}
d337f35e 9007+
c2806d43
AM
9008+#define vx_acc_sock(v, f, p, s) \
9009+ __vx_acc_sock(v, f, p, s, __FILE__, __LINE__)
d337f35e 9010+
c2806d43
AM
9011+static inline void __vx_acc_sock(struct vx_info *vxi,
9012+ int family, int pos, int size, char *file, int line)
9013+{
9014+ if (vxi) {
9015+ int type = vx_sock_type(family);
d337f35e 9016+
c2806d43
AM
9017+ atomic_long_inc(&vxi->cacct.sock[type][pos].count);
9018+ atomic_long_add(size, &vxi->cacct.sock[type][pos].total);
9019+ }
9020+}
d337f35e 9021+
c2806d43
AM
9022+#define vx_sock_recv(sk, s) \
9023+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, s)
9024+#define vx_sock_send(sk, s) \
9025+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, s)
9026+#define vx_sock_fail(sk, s) \
9027+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, s)
d337f35e 9028+
d337f35e 9029+
c2806d43
AM
9030+#define sock_vx_init(s) do { \
9031+ (s)->sk_xid = 0; \
9032+ (s)->sk_vx_info = NULL; \
d33d7b00 9033+ } while (0)
d337f35e 9034+
c2806d43
AM
9035+#define sock_nx_init(s) do { \
9036+ (s)->sk_nid = 0; \
9037+ (s)->sk_nx_info = NULL; \
3cc86a71 9038+ } while (0)
d337f35e 9039+
c2806d43
AM
9040+#else
9041+#warning duplicate inclusion
9042+#endif
9043diff -urNp -x '*.orig' linux-4.4/include/linux/vs_tag.h linux-4.4/include/linux/vs_tag.h
9044--- linux-4.4/include/linux/vs_tag.h 1970-01-01 01:00:00.000000000 +0100
9045+++ linux-4.4/include/linux/vs_tag.h 2021-02-24 16:56:24.589489533 +0100
9046@@ -0,0 +1,47 @@
9047+#ifndef _VS_TAG_H
9048+#define _VS_TAG_H
d337f35e 9049+
c2806d43 9050+#include <linux/vserver/tag.h>
d337f35e 9051+
c2806d43 9052+/* check conditions */
d337f35e 9053+
c2806d43
AM
9054+#define DX_ADMIN 0x0001
9055+#define DX_WATCH 0x0002
9056+#define DX_HOSTID 0x0008
d337f35e 9057+
c2806d43 9058+#define DX_IDENT 0x0010
d337f35e 9059+
c2806d43 9060+#define DX_ARG_MASK 0x0010
d337f35e 9061+
d337f35e 9062+
c2806d43 9063+#define dx_task_tag(t) ((t)->tag)
d337f35e 9064+
c2806d43 9065+#define dx_current_tag() dx_task_tag(current)
d337f35e 9066+
c2806d43 9067+#define dx_check(c, m) __dx_check(dx_current_tag(), c, m)
d337f35e 9068+
c2806d43 9069+#define dx_weak_check(c, m) ((m) ? dx_check(c, m) : 1)
d337f35e 9070+
d337f35e 9071+
c2806d43
AM
9072+/*
9073+ * check current context for ADMIN/WATCH and
9074+ * optionally against supplied argument
9075+ */
9076+static inline int __dx_check(vtag_t cid, vtag_t id, unsigned int mode)
9077+{
9078+ if (mode & DX_ARG_MASK) {
9079+ if ((mode & DX_IDENT) && (id == cid))
9080+ return 1;
9081+ }
9082+ return (((mode & DX_ADMIN) && (cid == 0)) ||
9083+ ((mode & DX_WATCH) && (cid == 1)) ||
9084+ ((mode & DX_HOSTID) && (id == 0)));
9085+}
d337f35e 9086+
c2806d43
AM
9087+struct inode;
9088+int dx_permission(const struct inode *inode, int mask);
d337f35e
JR
9089+
9090+
d33d7b00 9091+#else
c2806d43 9092+#warning duplicate inclusion
d33d7b00 9093+#endif
c2806d43
AM
9094diff -urNp -x '*.orig' linux-4.4/include/linux/vs_time.h linux-4.4/include/linux/vs_time.h
9095--- linux-4.4/include/linux/vs_time.h 1970-01-01 01:00:00.000000000 +0100
9096+++ linux-4.4/include/linux/vs_time.h 2021-02-24 16:56:24.589489533 +0100
9097@@ -0,0 +1,19 @@
9098+#ifndef _VS_TIME_H
9099+#define _VS_TIME_H
d337f35e
JR
9100+
9101+
c2806d43 9102+/* time faking stuff */
d337f35e 9103+
c2806d43 9104+#ifdef CONFIG_VSERVER_VTIME
d337f35e 9105+
c2806d43
AM
9106+extern void vx_adjust_timespec(struct timespec *ts);
9107+extern int vx_settimeofday(const struct timespec *ts);
d337f35e 9108+
c2806d43
AM
9109+#else
9110+#define vx_adjust_timespec(t) do { } while (0)
9111+#define vx_settimeofday(t) do_settimeofday(t)
9112+#endif
d337f35e 9113+
c2806d43
AM
9114+#else
9115+#warning duplicate inclusion
9116+#endif
9117diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/base.h linux-4.4/include/linux/vserver/base.h
9118--- linux-4.4/include/linux/vserver/base.h 1970-01-01 01:00:00.000000000 +0100
9119+++ linux-4.4/include/linux/vserver/base.h 2021-02-24 16:56:24.586156094 +0100
9120@@ -0,0 +1,184 @@
9121+#ifndef _VSERVER_BASE_H
9122+#define _VSERVER_BASE_H
d337f35e 9123+
ba86f833 9124+
c2806d43 9125+/* context state changes */
ba86f833 9126+
c2806d43
AM
9127+enum {
9128+ VSC_STARTUP = 1,
9129+ VSC_SHUTDOWN,
d337f35e 9130+
c2806d43
AM
9131+ VSC_NETUP,
9132+ VSC_NETDOWN,
9133+};
d337f35e 9134+
d337f35e 9135+
d337f35e 9136+
c2806d43 9137+#define vx_task_xid(t) ((t)->xid)
d337f35e 9138+
c2806d43 9139+#define vx_current_xid() vx_task_xid(current)
d337f35e 9140+
c2806d43 9141+#define current_vx_info() (current->vx_info)
d337f35e 9142+
d337f35e 9143+
c2806d43 9144+#define nx_task_nid(t) ((t)->nid)
d337f35e 9145+
c2806d43 9146+#define nx_current_nid() nx_task_nid(current)
d337f35e 9147+
c2806d43 9148+#define current_nx_info() (current->nx_info)
d337f35e 9149+
d337f35e 9150+
c2806d43 9151+/* generic flag merging */
d337f35e 9152+
c2806d43 9153+#define vs_check_flags(v, m, f) (((v) & (m)) ^ (f))
d337f35e 9154+
c2806d43 9155+#define vs_mask_flags(v, f, m) (((v) & ~(m)) | ((f) & (m)))
d337f35e 9156+
c2806d43 9157+#define vs_mask_mask(v, f, m) (((v) & ~(m)) | ((v) & (f) & (m)))
d337f35e 9158+
c2806d43 9159+#define vs_check_bit(v, n) ((v) & (1LL << (n)))
d337f35e 9160+
d337f35e 9161+
c2806d43 9162+/* context flags */
d337f35e 9163+
c2806d43 9164+#define __vx_flags(v) ((v) ? (v)->vx_flags : 0)
d337f35e 9165+
c2806d43 9166+#define vx_current_flags() __vx_flags(current_vx_info())
d337f35e 9167+
c2806d43
AM
9168+#define vx_info_flags(v, m, f) \
9169+ vs_check_flags(__vx_flags(v), m, f)
d337f35e 9170+
c2806d43
AM
9171+#define task_vx_flags(t, m, f) \
9172+ ((t) && vx_info_flags((t)->vx_info, m, f))
d337f35e 9173+
c2806d43 9174+#define vx_flags(m, f) vx_info_flags(current_vx_info(), m, f)
2380c486 9175+
d337f35e 9176+
c2806d43 9177+/* context caps */
d337f35e 9178+
c2806d43 9179+#define __vx_ccaps(v) ((v) ? (v)->vx_ccaps : 0)
d337f35e 9180+
c2806d43 9181+#define vx_current_ccaps() __vx_ccaps(current_vx_info())
d337f35e 9182+
c2806d43 9183+#define vx_info_ccaps(v, c) (__vx_ccaps(v) & (c))
d337f35e 9184+
c2806d43 9185+#define vx_ccaps(c) vx_info_ccaps(current_vx_info(), (c))
d337f35e 9186+
d337f35e 9187+
d337f35e 9188+
c2806d43 9189+/* network flags */
d337f35e 9190+
c2806d43 9191+#define __nx_flags(n) ((n) ? (n)->nx_flags : 0)
d337f35e 9192+
c2806d43 9193+#define nx_current_flags() __nx_flags(current_nx_info())
d337f35e 9194+
c2806d43
AM
9195+#define nx_info_flags(n, m, f) \
9196+ vs_check_flags(__nx_flags(n), m, f)
d337f35e 9197+
c2806d43
AM
9198+#define task_nx_flags(t, m, f) \
9199+ ((t) && nx_info_flags((t)->nx_info, m, f))
d337f35e 9200+
c2806d43 9201+#define nx_flags(m, f) nx_info_flags(current_nx_info(), m, f)
d337f35e 9202+
d337f35e 9203+
c2806d43 9204+/* network caps */
d337f35e 9205+
c2806d43 9206+#define __nx_ncaps(n) ((n) ? (n)->nx_ncaps : 0)
d337f35e 9207+
c2806d43 9208+#define nx_current_ncaps() __nx_ncaps(current_nx_info())
d337f35e 9209+
c2806d43 9210+#define nx_info_ncaps(n, c) (__nx_ncaps(n) & (c))
d337f35e 9211+
c2806d43 9212+#define nx_ncaps(c) nx_info_ncaps(current_nx_info(), c)
d337f35e 9213+
d337f35e 9214+
c2806d43 9215+/* context mask capabilities */
adc1caaa 9216+
c2806d43 9217+#define __vx_mcaps(v) ((v) ? (v)->vx_ccaps >> 32UL : ~0 )
2380c486 9218+
c2806d43 9219+#define vx_info_mcaps(v, c) (__vx_mcaps(v) & (c))
d337f35e 9220+
c2806d43 9221+#define vx_mcaps(c) vx_info_mcaps(current_vx_info(), c)
d337f35e 9222+
2380c486 9223+
c2806d43 9224+/* context bcap mask */
d337f35e 9225+
c2806d43 9226+#define __vx_bcaps(v) ((v)->vx_bcaps)
d337f35e 9227+
c2806d43 9228+#define vx_current_bcaps() __vx_bcaps(current_vx_info())
d33d7b00 9229+
d337f35e 9230+
c2806d43 9231+/* mask given bcaps */
d337f35e 9232+
c2806d43 9233+#define vx_info_mbcaps(v, c) ((v) ? cap_intersect(__vx_bcaps(v), c) : c)
d337f35e 9234+
c2806d43 9235+#define vx_mbcaps(c) vx_info_mbcaps(current_vx_info(), c)
763640ca 9236+
d337f35e 9237+
c2806d43 9238+/* masked cap_bset */
c2e5f7c8 9239+
c2806d43 9240+#define vx_info_cap_bset(v) vx_info_mbcaps(v, current->cap_bset)
d337f35e 9241+
c2806d43 9242+#define vx_current_cap_bset() vx_info_cap_bset(current_vx_info())
d337f35e 9243+
c2806d43
AM
9244+#if 0
9245+#define vx_info_mbcap(v, b) \
9246+ (!vx_info_flags(v, VXF_STATE_SETUP, 0) ? \
9247+ vx_info_bcaps(v, b) : (b))
d337f35e 9248+
c2806d43
AM
9249+#define task_vx_mbcap(t, b) \
9250+ vx_info_mbcap((t)->vx_info, (t)->b)
d337f35e 9251+
c2806d43
AM
9252+#define vx_mbcap(b) task_vx_mbcap(current, b)
9253+#endif
7e46296a 9254+
c2806d43 9255+#define vx_cap_raised(v, c, f) cap_raised(vx_info_mbcaps(v, c), f)
7e46296a 9256+
c2806d43
AM
9257+#define vx_capable(b, c) (capable(b) || \
9258+ (cap_raised(current_cap(), b) && vx_ccaps(c)))
7e46296a 9259+
c2806d43
AM
9260+#define vx_ns_capable(n, b, c) (ns_capable(n, b) || \
9261+ (cap_raised(current_cap(), b) && vx_ccaps(c)))
7e46296a 9262+
c2806d43
AM
9263+#define nx_capable(b, c) (capable(b) || \
9264+ (cap_raised(current_cap(), b) && nx_ncaps(c)))
265d6dcc 9265+
c2806d43
AM
9266+#define nx_ns_capable(n, b, c) (ns_capable(n, b) || \
9267+ (cap_raised(current_cap(), b) && nx_ncaps(c)))
265d6dcc 9268+
c2806d43
AM
9269+#define vx_task_initpid(t, n) \
9270+ ((t)->vx_info && \
9271+ ((t)->vx_info->vx_initpid == (n)))
265d6dcc 9272+
c2806d43 9273+#define vx_current_initpid(n) vx_task_initpid(current, n)
7e46296a 9274+
d337f35e 9275+
c2806d43 9276+/* context unshare mask */
d337f35e 9277+
c2806d43 9278+#define __vx_umask(v) ((v)->vx_umask)
d337f35e 9279+
c2806d43 9280+#define vx_current_umask() __vx_umask(current_vx_info())
d337f35e 9281+
c2806d43
AM
9282+#define vx_can_unshare(b, f) (capable(b) || \
9283+ (cap_raised(current_cap(), b) && \
9284+ !((f) & ~vx_current_umask())))
d337f35e 9285+
c2806d43
AM
9286+#define vx_ns_can_unshare(n, b, f) (ns_capable(n, b) || \
9287+ (cap_raised(current_cap(), b) && \
9288+ !((f) & ~vx_current_umask())))
d337f35e 9289+
c2806d43 9290+#define __vx_wmask(v) ((v)->vx_wmask)
d337f35e 9291+
c2806d43 9292+#define vx_current_wmask() __vx_wmask(current_vx_info())
d337f35e 9293+
d337f35e 9294+
c2806d43 9295+#define __vx_state(v) ((v) ? ((v)->vx_state) : 0)
d337f35e 9296+
c2806d43 9297+#define vx_info_state(v, m) (__vx_state(v) & (m))
d337f35e 9298+
d337f35e 9299+
c2806d43 9300+#define __nx_state(n) ((n) ? ((n)->nx_state) : 0)
d337f35e 9301+
c2806d43 9302+#define nx_info_state(n, m) (__nx_state(n) & (m))
d337f35e 9303+
c2806d43
AM
9304+#endif
9305diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cacct.h linux-4.4/include/linux/vserver/cacct.h
9306--- linux-4.4/include/linux/vserver/cacct.h 1970-01-01 01:00:00.000000000 +0100
9307+++ linux-4.4/include/linux/vserver/cacct.h 2021-02-24 16:56:24.586156094 +0100
9308@@ -0,0 +1,15 @@
9309+#ifndef _VSERVER_CACCT_H
9310+#define _VSERVER_CACCT_H
d337f35e 9311+
d337f35e 9312+
c2806d43
AM
9313+enum sock_acc_field {
9314+ VXA_SOCK_UNSPEC = 0,
9315+ VXA_SOCK_UNIX,
9316+ VXA_SOCK_INET,
9317+ VXA_SOCK_INET6,
9318+ VXA_SOCK_PACKET,
9319+ VXA_SOCK_OTHER,
9320+ VXA_SOCK_SIZE /* array size */
9321+};
d337f35e 9322+
c2806d43
AM
9323+#endif /* _VSERVER_CACCT_H */
9324diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cacct_cmd.h linux-4.4/include/linux/vserver/cacct_cmd.h
9325--- linux-4.4/include/linux/vserver/cacct_cmd.h 1970-01-01 01:00:00.000000000 +0100
9326+++ linux-4.4/include/linux/vserver/cacct_cmd.h 2021-02-24 16:56:24.586156094 +0100
9327@@ -0,0 +1,10 @@
9328+#ifndef _VSERVER_CACCT_CMD_H
9329+#define _VSERVER_CACCT_CMD_H
d337f35e
JR
9330+
9331+
c2806d43
AM
9332+#include <linux/compiler.h>
9333+#include <uapi/vserver/cacct_cmd.h>
d337f35e 9334+
c2806d43 9335+extern int vc_sock_stat(struct vx_info *, void __user *);
2380c486 9336+
c2806d43
AM
9337+#endif /* _VSERVER_CACCT_CMD_H */
9338diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cacct_def.h linux-4.4/include/linux/vserver/cacct_def.h
9339--- linux-4.4/include/linux/vserver/cacct_def.h 1970-01-01 01:00:00.000000000 +0100
9340+++ linux-4.4/include/linux/vserver/cacct_def.h 2021-02-24 16:56:24.586156094 +0100
9341@@ -0,0 +1,43 @@
9342+#ifndef _VSERVER_CACCT_DEF_H
9343+#define _VSERVER_CACCT_DEF_H
d337f35e 9344+
c2806d43
AM
9345+#include <asm/atomic.h>
9346+#include <linux/vserver/cacct.h>
d337f35e 9347+
d337f35e 9348+
c2806d43
AM
9349+struct _vx_sock_acc {
9350+ atomic_long_t count;
9351+ atomic_long_t total;
9352+};
d337f35e 9353+
c2806d43 9354+/* context sub struct */
d337f35e 9355+
c2806d43
AM
9356+struct _vx_cacct {
9357+ struct _vx_sock_acc sock[VXA_SOCK_SIZE][3];
9358+ atomic_t slab[8];
9359+ atomic_t page[6][8];
9360+};
d337f35e 9361+
c2806d43 9362+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 9363+
c2806d43
AM
9364+static inline void __dump_vx_cacct(struct _vx_cacct *cacct)
9365+{
9366+ int i, j;
d337f35e 9367+
c2806d43
AM
9368+ printk("\t_vx_cacct:");
9369+ for (i = 0; i < 6; i++) {
9370+ struct _vx_sock_acc *ptr = cacct->sock[i];
d337f35e 9371+
c2806d43
AM
9372+ printk("\t [%d] =", i);
9373+ for (j = 0; j < 3; j++) {
9374+ printk(" [%d] = %8lu, %8lu", j,
9375+ atomic_long_read(&ptr[j].count),
9376+ atomic_long_read(&ptr[j].total));
9377+ }
9378+ printk("\n");
9379+ }
9380+}
d337f35e 9381+
c2806d43 9382+#endif
d337f35e 9383+
c2806d43
AM
9384+#endif /* _VSERVER_CACCT_DEF_H */
9385diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cacct_int.h linux-4.4/include/linux/vserver/cacct_int.h
9386--- linux-4.4/include/linux/vserver/cacct_int.h 1970-01-01 01:00:00.000000000 +0100
9387+++ linux-4.4/include/linux/vserver/cacct_int.h 2021-02-24 16:56:24.586156094 +0100
9388@@ -0,0 +1,17 @@
9389+#ifndef _VSERVER_CACCT_INT_H
9390+#define _VSERVER_CACCT_INT_H
d337f35e 9391+
c2806d43
AM
9392+static inline
9393+unsigned long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
9394+{
9395+ return atomic_long_read(&cacct->sock[type][pos].count);
9396+}
d337f35e 9397+
d337f35e 9398+
c2806d43
AM
9399+static inline
9400+unsigned long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
9401+{
9402+ return atomic_long_read(&cacct->sock[type][pos].total);
9403+}
d337f35e 9404+
c2806d43
AM
9405+#endif /* _VSERVER_CACCT_INT_H */
9406diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/check.h linux-4.4/include/linux/vserver/check.h
9407--- linux-4.4/include/linux/vserver/check.h 1970-01-01 01:00:00.000000000 +0100
9408+++ linux-4.4/include/linux/vserver/check.h 2021-02-24 16:56:24.586156094 +0100
9409@@ -0,0 +1,89 @@
9410+#ifndef _VSERVER_CHECK_H
9411+#define _VSERVER_CHECK_H
d337f35e 9412+
d337f35e 9413+
c2806d43 9414+#define MAX_S_CONTEXT 65535 /* Arbitrary limit */
d337f35e 9415+
c2806d43
AM
9416+#ifdef CONFIG_VSERVER_DYNAMIC_IDS
9417+#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */
9418+#else
9419+#define MIN_D_CONTEXT 65536
9420+#endif
d337f35e 9421+
c2806d43 9422+/* check conditions */
d337f35e 9423+
c2806d43
AM
9424+#define VS_ADMIN 0x0001
9425+#define VS_WATCH 0x0002
9426+#define VS_HIDE 0x0004
9427+#define VS_HOSTID 0x0008
d337f35e 9428+
c2806d43
AM
9429+#define VS_IDENT 0x0010
9430+#define VS_EQUIV 0x0020
9431+#define VS_PARENT 0x0040
9432+#define VS_CHILD 0x0080
3cc86a71 9433+
c2806d43 9434+#define VS_ARG_MASK 0x00F0
2380c486 9435+
c2806d43
AM
9436+#define VS_DYNAMIC 0x0100
9437+#define VS_STATIC 0x0200
d337f35e 9438+
c2806d43 9439+#define VS_ATR_MASK 0x0F00
d337f35e 9440+
c2806d43
AM
9441+#ifdef CONFIG_VSERVER_PRIVACY
9442+#define VS_ADMIN_P (0)
9443+#define VS_WATCH_P (0)
3cc86a71 9444+#else
c2806d43
AM
9445+#define VS_ADMIN_P VS_ADMIN
9446+#define VS_WATCH_P VS_WATCH
d33d7b00 9447+#endif
d337f35e 9448+
c2806d43
AM
9449+#define VS_HARDIRQ 0x1000
9450+#define VS_SOFTIRQ 0x2000
9451+#define VS_IRQ 0x4000
d337f35e 9452+
c2806d43 9453+#define VS_IRQ_MASK 0xF000
d337f35e 9454+
c2806d43 9455+#include <linux/hardirq.h>
d337f35e 9456+
c2806d43
AM
9457+/*
9458+ * check current context for ADMIN/WATCH and
9459+ * optionally against supplied argument
9460+ */
9461+static inline int __vs_check(int cid, int id, unsigned int mode)
9462+{
9463+ if (mode & VS_ARG_MASK) {
9464+ if ((mode & VS_IDENT) && (id == cid))
9465+ return 1;
9466+ }
9467+ if (mode & VS_ATR_MASK) {
9468+ if ((mode & VS_DYNAMIC) &&
9469+ (id >= MIN_D_CONTEXT) &&
9470+ (id <= MAX_S_CONTEXT))
9471+ return 1;
9472+ if ((mode & VS_STATIC) &&
9473+ (id > 1) && (id < MIN_D_CONTEXT))
9474+ return 1;
9475+ }
9476+ if (mode & VS_IRQ_MASK) {
9477+ if ((mode & VS_IRQ) && unlikely(in_interrupt()))
9478+ return 1;
9479+ if ((mode & VS_HARDIRQ) && unlikely(in_irq()))
9480+ return 1;
9481+ if ((mode & VS_SOFTIRQ) && unlikely(in_softirq()))
9482+ return 1;
9483+ }
9484+ return (((mode & VS_ADMIN) && (cid == 0)) ||
9485+ ((mode & VS_WATCH) && (cid == 1)) ||
9486+ ((mode & VS_HOSTID) && (id == 0)));
9487+}
d337f35e 9488+
c2806d43 9489+#define vx_check(c, m) __vs_check(vx_current_xid(), c, (m) | VS_IRQ)
d337f35e 9490+
c2806d43 9491+#define vx_weak_check(c, m) ((m) ? vx_check(c, m) : 1)
d337f35e 9492+
d337f35e 9493+
c2806d43 9494+#define nx_check(c, m) __vs_check(nx_current_nid(), c, m)
d337f35e 9495+
c2806d43 9496+#define nx_weak_check(c, m) ((m) ? nx_check(c, m) : 1)
d337f35e 9497+
c2806d43
AM
9498+#endif
9499diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/context.h linux-4.4/include/linux/vserver/context.h
9500--- linux-4.4/include/linux/vserver/context.h 1970-01-01 01:00:00.000000000 +0100
9501+++ linux-4.4/include/linux/vserver/context.h 2021-02-24 16:56:24.586156094 +0100
9502@@ -0,0 +1,110 @@
9503+#ifndef _VSERVER_CONTEXT_H
9504+#define _VSERVER_CONTEXT_H
d337f35e 9505+
d337f35e 9506+
c2806d43
AM
9507+#include <linux/list.h>
9508+#include <linux/spinlock.h>
9509+#include <linux/rcupdate.h>
9510+#include <uapi/vserver/context.h>
d337f35e 9511+
c2806d43
AM
9512+#include "limit_def.h"
9513+#include "sched_def.h"
9514+#include "cvirt_def.h"
9515+#include "cacct_def.h"
9516+#include "device_def.h"
d337f35e 9517+
c2806d43 9518+#define VX_SPACES 2
d337f35e 9519+
c2806d43
AM
9520+struct _vx_info_pc {
9521+ struct _vx_sched_pc sched_pc;
9522+ struct _vx_cvirt_pc cvirt_pc;
9523+};
d337f35e 9524+
c2806d43
AM
9525+struct _vx_space {
9526+ unsigned long vx_nsmask; /* assignment mask */
9527+ struct nsproxy *vx_nsproxy; /* private namespaces */
9528+ struct fs_struct *vx_fs; /* private namespace fs */
9529+ const struct cred *vx_cred; /* task credentials */
9530+};
d337f35e 9531+
c2806d43
AM
9532+struct vx_info {
9533+ struct hlist_node vx_hlist; /* linked list of contexts */
9534+ vxid_t vx_id; /* context id */
9535+ atomic_t vx_usecnt; /* usage count */
9536+ atomic_t vx_tasks; /* tasks count */
9537+ struct vx_info *vx_parent; /* parent context */
9538+ int vx_state; /* context state */
d337f35e 9539+
c2806d43 9540+ struct _vx_space space[VX_SPACES]; /* namespace store */
3cc86a71 9541+
c2806d43
AM
9542+ uint64_t vx_flags; /* context flags */
9543+ uint64_t vx_ccaps; /* context caps (vserver) */
9544+ uint64_t vx_umask; /* unshare mask (guest) */
9545+ uint64_t vx_wmask; /* warn mask (guest) */
9546+ kernel_cap_t vx_bcaps; /* bounding caps (system) */
d337f35e 9547+
c2806d43
AM
9548+ struct task_struct *vx_reaper; /* guest reaper process */
9549+ pid_t vx_initpid; /* PID of guest init */
9550+ int64_t vx_badness_bias; /* OOM points bias */
d337f35e 9551+
c2806d43
AM
9552+ struct _vx_limit limit; /* vserver limits */
9553+ struct _vx_sched sched; /* vserver scheduler */
9554+ struct _vx_cvirt cvirt; /* virtual/bias stuff */
9555+ struct _vx_cacct cacct; /* context accounting */
d337f35e 9556+
c2806d43 9557+ struct _vx_device dmap; /* default device map targets */
d337f35e 9558+
c2806d43
AM
9559+#ifndef CONFIG_SMP
9560+ struct _vx_info_pc info_pc; /* per cpu data */
9561+#else
9562+ struct _vx_info_pc *ptr_pc; /* per cpu array */
9563+#endif
d337f35e 9564+
c2806d43
AM
9565+ wait_queue_head_t vx_wait; /* context exit waitqueue */
9566+ int reboot_cmd; /* last sys_reboot() cmd */
9567+ int exit_code; /* last process exit code */
d337f35e 9568+
c2806d43
AM
9569+ char vx_name[65]; /* vserver name */
9570+};
adc1caaa 9571+
c2806d43
AM
9572+#ifndef CONFIG_SMP
9573+#define vx_ptr_pc(vxi) (&(vxi)->info_pc)
9574+#define vx_per_cpu(vxi, v, id) vx_ptr_pc(vxi)->v
9575+#else
9576+#define vx_ptr_pc(vxi) ((vxi)->ptr_pc)
9577+#define vx_per_cpu(vxi, v, id) per_cpu_ptr(vx_ptr_pc(vxi), id)->v
3cc86a71 9578+#endif
d337f35e 9579+
c2806d43 9580+#define vx_cpu(vxi, v) vx_per_cpu(vxi, v, smp_processor_id())
d337f35e 9581+
d337f35e 9582+
c2806d43
AM
9583+struct vx_info_save {
9584+ struct vx_info *vxi;
9585+ vxid_t xid;
9586+};
d337f35e 9587+
d337f35e 9588+
c2806d43 9589+/* status flags */
d337f35e 9590+
c2806d43
AM
9591+#define VXS_HASHED 0x0001
9592+#define VXS_PAUSED 0x0010
9593+#define VXS_SHUTDOWN 0x0100
9594+#define VXS_HELPER 0x1000
9595+#define VXS_RELEASED 0x8000
d337f35e 9596+
d337f35e 9597+
c2806d43
AM
9598+extern void claim_vx_info(struct vx_info *, struct task_struct *);
9599+extern void release_vx_info(struct vx_info *, struct task_struct *);
d337f35e 9600+
c2806d43
AM
9601+extern struct vx_info *lookup_vx_info(int);
9602+extern struct vx_info *lookup_or_create_vx_info(int);
d337f35e 9603+
c2806d43
AM
9604+extern int get_xid_list(int, unsigned int *, int);
9605+extern int xid_is_hashed(vxid_t);
d337f35e 9606+
c2806d43 9607+extern int vx_migrate_task(struct task_struct *, struct vx_info *, int);
d337f35e 9608+
c2806d43 9609+extern long vs_state_change(struct vx_info *, unsigned int);
d337f35e 9610+
d33d7b00 9611+
c2806d43
AM
9612+#endif /* _VSERVER_CONTEXT_H */
9613diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/context_cmd.h linux-4.4/include/linux/vserver/context_cmd.h
9614--- linux-4.4/include/linux/vserver/context_cmd.h 1970-01-01 01:00:00.000000000 +0100
9615+++ linux-4.4/include/linux/vserver/context_cmd.h 2021-02-24 16:56:24.586156094 +0100
9616@@ -0,0 +1,33 @@
9617+#ifndef _VSERVER_CONTEXT_CMD_H
9618+#define _VSERVER_CONTEXT_CMD_H
d33d7b00 9619+
c2806d43 9620+#include <uapi/vserver/context_cmd.h>
d337f35e 9621+
c2806d43 9622+extern int vc_task_xid(uint32_t);
d337f35e 9623+
c2806d43 9624+extern int vc_vx_info(struct vx_info *, void __user *);
d337f35e 9625+
c2806d43 9626+extern int vc_ctx_stat(struct vx_info *, void __user *);
d337f35e 9627+
c2806d43
AM
9628+extern int vc_ctx_create(uint32_t, void __user *);
9629+extern int vc_ctx_migrate(struct vx_info *, void __user *);
d337f35e 9630+
c2806d43
AM
9631+extern int vc_get_cflags(struct vx_info *, void __user *);
9632+extern int vc_set_cflags(struct vx_info *, void __user *);
d337f35e 9633+
c2806d43
AM
9634+extern int vc_get_ccaps(struct vx_info *, void __user *);
9635+extern int vc_set_ccaps(struct vx_info *, void __user *);
d337f35e 9636+
c2806d43
AM
9637+extern int vc_get_bcaps(struct vx_info *, void __user *);
9638+extern int vc_set_bcaps(struct vx_info *, void __user *);
d337f35e 9639+
c2806d43
AM
9640+extern int vc_get_umask(struct vx_info *, void __user *);
9641+extern int vc_set_umask(struct vx_info *, void __user *);
d337f35e 9642+
c2806d43
AM
9643+extern int vc_get_wmask(struct vx_info *, void __user *);
9644+extern int vc_set_wmask(struct vx_info *, void __user *);
d337f35e 9645+
c2806d43
AM
9646+extern int vc_get_badness(struct vx_info *, void __user *);
9647+extern int vc_set_badness(struct vx_info *, void __user *);
d337f35e 9648+
c2806d43
AM
9649+#endif /* _VSERVER_CONTEXT_CMD_H */
9650diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cvirt.h linux-4.4/include/linux/vserver/cvirt.h
9651--- linux-4.4/include/linux/vserver/cvirt.h 1970-01-01 01:00:00.000000000 +0100
9652+++ linux-4.4/include/linux/vserver/cvirt.h 2021-02-24 16:56:24.586156094 +0100
9653@@ -0,0 +1,18 @@
9654+#ifndef _VSERVER_CVIRT_H
9655+#define _VSERVER_CVIRT_H
d337f35e 9656+
c2806d43 9657+struct timespec;
d337f35e 9658+
c2806d43 9659+void vx_vsi_boottime(struct timespec *);
d337f35e 9660+
c2806d43 9661+void vx_vsi_uptime(struct timespec *, struct timespec *);
d337f35e 9662+
d337f35e 9663+
c2806d43 9664+struct vx_info;
d337f35e 9665+
c2806d43 9666+void vx_update_load(struct vx_info *);
d337f35e 9667+
d337f35e 9668+
c2806d43 9669+int vx_do_syslog(int, char __user *, int);
d337f35e 9670+
c2806d43
AM
9671+#endif /* _VSERVER_CVIRT_H */
9672diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cvirt_cmd.h linux-4.4/include/linux/vserver/cvirt_cmd.h
9673--- linux-4.4/include/linux/vserver/cvirt_cmd.h 1970-01-01 01:00:00.000000000 +0100
9674+++ linux-4.4/include/linux/vserver/cvirt_cmd.h 2021-02-24 16:56:24.586156094 +0100
9675@@ -0,0 +1,13 @@
9676+#ifndef _VSERVER_CVIRT_CMD_H
9677+#define _VSERVER_CVIRT_CMD_H
d337f35e 9678+
d337f35e 9679+
c2806d43
AM
9680+#include <linux/compiler.h>
9681+#include <uapi/vserver/cvirt_cmd.h>
d337f35e 9682+
c2806d43
AM
9683+extern int vc_set_vhi_name(struct vx_info *, void __user *);
9684+extern int vc_get_vhi_name(struct vx_info *, void __user *);
d337f35e 9685+
c2806d43 9686+extern int vc_virt_stat(struct vx_info *, void __user *);
d337f35e 9687+
c2806d43
AM
9688+#endif /* _VSERVER_CVIRT_CMD_H */
9689diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/cvirt_def.h linux-4.4/include/linux/vserver/cvirt_def.h
9690--- linux-4.4/include/linux/vserver/cvirt_def.h 1970-01-01 01:00:00.000000000 +0100
9691+++ linux-4.4/include/linux/vserver/cvirt_def.h 2021-02-24 16:56:24.586156094 +0100
9692@@ -0,0 +1,80 @@
9693+#ifndef _VSERVER_CVIRT_DEF_H
9694+#define _VSERVER_CVIRT_DEF_H
d337f35e 9695+
c2806d43
AM
9696+#include <linux/jiffies.h>
9697+#include <linux/spinlock.h>
9698+#include <linux/wait.h>
9699+#include <linux/time.h>
9700+#include <asm/atomic.h>
3bac966d 9701+
d337f35e 9702+
c2806d43
AM
9703+struct _vx_usage_stat {
9704+ uint64_t user;
9705+ uint64_t nice;
9706+ uint64_t system;
9707+ uint64_t softirq;
9708+ uint64_t irq;
9709+ uint64_t idle;
9710+ uint64_t iowait;
9711+};
d337f35e 9712+
c2806d43
AM
9713+struct _vx_syslog {
9714+ wait_queue_head_t log_wait;
9715+ spinlock_t logbuf_lock; /* lock for the log buffer */
d337f35e 9716+
c2806d43
AM
9717+ unsigned long log_start; /* next char to be read by syslog() */
9718+ unsigned long con_start; /* next char to be sent to consoles */
9719+ unsigned long log_end; /* most-recently-written-char + 1 */
9720+ unsigned long logged_chars; /* #chars since last read+clear operation */
3bac966d 9721+
c2806d43
AM
9722+ char log_buf[1024];
9723+};
d337f35e 9724+
d337f35e 9725+
c2806d43 9726+/* context sub struct */
d337f35e 9727+
c2806d43
AM
9728+struct _vx_cvirt {
9729+ atomic_t nr_threads; /* number of current threads */
9730+ atomic_t nr_running; /* number of running threads */
9731+ atomic_t nr_uninterruptible; /* number of uninterruptible threads */
d337f35e 9732+
c2806d43
AM
9733+ atomic_t nr_onhold; /* processes on hold */
9734+ uint32_t onhold_last; /* jiffies when put on hold */
d337f35e 9735+
c2806d43
AM
9736+ struct timespec bias_ts; /* time offset to the host */
9737+ struct timespec bias_idle;
9738+ struct timespec bias_uptime; /* context creation point */
9739+ uint64_t bias_clock; /* offset in clock_t */
d337f35e 9740+
c2806d43
AM
9741+ spinlock_t load_lock; /* lock for the load averages */
9742+ atomic_t load_updates; /* nr of load updates done so far */
9743+ uint32_t load_last; /* last time load was calculated */
9744+ uint32_t load[3]; /* load averages 1,5,15 */
d337f35e 9745+
c2806d43 9746+ atomic_t total_forks; /* number of forks so far */
d337f35e 9747+
c2806d43
AM
9748+ struct _vx_syslog syslog;
9749+};
d337f35e 9750+
c2806d43
AM
9751+struct _vx_cvirt_pc {
9752+ struct _vx_usage_stat cpustat;
9753+};
d337f35e
JR
9754+
9755+
c2806d43 9756+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 9757+
c2806d43
AM
9758+static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt)
9759+{
9760+ printk("\t_vx_cvirt:\n");
9761+ printk("\t threads: %4d, %4d, %4d, %4d\n",
9762+ atomic_read(&cvirt->nr_threads),
9763+ atomic_read(&cvirt->nr_running),
9764+ atomic_read(&cvirt->nr_uninterruptible),
9765+ atomic_read(&cvirt->nr_onhold));
9766+ /* add rest here */
9767+ printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks));
3cc86a71 9768+}
d337f35e 9769+
c2806d43 9770+#endif
d337f35e 9771+
c2806d43
AM
9772+#endif /* _VSERVER_CVIRT_DEF_H */
9773diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/debug.h linux-4.4/include/linux/vserver/debug.h
9774--- linux-4.4/include/linux/vserver/debug.h 1970-01-01 01:00:00.000000000 +0100
9775+++ linux-4.4/include/linux/vserver/debug.h 2021-02-24 16:56:24.586156094 +0100
9776@@ -0,0 +1,146 @@
9777+#ifndef _VSERVER_DEBUG_H
9778+#define _VSERVER_DEBUG_H
d337f35e 9779+
d337f35e 9780+
c2806d43
AM
9781+#define VXD_CBIT(n, m) (vs_debug_ ## n & (1 << (m)))
9782+#define VXD_CMIN(n, m) (vs_debug_ ## n > (m))
9783+#define VXD_MASK(n, m) (vs_debug_ ## n & (m))
d337f35e 9784+
c2806d43
AM
9785+#define VXD_DEV(d) (d), (d)->bd_inode->i_ino, \
9786+ imajor((d)->bd_inode), iminor((d)->bd_inode)
9787+#define VXF_DEV "%p[%lu,%d:%d]"
d337f35e 9788+
c2806d43
AM
9789+#if defined(CONFIG_QUOTES_UTF8)
9790+#define VS_Q_LQM "\xc2\xbb"
9791+#define VS_Q_RQM "\xc2\xab"
9792+#elif defined(CONFIG_QUOTES_ASCII)
9793+#define VS_Q_LQM "\x27"
9794+#define VS_Q_RQM "\x27"
9795+#else
9796+#define VS_Q_LQM "\xbb"
9797+#define VS_Q_RQM "\xab"
9798+#endif
d337f35e 9799+
c2806d43 9800+#define VS_Q(f) VS_Q_LQM f VS_Q_RQM
d337f35e 9801+
d337f35e 9802+
c2806d43
AM
9803+#define vxd_path(p) \
9804+ ({ static char _buffer[PATH_MAX]; \
9805+ d_path(p, _buffer, sizeof(_buffer)); })
d337f35e 9806+
c2806d43
AM
9807+#define vxd_cond_path(n) \
9808+ ((n) ? vxd_path(&(n)->path) : "<null>" )
d337f35e 9809+
d337f35e 9810+
c2806d43 9811+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 9812+
c2806d43
AM
9813+extern unsigned int vs_debug_switch;
9814+extern unsigned int vs_debug_xid;
9815+extern unsigned int vs_debug_nid;
9816+extern unsigned int vs_debug_tag;
9817+extern unsigned int vs_debug_net;
9818+extern unsigned int vs_debug_limit;
9819+extern unsigned int vs_debug_cres;
9820+extern unsigned int vs_debug_dlim;
9821+extern unsigned int vs_debug_quota;
9822+extern unsigned int vs_debug_cvirt;
9823+extern unsigned int vs_debug_space;
9824+extern unsigned int vs_debug_perm;
9825+extern unsigned int vs_debug_misc;
d337f35e 9826+
2380c486 9827+
c2806d43
AM
9828+#define VX_LOGLEVEL "vxD: "
9829+#define VX_PROC_FMT "%p: "
9830+#define VX_PROCESS current
2380c486 9831+
c2806d43
AM
9832+#define vxdprintk(c, f, x...) \
9833+ do { \
9834+ if (c) \
9835+ printk(VX_LOGLEVEL VX_PROC_FMT f "\n", \
9836+ VX_PROCESS , ##x); \
9837+ } while (0)
d337f35e 9838+
c2806d43
AM
9839+#define vxlprintk(c, f, x...) \
9840+ do { \
9841+ if (c) \
9842+ printk(VX_LOGLEVEL f " @%s:%d\n", x); \
9843+ } while (0)
d337f35e 9844+
c2806d43
AM
9845+#define vxfprintk(c, f, x...) \
9846+ do { \
9847+ if (c) \
9848+ printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \
9849+ } while (0)
d337f35e 9850+
d337f35e 9851+
c2806d43 9852+struct vx_info;
d337f35e 9853+
c2806d43
AM
9854+void dump_vx_info(struct vx_info *, int);
9855+void dump_vx_info_inactive(int);
d337f35e 9856+
c2806d43 9857+#else /* CONFIG_VSERVER_DEBUG */
d337f35e 9858+
c2806d43
AM
9859+#define vs_debug_switch 0
9860+#define vs_debug_xid 0
9861+#define vs_debug_nid 0
9862+#define vs_debug_tag 0
9863+#define vs_debug_net 0
9864+#define vs_debug_limit 0
9865+#define vs_debug_cres 0
9866+#define vs_debug_dlim 0
9867+#define vs_debug_quota 0
9868+#define vs_debug_cvirt 0
9869+#define vs_debug_space 0
9870+#define vs_debug_perm 0
9871+#define vs_debug_misc 0
d337f35e 9872+
c2806d43
AM
9873+#define vxdprintk(x...) do { } while (0)
9874+#define vxlprintk(x...) do { } while (0)
9875+#define vxfprintk(x...) do { } while (0)
d337f35e 9876+
c2806d43 9877+#endif /* CONFIG_VSERVER_DEBUG */
d337f35e 9878+
d337f35e 9879+
c2806d43 9880+#ifdef CONFIG_VSERVER_WARN
d337f35e 9881+
c2806d43
AM
9882+#define VX_WARNLEVEL KERN_WARNING "vxW: "
9883+#define VX_WARN_TASK "[" VS_Q("%s") ",%u:#%u|%u|%u] "
9884+#define VX_WARN_XID "[xid #%u] "
9885+#define VX_WARN_NID "[nid #%u] "
9886+#define VX_WARN_TAG "[tag #%u] "
d337f35e 9887+
c2806d43
AM
9888+#define vxwprintk(c, f, x...) \
9889+ do { \
9890+ if (c) \
9891+ printk(VX_WARNLEVEL f "\n", ##x); \
9892+ } while (0)
d337f35e 9893+
c2806d43 9894+#else /* CONFIG_VSERVER_WARN */
d337f35e 9895+
c2806d43 9896+#define vxwprintk(x...) do { } while (0)
d337f35e 9897+
c2806d43 9898+#endif /* CONFIG_VSERVER_WARN */
d337f35e 9899+
c2806d43
AM
9900+#define vxwprintk_task(c, f, x...) \
9901+ vxwprintk(c, VX_WARN_TASK f, \
9902+ current->comm, current->pid, \
9903+ current->xid, current->nid, \
9904+ current->tag, ##x)
9905+#define vxwprintk_xid(c, f, x...) \
9906+ vxwprintk(c, VX_WARN_XID f, current->xid, x)
9907+#define vxwprintk_nid(c, f, x...) \
9908+ vxwprintk(c, VX_WARN_NID f, current->nid, x)
9909+#define vxwprintk_tag(c, f, x...) \
9910+ vxwprintk(c, VX_WARN_TAG f, current->tag, x)
d337f35e 9911+
c2806d43
AM
9912+#ifdef CONFIG_VSERVER_DEBUG
9913+#define vxd_assert_lock(l) assert_spin_locked(l)
9914+#define vxd_assert(c, f, x...) vxlprintk(!(c), \
9915+ "assertion [" f "] failed.", ##x, __FILE__, __LINE__)
3cc86a71 9916+#else
c2806d43
AM
9917+#define vxd_assert_lock(l) do { } while (0)
9918+#define vxd_assert(c, f, x...) do { } while (0)
3cc86a71 9919+#endif
d337f35e 9920+
d337f35e 9921+
c2806d43
AM
9922+#endif /* _VSERVER_DEBUG_H */
9923diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/debug_cmd.h linux-4.4/include/linux/vserver/debug_cmd.h
9924--- linux-4.4/include/linux/vserver/debug_cmd.h 1970-01-01 01:00:00.000000000 +0100
9925+++ linux-4.4/include/linux/vserver/debug_cmd.h 2021-02-24 16:56:24.586156094 +0100
9926@@ -0,0 +1,37 @@
9927+#ifndef _VSERVER_DEBUG_CMD_H
9928+#define _VSERVER_DEBUG_CMD_H
d337f35e 9929+
c2806d43
AM
9930+#include <uapi/vserver/debug_cmd.h>
9931+
9932+
9933+#ifdef CONFIG_COMPAT
9934+
9935+#include <asm/compat.h>
9936+
9937+struct vcmd_read_history_v0_x32 {
9938+ uint32_t index;
9939+ uint32_t count;
9940+ compat_uptr_t data_ptr;
3cc86a71 9941+};
d337f35e 9942+
c2806d43
AM
9943+struct vcmd_read_monitor_v0_x32 {
9944+ uint32_t index;
9945+ uint32_t count;
9946+ compat_uptr_t data_ptr;
9947+};
d337f35e 9948+
c2806d43 9949+#endif /* CONFIG_COMPAT */
d337f35e 9950+
c2806d43 9951+extern int vc_dump_history(uint32_t);
d337f35e 9952+
c2806d43
AM
9953+extern int vc_read_history(uint32_t, void __user *);
9954+extern int vc_read_monitor(uint32_t, void __user *);
d337f35e 9955+
c2806d43 9956+#ifdef CONFIG_COMPAT
d337f35e 9957+
c2806d43
AM
9958+extern int vc_read_history_x32(uint32_t, void __user *);
9959+extern int vc_read_monitor_x32(uint32_t, void __user *);
d337f35e 9960+
c2806d43 9961+#endif /* CONFIG_COMPAT */
d337f35e 9962+
c2806d43
AM
9963+#endif /* _VSERVER_DEBUG_CMD_H */
9964diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/device.h linux-4.4/include/linux/vserver/device.h
9965--- linux-4.4/include/linux/vserver/device.h 1970-01-01 01:00:00.000000000 +0100
9966+++ linux-4.4/include/linux/vserver/device.h 2021-02-24 16:56:24.586156094 +0100
9967@@ -0,0 +1,9 @@
9968+#ifndef _VSERVER_DEVICE_H
9969+#define _VSERVER_DEVICE_H
d337f35e
JR
9970+
9971+
c2806d43 9972+#include <uapi/vserver/device.h>
d337f35e 9973+
c2806d43
AM
9974+#else /* _VSERVER_DEVICE_H */
9975+#warning duplicate inclusion
9976+#endif /* _VSERVER_DEVICE_H */
9977diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/device_cmd.h linux-4.4/include/linux/vserver/device_cmd.h
9978--- linux-4.4/include/linux/vserver/device_cmd.h 1970-01-01 01:00:00.000000000 +0100
9979+++ linux-4.4/include/linux/vserver/device_cmd.h 2021-02-24 16:56:24.586156094 +0100
9980@@ -0,0 +1,31 @@
9981+#ifndef _VSERVER_DEVICE_CMD_H
9982+#define _VSERVER_DEVICE_CMD_H
d337f35e 9983+
c2806d43 9984+#include <uapi/vserver/device_cmd.h>
d337f35e 9985+
d337f35e 9986+
c2806d43 9987+#ifdef CONFIG_COMPAT
d337f35e 9988+
c2806d43 9989+#include <asm/compat.h>
d337f35e 9990+
c2806d43
AM
9991+struct vcmd_set_mapping_v0_x32 {
9992+ compat_uptr_t device_ptr;
9993+ compat_uptr_t target_ptr;
9994+ uint32_t flags;
d33d7b00 9995+};
d337f35e 9996+
c2806d43 9997+#endif /* CONFIG_COMPAT */
3cc86a71 9998+
c2806d43 9999+#include <linux/compiler.h>
d337f35e 10000+
c2806d43
AM
10001+extern int vc_set_mapping(struct vx_info *, void __user *);
10002+extern int vc_unset_mapping(struct vx_info *, void __user *);
d337f35e 10003+
c2806d43 10004+#ifdef CONFIG_COMPAT
3bac966d 10005+
c2806d43
AM
10006+extern int vc_set_mapping_x32(struct vx_info *, void __user *);
10007+extern int vc_unset_mapping_x32(struct vx_info *, void __user *);
d337f35e 10008+
c2806d43 10009+#endif /* CONFIG_COMPAT */
d337f35e 10010+
c2806d43
AM
10011+#endif /* _VSERVER_DEVICE_CMD_H */
10012diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/device_def.h linux-4.4/include/linux/vserver/device_def.h
10013--- linux-4.4/include/linux/vserver/device_def.h 1970-01-01 01:00:00.000000000 +0100
10014+++ linux-4.4/include/linux/vserver/device_def.h 2021-02-24 16:56:24.586156094 +0100
10015@@ -0,0 +1,17 @@
10016+#ifndef _VSERVER_DEVICE_DEF_H
10017+#define _VSERVER_DEVICE_DEF_H
d337f35e 10018+
c2806d43 10019+#include <linux/types.h>
d337f35e 10020+
c2806d43
AM
10021+struct vx_dmap_target {
10022+ dev_t target;
10023+ uint32_t flags;
10024+};
d337f35e 10025+
c2806d43
AM
10026+struct _vx_device {
10027+#ifdef CONFIG_VSERVER_DEVICE
10028+ struct vx_dmap_target targets[2];
10029+#endif
10030+};
d337f35e 10031+
c2806d43
AM
10032+#endif /* _VSERVER_DEVICE_DEF_H */
10033diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/dlimit.h linux-4.4/include/linux/vserver/dlimit.h
10034--- linux-4.4/include/linux/vserver/dlimit.h 1970-01-01 01:00:00.000000000 +0100
10035+++ linux-4.4/include/linux/vserver/dlimit.h 2021-02-24 16:56:24.586156094 +0100
10036@@ -0,0 +1,54 @@
10037+#ifndef _VSERVER_DLIMIT_H
10038+#define _VSERVER_DLIMIT_H
d337f35e 10039+
c2806d43 10040+#include "switch.h"
d337f35e 10041+
d337f35e 10042+
c2806d43 10043+#ifdef __KERNEL__
d337f35e 10044+
c2806d43 10045+/* keep in sync with CDLIM_INFINITY */
d337f35e 10046+
c2806d43 10047+#define DLIM_INFINITY (~0ULL)
d337f35e 10048+
c2806d43
AM
10049+#include <linux/spinlock.h>
10050+#include <linux/rcupdate.h>
d337f35e 10051+
c2806d43 10052+struct super_block;
d337f35e 10053+
c2806d43
AM
10054+struct dl_info {
10055+ struct hlist_node dl_hlist; /* linked list of contexts */
10056+ struct rcu_head dl_rcu; /* the rcu head */
10057+ vtag_t dl_tag; /* context tag */
10058+ atomic_t dl_usecnt; /* usage count */
10059+ atomic_t dl_refcnt; /* reference count */
d337f35e 10060+
c2806d43 10061+ struct super_block *dl_sb; /* associated superblock */
d337f35e 10062+
c2806d43 10063+ spinlock_t dl_lock; /* protect the values */
d337f35e 10064+
c2806d43
AM
10065+ unsigned long long dl_space_used; /* used space in bytes */
10066+ unsigned long long dl_space_total; /* maximum space in bytes */
10067+ unsigned long dl_inodes_used; /* used inodes */
10068+ unsigned long dl_inodes_total; /* maximum inodes */
d337f35e 10069+
c2806d43
AM
10070+ unsigned int dl_nrlmult; /* non root limit mult */
10071+};
d337f35e 10072+
c2806d43 10073+struct rcu_head;
d337f35e 10074+
c2806d43
AM
10075+extern void rcu_free_dl_info(struct rcu_head *);
10076+extern void unhash_dl_info(struct dl_info *);
d337f35e 10077+
c2806d43 10078+extern struct dl_info *locate_dl_info(struct super_block *, vtag_t);
d337f35e 10079+
2380c486 10080+
c2806d43 10081+struct kstatfs;
adc1caaa 10082+
c2806d43 10083+extern void vx_vsi_statfs(struct super_block *, struct kstatfs *);
d337f35e 10084+
c2806d43 10085+typedef uint64_t dlsize_t;
d337f35e 10086+
c2806d43
AM
10087+#endif /* __KERNEL__ */
10088+#else /* _VSERVER_DLIMIT_H */
10089+#warning duplicate inclusion
10090+#endif /* _VSERVER_DLIMIT_H */
10091diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/dlimit_cmd.h linux-4.4/include/linux/vserver/dlimit_cmd.h
10092--- linux-4.4/include/linux/vserver/dlimit_cmd.h 1970-01-01 01:00:00.000000000 +0100
10093+++ linux-4.4/include/linux/vserver/dlimit_cmd.h 2021-02-24 16:56:24.586156094 +0100
10094@@ -0,0 +1,46 @@
10095+#ifndef _VSERVER_DLIMIT_CMD_H
10096+#define _VSERVER_DLIMIT_CMD_H
d337f35e 10097+
c2806d43 10098+#include <uapi/vserver/dlimit_cmd.h>
d337f35e 10099+
d337f35e 10100+
c2806d43 10101+#ifdef CONFIG_COMPAT
d337f35e 10102+
c2806d43 10103+#include <asm/compat.h>
2380c486 10104+
c2806d43
AM
10105+struct vcmd_ctx_dlimit_base_v0_x32 {
10106+ compat_uptr_t name_ptr;
10107+ uint32_t flags;
10108+};
d337f35e 10109+
c2806d43
AM
10110+struct vcmd_ctx_dlimit_v0_x32 {
10111+ compat_uptr_t name_ptr;
10112+ uint32_t space_used; /* used space in kbytes */
10113+ uint32_t space_total; /* maximum space in kbytes */
10114+ uint32_t inodes_used; /* used inodes */
10115+ uint32_t inodes_total; /* maximum inodes */
10116+ uint32_t reserved; /* reserved for root in % */
10117+ uint32_t flags;
10118+};
d337f35e 10119+
c2806d43 10120+#endif /* CONFIG_COMPAT */
d337f35e 10121+
c2806d43 10122+#include <linux/compiler.h>
d337f35e 10123+
c2806d43
AM
10124+extern int vc_add_dlimit(uint32_t, void __user *);
10125+extern int vc_rem_dlimit(uint32_t, void __user *);
d337f35e 10126+
c2806d43
AM
10127+extern int vc_set_dlimit(uint32_t, void __user *);
10128+extern int vc_get_dlimit(uint32_t, void __user *);
d337f35e 10129+
c2806d43 10130+#ifdef CONFIG_COMPAT
d337f35e 10131+
c2806d43
AM
10132+extern int vc_add_dlimit_x32(uint32_t, void __user *);
10133+extern int vc_rem_dlimit_x32(uint32_t, void __user *);
d337f35e 10134+
c2806d43
AM
10135+extern int vc_set_dlimit_x32(uint32_t, void __user *);
10136+extern int vc_get_dlimit_x32(uint32_t, void __user *);
d337f35e 10137+
c2806d43 10138+#endif /* CONFIG_COMPAT */
d337f35e 10139+
c2806d43
AM
10140+#endif /* _VSERVER_DLIMIT_CMD_H */
10141diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/global.h linux-4.4/include/linux/vserver/global.h
10142--- linux-4.4/include/linux/vserver/global.h 1970-01-01 01:00:00.000000000 +0100
10143+++ linux-4.4/include/linux/vserver/global.h 2021-02-24 16:56:24.586156094 +0100
10144@@ -0,0 +1,19 @@
10145+#ifndef _VSERVER_GLOBAL_H
10146+#define _VSERVER_GLOBAL_H
d337f35e 10147+
d337f35e 10148+
c2806d43
AM
10149+extern atomic_t vx_global_ctotal;
10150+extern atomic_t vx_global_cactive;
d337f35e 10151+
c2806d43
AM
10152+extern atomic_t nx_global_ctotal;
10153+extern atomic_t nx_global_cactive;
d337f35e 10154+
c2806d43
AM
10155+extern atomic_t vs_global_nsproxy;
10156+extern atomic_t vs_global_fs;
10157+extern atomic_t vs_global_mnt_ns;
10158+extern atomic_t vs_global_uts_ns;
10159+extern atomic_t vs_global_user_ns;
10160+extern atomic_t vs_global_pid_ns;
d337f35e 10161+
d337f35e 10162+
c2806d43
AM
10163+#endif /* _VSERVER_GLOBAL_H */
10164diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/history.h linux-4.4/include/linux/vserver/history.h
10165--- linux-4.4/include/linux/vserver/history.h 1970-01-01 01:00:00.000000000 +0100
10166+++ linux-4.4/include/linux/vserver/history.h 2021-02-24 16:56:24.586156094 +0100
10167@@ -0,0 +1,197 @@
10168+#ifndef _VSERVER_HISTORY_H
10169+#define _VSERVER_HISTORY_H
d337f35e 10170+
d337f35e 10171+
c2806d43
AM
10172+enum {
10173+ VXH_UNUSED = 0,
10174+ VXH_THROW_OOPS = 1,
d337f35e 10175+
c2806d43
AM
10176+ VXH_GET_VX_INFO,
10177+ VXH_PUT_VX_INFO,
10178+ VXH_INIT_VX_INFO,
10179+ VXH_SET_VX_INFO,
10180+ VXH_CLR_VX_INFO,
10181+ VXH_CLAIM_VX_INFO,
10182+ VXH_RELEASE_VX_INFO,
10183+ VXH_ALLOC_VX_INFO,
10184+ VXH_DEALLOC_VX_INFO,
10185+ VXH_HASH_VX_INFO,
10186+ VXH_UNHASH_VX_INFO,
10187+ VXH_LOC_VX_INFO,
10188+ VXH_LOOKUP_VX_INFO,
10189+ VXH_CREATE_VX_INFO,
10190+};
d337f35e 10191+
c2806d43
AM
10192+struct _vxhe_vxi {
10193+ struct vx_info *ptr;
10194+ unsigned xid;
10195+ unsigned usecnt;
10196+ unsigned tasks;
10197+};
d337f35e 10198+
c2806d43
AM
10199+struct _vxhe_set_clr {
10200+ void *data;
10201+};
d337f35e 10202+
c2806d43
AM
10203+struct _vxhe_loc_lookup {
10204+ unsigned arg;
10205+};
d337f35e 10206+
c2806d43
AM
10207+struct _vx_hist_entry {
10208+ void *loc;
10209+ unsigned short seq;
10210+ unsigned short type;
10211+ struct _vxhe_vxi vxi;
10212+ union {
10213+ struct _vxhe_set_clr sc;
10214+ struct _vxhe_loc_lookup ll;
10215+ };
10216+};
d337f35e 10217+
c2806d43 10218+#ifdef CONFIG_VSERVER_HISTORY
d337f35e 10219+
c2806d43 10220+extern unsigned volatile int vxh_active;
d337f35e 10221+
c2806d43 10222+struct _vx_hist_entry *vxh_advance(void *loc);
d337f35e 10223+
d337f35e 10224+
c2806d43
AM
10225+static inline
10226+void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi)
10227+{
10228+ entry->vxi.ptr = vxi;
10229+ if (vxi) {
10230+ entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt);
10231+ entry->vxi.tasks = atomic_read(&vxi->vx_tasks);
10232+ entry->vxi.xid = vxi->vx_id;
10233+ }
10234+}
d337f35e 10235+
d337f35e 10236+
c2806d43 10237+#define __HERE__ current_text_addr()
d337f35e 10238+
c2806d43
AM
10239+#define __VXH_BODY(__type, __data, __here) \
10240+ struct _vx_hist_entry *entry; \
10241+ \
10242+ preempt_disable(); \
10243+ entry = vxh_advance(__here); \
10244+ __data; \
10245+ entry->type = __type; \
10246+ preempt_enable();
d337f35e 10247+
d337f35e 10248+
c2806d43 10249+ /* pass vxi only */
d337f35e 10250+
c2806d43
AM
10251+#define __VXH_SMPL \
10252+ __vxh_copy_vxi(entry, vxi)
d337f35e 10253+
c2806d43
AM
10254+static inline
10255+void __vxh_smpl(struct vx_info *vxi, int __type, void *__here)
10256+{
10257+ __VXH_BODY(__type, __VXH_SMPL, __here)
10258+}
d337f35e 10259+
c2806d43 10260+ /* pass vxi and data (void *) */
d337f35e 10261+
c2806d43
AM
10262+#define __VXH_DATA \
10263+ __vxh_copy_vxi(entry, vxi); \
10264+ entry->sc.data = data
d337f35e 10265+
c2806d43
AM
10266+static inline
10267+void __vxh_data(struct vx_info *vxi, void *data,
10268+ int __type, void *__here)
10269+{
10270+ __VXH_BODY(__type, __VXH_DATA, __here)
10271+}
d337f35e 10272+
c2806d43 10273+ /* pass vxi and arg (long) */
d337f35e 10274+
c2806d43
AM
10275+#define __VXH_LONG \
10276+ __vxh_copy_vxi(entry, vxi); \
10277+ entry->ll.arg = arg
d337f35e 10278+
c2806d43
AM
10279+static inline
10280+void __vxh_long(struct vx_info *vxi, long arg,
10281+ int __type, void *__here)
10282+{
10283+ __VXH_BODY(__type, __VXH_LONG, __here)
10284+}
d337f35e 10285+
d337f35e 10286+
c2806d43
AM
10287+static inline
10288+void __vxh_throw_oops(void *__here)
10289+{
10290+ __VXH_BODY(VXH_THROW_OOPS, {}, __here);
10291+ /* prevent further acquisition */
10292+ vxh_active = 0;
10293+}
d337f35e 10294+
2380c486 10295+
c2806d43 10296+#define vxh_throw_oops() __vxh_throw_oops(__HERE__);
d337f35e 10297+
c2806d43
AM
10298+#define __vxh_get_vx_info(v, h) __vxh_smpl(v, VXH_GET_VX_INFO, h);
10299+#define __vxh_put_vx_info(v, h) __vxh_smpl(v, VXH_PUT_VX_INFO, h);
d337f35e 10300+
c2806d43
AM
10301+#define __vxh_init_vx_info(v, d, h) \
10302+ __vxh_data(v, d, VXH_INIT_VX_INFO, h);
10303+#define __vxh_set_vx_info(v, d, h) \
10304+ __vxh_data(v, d, VXH_SET_VX_INFO, h);
10305+#define __vxh_clr_vx_info(v, d, h) \
10306+ __vxh_data(v, d, VXH_CLR_VX_INFO, h);
d337f35e 10307+
c2806d43
AM
10308+#define __vxh_claim_vx_info(v, d, h) \
10309+ __vxh_data(v, d, VXH_CLAIM_VX_INFO, h);
10310+#define __vxh_release_vx_info(v, d, h) \
10311+ __vxh_data(v, d, VXH_RELEASE_VX_INFO, h);
d337f35e 10312+
c2806d43
AM
10313+#define vxh_alloc_vx_info(v) \
10314+ __vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__);
10315+#define vxh_dealloc_vx_info(v) \
10316+ __vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__);
3bac966d 10317+
c2806d43
AM
10318+#define vxh_hash_vx_info(v) \
10319+ __vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__);
10320+#define vxh_unhash_vx_info(v) \
10321+ __vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__);
d337f35e 10322+
c2806d43
AM
10323+#define vxh_loc_vx_info(v, l) \
10324+ __vxh_long(v, l, VXH_LOC_VX_INFO, __HERE__);
10325+#define vxh_lookup_vx_info(v, l) \
10326+ __vxh_long(v, l, VXH_LOOKUP_VX_INFO, __HERE__);
10327+#define vxh_create_vx_info(v, l) \
10328+ __vxh_long(v, l, VXH_CREATE_VX_INFO, __HERE__);
d337f35e 10329+
c2806d43 10330+extern void vxh_dump_history(void);
d337f35e
JR
10331+
10332+
c2806d43 10333+#else /* CONFIG_VSERVER_HISTORY */
d337f35e 10334+
c2806d43 10335+#define __HERE__ 0
d337f35e 10336+
c2806d43 10337+#define vxh_throw_oops() do { } while (0)
d337f35e 10338+
c2806d43
AM
10339+#define __vxh_get_vx_info(v, h) do { } while (0)
10340+#define __vxh_put_vx_info(v, h) do { } while (0)
d337f35e 10341+
c2806d43
AM
10342+#define __vxh_init_vx_info(v, d, h) do { } while (0)
10343+#define __vxh_set_vx_info(v, d, h) do { } while (0)
10344+#define __vxh_clr_vx_info(v, d, h) do { } while (0)
d337f35e 10345+
c2806d43
AM
10346+#define __vxh_claim_vx_info(v, d, h) do { } while (0)
10347+#define __vxh_release_vx_info(v, d, h) do { } while (0)
d337f35e 10348+
c2806d43
AM
10349+#define vxh_alloc_vx_info(v) do { } while (0)
10350+#define vxh_dealloc_vx_info(v) do { } while (0)
d337f35e 10351+
c2806d43
AM
10352+#define vxh_hash_vx_info(v) do { } while (0)
10353+#define vxh_unhash_vx_info(v) do { } while (0)
d337f35e 10354+
c2806d43
AM
10355+#define vxh_loc_vx_info(v, l) do { } while (0)
10356+#define vxh_lookup_vx_info(v, l) do { } while (0)
10357+#define vxh_create_vx_info(v, l) do { } while (0)
d337f35e 10358+
c2806d43 10359+#define vxh_dump_history() do { } while (0)
d337f35e
JR
10360+
10361+
c2806d43 10362+#endif /* CONFIG_VSERVER_HISTORY */
d337f35e 10363+
c2806d43
AM
10364+#endif /* _VSERVER_HISTORY_H */
10365diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/inode.h linux-4.4/include/linux/vserver/inode.h
10366--- linux-4.4/include/linux/vserver/inode.h 1970-01-01 01:00:00.000000000 +0100
10367+++ linux-4.4/include/linux/vserver/inode.h 2021-02-24 16:56:24.586156094 +0100
10368@@ -0,0 +1,19 @@
10369+#ifndef _VSERVER_INODE_H
10370+#define _VSERVER_INODE_H
d337f35e 10371+
c2806d43 10372+#include <uapi/vserver/inode.h>
d337f35e 10373+
d337f35e 10374+
c2806d43
AM
10375+#ifdef CONFIG_VSERVER_PROC_SECURE
10376+#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE )
10377+#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
10378+#else
10379+#define IATTR_PROC_DEFAULT ( IATTR_ADMIN )
10380+#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
10381+#endif
d337f35e 10382+
c2806d43 10383+#define vx_hide_check(c, m) (((m) & IATTR_HIDE) ? vx_check(c, m) : 1)
d337f35e 10384+
c2806d43
AM
10385+#else /* _VSERVER_INODE_H */
10386+#warning duplicate inclusion
10387+#endif /* _VSERVER_INODE_H */
10388diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/inode_cmd.h linux-4.4/include/linux/vserver/inode_cmd.h
10389--- linux-4.4/include/linux/vserver/inode_cmd.h 1970-01-01 01:00:00.000000000 +0100
10390+++ linux-4.4/include/linux/vserver/inode_cmd.h 2021-02-24 16:56:24.586156094 +0100
10391@@ -0,0 +1,36 @@
10392+#ifndef _VSERVER_INODE_CMD_H
10393+#define _VSERVER_INODE_CMD_H
d337f35e 10394+
c2806d43 10395+#include <uapi/vserver/inode_cmd.h>
d337f35e 10396+
d337f35e 10397+
d337f35e 10398+
c2806d43 10399+#ifdef CONFIG_COMPAT
d337f35e 10400+
c2806d43 10401+#include <asm/compat.h>
d337f35e 10402+
c2806d43
AM
10403+struct vcmd_ctx_iattr_v1_x32 {
10404+ compat_uptr_t name_ptr;
10405+ uint32_t tag;
10406+ uint32_t flags;
10407+ uint32_t mask;
10408+};
d337f35e 10409+
c2806d43 10410+#endif /* CONFIG_COMPAT */
d337f35e 10411+
c2806d43 10412+#include <linux/compiler.h>
d337f35e 10413+
c2806d43
AM
10414+extern int vc_get_iattr(void __user *);
10415+extern int vc_set_iattr(void __user *);
d337f35e 10416+
c2806d43
AM
10417+extern int vc_fget_iattr(uint32_t, void __user *);
10418+extern int vc_fset_iattr(uint32_t, void __user *);
d337f35e 10419+
c2806d43 10420+#ifdef CONFIG_COMPAT
d337f35e 10421+
c2806d43
AM
10422+extern int vc_get_iattr_x32(void __user *);
10423+extern int vc_set_iattr_x32(void __user *);
d337f35e 10424+
c2806d43 10425+#endif /* CONFIG_COMPAT */
d337f35e 10426+
c2806d43
AM
10427+#endif /* _VSERVER_INODE_CMD_H */
10428diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/limit.h linux-4.4/include/linux/vserver/limit.h
10429--- linux-4.4/include/linux/vserver/limit.h 1970-01-01 01:00:00.000000000 +0100
10430+++ linux-4.4/include/linux/vserver/limit.h 2021-02-24 16:56:24.586156094 +0100
10431@@ -0,0 +1,67 @@
10432+#ifndef _VSERVER_LIMIT_H
10433+#define _VSERVER_LIMIT_H
d337f35e 10434+
c2806d43 10435+#include <uapi/vserver/limit.h>
d337f35e 10436+
d337f35e 10437+
c2806d43 10438+#define VLIM_NOCHECK ((1L << VLIMIT_DENTRY) | (1L << RLIMIT_RSS))
d337f35e 10439+
c2806d43 10440+/* keep in sync with CRLIM_INFINITY */
d337f35e 10441+
c2806d43 10442+#define VLIM_INFINITY (~0ULL)
d337f35e 10443+
c2806d43
AM
10444+#include <asm/atomic.h>
10445+#include <asm/resource.h>
d337f35e 10446+
c2806d43
AM
10447+#ifndef RLIM_INFINITY
10448+#warning RLIM_INFINITY is undefined
10449+#endif
d337f35e 10450+
c2806d43 10451+#define __rlim_val(l, r, v) ((l)->res[r].v)
d337f35e 10452+
c2806d43
AM
10453+#define __rlim_soft(l, r) __rlim_val(l, r, soft)
10454+#define __rlim_hard(l, r) __rlim_val(l, r, hard)
d337f35e 10455+
c2806d43
AM
10456+#define __rlim_rcur(l, r) __rlim_val(l, r, rcur)
10457+#define __rlim_rmin(l, r) __rlim_val(l, r, rmin)
10458+#define __rlim_rmax(l, r) __rlim_val(l, r, rmax)
d337f35e 10459+
c2806d43
AM
10460+#define __rlim_lhit(l, r) __rlim_val(l, r, lhit)
10461+#define __rlim_hit(l, r) atomic_inc(&__rlim_lhit(l, r))
d337f35e 10462+
c2806d43
AM
10463+typedef atomic_long_t rlim_atomic_t;
10464+typedef unsigned long rlim_t;
d337f35e 10465+
c2806d43
AM
10466+#define __rlim_get(l, r) atomic_long_read(&__rlim_rcur(l, r))
10467+#define __rlim_set(l, r, v) atomic_long_set(&__rlim_rcur(l, r), v)
10468+#define __rlim_inc(l, r) atomic_long_inc(&__rlim_rcur(l, r))
10469+#define __rlim_dec(l, r) atomic_long_dec(&__rlim_rcur(l, r))
10470+#define __rlim_add(l, r, v) atomic_long_add(v, &__rlim_rcur(l, r))
10471+#define __rlim_sub(l, r, v) atomic_long_sub(v, &__rlim_rcur(l, r))
d337f35e 10472+
d337f35e 10473+
c2806d43
AM
10474+#if (RLIM_INFINITY == VLIM_INFINITY)
10475+#define VX_VLIM(r) ((long long)(long)(r))
10476+#define VX_RLIM(v) ((rlim_t)(v))
10477+#else
10478+#define VX_VLIM(r) (((r) == RLIM_INFINITY) \
10479+ ? VLIM_INFINITY : (long long)(r))
10480+#define VX_RLIM(v) (((v) == VLIM_INFINITY) \
10481+ ? RLIM_INFINITY : (rlim_t)(v))
10482+#endif
d337f35e 10483+
c2806d43 10484+struct sysinfo;
d337f35e 10485+
c2806d43
AM
10486+#ifdef CONFIG_MEMCG
10487+void vx_vsi_meminfo(struct sysinfo *);
10488+void vx_vsi_swapinfo(struct sysinfo *);
10489+long vx_vsi_cached(struct sysinfo *);
10490+#else /* !CONFIG_MEMCG */
10491+#define vx_vsi_meminfo(s) do { } while (0)
10492+#define vx_vsi_swapinfo(s) do { } while (0)
10493+#define vx_vsi_cached(s) (0L)
10494+#endif /* !CONFIG_MEMCG */
d337f35e 10495+
c2806d43 10496+#define NUM_LIMITS 24
d337f35e 10497+
c2806d43
AM
10498+#endif /* _VSERVER_LIMIT_H */
10499diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/limit_cmd.h linux-4.4/include/linux/vserver/limit_cmd.h
10500--- linux-4.4/include/linux/vserver/limit_cmd.h 1970-01-01 01:00:00.000000000 +0100
10501+++ linux-4.4/include/linux/vserver/limit_cmd.h 2021-02-24 16:56:24.586156094 +0100
10502@@ -0,0 +1,35 @@
10503+#ifndef _VSERVER_LIMIT_CMD_H
10504+#define _VSERVER_LIMIT_CMD_H
adc1caaa 10505+
c2806d43 10506+#include <uapi/vserver/limit_cmd.h>
d337f35e 10507+
d337f35e 10508+
c2806d43 10509+#ifdef CONFIG_IA32_EMULATION
d337f35e 10510+
c2806d43
AM
10511+struct vcmd_ctx_rlimit_v0_x32 {
10512+ uint32_t id;
10513+ uint64_t minimum;
10514+ uint64_t softlimit;
10515+ uint64_t maximum;
10516+} __attribute__ ((packed));
d337f35e 10517+
c2806d43 10518+#endif /* CONFIG_IA32_EMULATION */
d337f35e 10519+
c2806d43 10520+#include <linux/compiler.h>
d337f35e 10521+
c2806d43
AM
10522+extern int vc_get_rlimit_mask(uint32_t, void __user *);
10523+extern int vc_get_rlimit(struct vx_info *, void __user *);
10524+extern int vc_set_rlimit(struct vx_info *, void __user *);
10525+extern int vc_reset_hits(struct vx_info *, void __user *);
10526+extern int vc_reset_minmax(struct vx_info *, void __user *);
d337f35e 10527+
c2806d43 10528+extern int vc_rlimit_stat(struct vx_info *, void __user *);
d337f35e 10529+
c2806d43 10530+#ifdef CONFIG_IA32_EMULATION
2380c486 10531+
c2806d43
AM
10532+extern int vc_get_rlimit_x32(struct vx_info *, void __user *);
10533+extern int vc_set_rlimit_x32(struct vx_info *, void __user *);
adc1caaa 10534+
c2806d43 10535+#endif /* CONFIG_IA32_EMULATION */
adc1caaa 10536+
c2806d43
AM
10537+#endif /* _VSERVER_LIMIT_CMD_H */
10538diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/limit_def.h linux-4.4/include/linux/vserver/limit_def.h
10539--- linux-4.4/include/linux/vserver/limit_def.h 1970-01-01 01:00:00.000000000 +0100
10540+++ linux-4.4/include/linux/vserver/limit_def.h 2021-02-24 16:56:24.586156094 +0100
10541@@ -0,0 +1,47 @@
10542+#ifndef _VSERVER_LIMIT_DEF_H
10543+#define _VSERVER_LIMIT_DEF_H
10544+
10545+#include <asm/atomic.h>
10546+#include <asm/resource.h>
10547+
10548+#include "limit.h"
10549+
10550+
10551+struct _vx_res_limit {
10552+ rlim_t soft; /* Context soft limit */
10553+ rlim_t hard; /* Context hard limit */
10554+
10555+ rlim_atomic_t rcur; /* Current value */
10556+ rlim_t rmin; /* Context minimum */
10557+ rlim_t rmax; /* Context maximum */
10558+
10559+ atomic_t lhit; /* Limit hits */
10560+};
10561+
10562+/* context sub struct */
10563+
10564+struct _vx_limit {
10565+ struct _vx_res_limit res[NUM_LIMITS];
10566+};
10567+
10568+#ifdef CONFIG_VSERVER_DEBUG
10569+
10570+static inline void __dump_vx_limit(struct _vx_limit *limit)
3bac966d 10571+{
c2806d43 10572+ int i;
d337f35e 10573+
c2806d43
AM
10574+ printk("\t_vx_limit:");
10575+ for (i = 0; i < NUM_LIMITS; i++) {
10576+ printk("\t [%2d] = %8lu %8lu/%8lu, %8ld/%8ld, %8d\n",
10577+ i, (unsigned long)__rlim_get(limit, i),
10578+ (unsigned long)__rlim_rmin(limit, i),
10579+ (unsigned long)__rlim_rmax(limit, i),
10580+ (long)__rlim_soft(limit, i),
10581+ (long)__rlim_hard(limit, i),
10582+ atomic_read(&__rlim_lhit(limit, i)));
d33d7b00 10583+ }
d33d7b00 10584+}
d337f35e 10585+
c2806d43 10586+#endif
d337f35e 10587+
c2806d43
AM
10588+#endif /* _VSERVER_LIMIT_DEF_H */
10589diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/limit_int.h linux-4.4/include/linux/vserver/limit_int.h
10590--- linux-4.4/include/linux/vserver/limit_int.h 1970-01-01 01:00:00.000000000 +0100
10591+++ linux-4.4/include/linux/vserver/limit_int.h 2021-02-24 16:56:24.586156094 +0100
10592@@ -0,0 +1,193 @@
10593+#ifndef _VSERVER_LIMIT_INT_H
10594+#define _VSERVER_LIMIT_INT_H
3cc86a71 10595+
c2806d43
AM
10596+#define VXD_RCRES_COND(r) VXD_CBIT(cres, r)
10597+#define VXD_RLIMIT_COND(r) VXD_CBIT(limit, r)
3cc86a71 10598+
c2806d43
AM
10599+extern const char *vlimit_name[NUM_LIMITS];
10600+
10601+static inline void __vx_acc_cres(struct vx_info *vxi,
10602+ int res, int dir, void *_data, char *_file, int _line)
10603+{
10604+ if (VXD_RCRES_COND(res))
10605+ vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5ld%s (%p)",
10606+ (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10607+ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10608+ (dir > 0) ? "++" : "--", _data, _file, _line);
10609+ if (!vxi)
10610+ return;
10611+
10612+ if (dir > 0)
10613+ __rlim_inc(&vxi->limit, res);
10614+ else
10615+ __rlim_dec(&vxi->limit, res);
3bac966d 10616+}
d337f35e 10617+
c2806d43
AM
10618+static inline void __vx_add_cres(struct vx_info *vxi,
10619+ int res, int amount, void *_data, char *_file, int _line)
3bac966d 10620+{
c2806d43
AM
10621+ if (VXD_RCRES_COND(res))
10622+ vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5ld += %5d (%p)",
10623+ (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10624+ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10625+ amount, _data, _file, _line);
10626+ if (amount == 0)
10627+ return;
10628+ if (!vxi)
10629+ return;
10630+ __rlim_add(&vxi->limit, res, amount);
3bac966d 10631+}
d337f35e 10632+
3bac966d 10633+static inline
c2806d43 10634+int __vx_cres_adjust_max(struct _vx_limit *limit, int res, rlim_t value)
3bac966d 10635+{
c2806d43 10636+ int cond = (value > __rlim_rmax(limit, res));
d337f35e 10637+
c2806d43
AM
10638+ if (cond)
10639+ __rlim_rmax(limit, res) = value;
10640+ return cond;
3cc86a71 10641+}
2380c486 10642+
3cc86a71 10643+static inline
c2806d43 10644+int __vx_cres_adjust_min(struct _vx_limit *limit, int res, rlim_t value)
3bac966d 10645+{
c2806d43 10646+ int cond = (value < __rlim_rmin(limit, res));
d337f35e 10647+
c2806d43
AM
10648+ if (cond)
10649+ __rlim_rmin(limit, res) = value;
10650+ return cond;
3cc86a71 10651+}
d337f35e 10652+
3bac966d 10653+static inline
c2806d43 10654+void __vx_cres_fixup(struct _vx_limit *limit, int res, rlim_t value)
3bac966d 10655+{
c2806d43
AM
10656+ if (!__vx_cres_adjust_max(limit, res, value))
10657+ __vx_cres_adjust_min(limit, res, value);
3bac966d 10658+}
d337f35e 10659+
3cc86a71 10660+
c2806d43
AM
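10661+/* return values (an RLIM_INFINITY limit is never exceeded):
10662+ +1 ... no vxi, num == 0, or above soft but within hard limit
10663+ -1 ... within soft limit
10664+ 0 ... above hard limit (lhit counter is incremented) */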
3cc86a71 10665+
c2806d43
AM
10666+static inline int __vx_cres_avail(struct vx_info *vxi,
10667+ int res, int num, char *_file, int _line)
3bac966d 10668+{
c2806d43
AM
10669+ struct _vx_limit *limit;
10670+ rlim_t value;
d337f35e 10671+
c2806d43
AM
10672+ if (VXD_RLIMIT_COND(res))
10673+ vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld/%5ld > %5ld + %5d",
10674+ (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10675+ (vxi ? (long)__rlim_soft(&vxi->limit, res) : -1),
10676+ (vxi ? (long)__rlim_hard(&vxi->limit, res) : -1),
10677+ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10678+ num, _file, _line);
10679+ if (!vxi)
3cc86a71 10680+ return 1;
d33d7b00 10681+
c2806d43
AM
10682+ limit = &vxi->limit;
10683+ value = __rlim_get(limit, res);
d33d7b00 10684+
c2806d43
AM
10685+ if (!__vx_cres_adjust_max(limit, res, value))
10686+ __vx_cres_adjust_min(limit, res, value);
10687+
10688+ if (num == 0)
3cc86a71 10689+ return 1;
d337f35e 10690+
c2806d43
AM
10691+ if (__rlim_soft(limit, res) == RLIM_INFINITY)
10692+ return -1;
10693+ if (value + num <= __rlim_soft(limit, res))
10694+ return -1;
d33d7b00 10695+
c2806d43 10696+ if (__rlim_hard(limit, res) == RLIM_INFINITY)
3bac966d 10697+ return 1;
c2806d43 10698+ if (value + num <= __rlim_hard(limit, res))
3bac966d 10699+ return 1;
c2806d43
AM
10700+
10701+ __rlim_hit(limit, res);
3cc86a71
AM
10702+ return 0;
10703+}
d337f35e
JR
10704+
10705+
c2806d43 10706+static const int VLA_RSS[] = { RLIMIT_RSS, VLIMIT_ANON, VLIMIT_MAPPED, 0 };
d33d7b00 10707+
3cc86a71 10708+static inline
c2806d43 10709+rlim_t __vx_cres_array_sum(struct _vx_limit *limit, const int *array)
3cc86a71 10710+{
c2806d43
AM
10711+ rlim_t value, sum = 0;
10712+ int res;
3cc86a71 10713+
c2806d43
AM
10714+ while ((res = *array++)) {
10715+ value = __rlim_get(limit, res);
10716+ __vx_cres_fixup(limit, res, value);
10717+ sum += value;
3cc86a71 10718+ }
c2806d43 10719+ return sum;
3cc86a71
AM
10720+}
10721+
10722+static inline
c2806d43 10723+rlim_t __vx_cres_array_fixup(struct _vx_limit *limit, const int *array)
3bac966d 10724+{
c2806d43
AM
10725+ rlim_t value = __vx_cres_array_sum(limit, array + 1);
10726+ int res = *array;
d337f35e 10727+
c2806d43
AM
10728+ if (value == __rlim_get(limit, res))
10729+ return value;
d337f35e 10730+
c2806d43
AM
10731+ __rlim_set(limit, res, value);
10732+ /* now adjust min/max */
10733+ if (!__vx_cres_adjust_max(limit, res, value))
10734+ __vx_cres_adjust_min(limit, res, value);
d33d7b00 10735+
c2806d43 10736+ return value;
3cc86a71 10737+}
d33d7b00 10738+
c2806d43
AM
10739+static inline int __vx_cres_array_avail(struct vx_info *vxi,
10740+ const int *array, int num, char *_file, int _line)
3cc86a71 10741+{
c2806d43
AM
10742+ struct _vx_limit *limit;
10743+ rlim_t value = 0;
10744+ int res;
10745+
10746+ if (num == 0)
3cc86a71 10747+ return 1;
c2806d43 10748+ if (!vxi)
3cc86a71 10749+ return 1;
d337f35e 10750+
c2806d43
AM
10751+ limit = &vxi->limit;
10752+ res = *array;
10753+ value = __vx_cres_array_sum(limit, array + 1);
d337f35e 10754+
c2806d43
AM
10755+ __rlim_set(limit, res, value);
10756+ __vx_cres_fixup(limit, res, value);
d337f35e 10757+
c2806d43 10758+ return __vx_cres_avail(vxi, res, num, _file, _line);
3cc86a71 10759+}
d337f35e 10760+
d337f35e 10761+
c2806d43 10762+static inline void vx_limit_fixup(struct _vx_limit *limit, int id)
3cc86a71 10763+{
c2806d43
AM
10764+ rlim_t value;
10765+ int res;
10766+
10767+ /* complex resources first */
10768+ if ((id < 0) || (id == RLIMIT_RSS))
10769+ __vx_cres_array_fixup(limit, VLA_RSS);
10770+
10771+ for (res = 0; res < NUM_LIMITS; res++) {
10772+ if ((id > 0) && (res != id))
10773+ continue;
10774+
10775+ value = __rlim_get(limit, res);
10776+ __vx_cres_fixup(limit, res, value);
10777+
10778+ /* not supposed to happen, maybe warn? */
10779+ if (__rlim_rmax(limit, res) > __rlim_hard(limit, res))
10780+ __rlim_rmax(limit, res) = __rlim_hard(limit, res);
10781+ }
3cc86a71 10782+}
d337f35e 10783+
d337f35e 10784+
c2806d43
AM
10785+#endif /* _VSERVER_LIMIT_INT_H */
10786diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/monitor.h linux-4.4/include/linux/vserver/monitor.h
10787--- linux-4.4/include/linux/vserver/monitor.h 1970-01-01 01:00:00.000000000 +0100
10788+++ linux-4.4/include/linux/vserver/monitor.h 2021-02-24 16:56:24.586156094 +0100
10789@@ -0,0 +1,6 @@
10790+#ifndef _VSERVER_MONITOR_H
10791+#define _VSERVER_MONITOR_H
d337f35e 10792+
c2806d43 10793+#include <uapi/vserver/monitor.h>
d337f35e 10794+
c2806d43
AM
10795+#endif /* _VSERVER_MONITOR_H */
10796diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/network.h linux-4.4/include/linux/vserver/network.h
10797--- linux-4.4/include/linux/vserver/network.h 1970-01-01 01:00:00.000000000 +0100
10798+++ linux-4.4/include/linux/vserver/network.h 2021-02-24 16:56:24.589489533 +0100
10799@@ -0,0 +1,76 @@
10800+#ifndef _VSERVER_NETWORK_H
10801+#define _VSERVER_NETWORK_H
d337f35e
JR
10802+
10803+
c2806d43
AM
10804+#include <linux/list.h>
10805+#include <linux/spinlock.h>
10806+#include <linux/rcupdate.h>
10807+#include <linux/in.h>
10808+#include <linux/in6.h>
10809+#include <asm/atomic.h>
10810+#include <uapi/vserver/network.h>
d337f35e 10811+
c2806d43
AM
10812+struct nx_addr_v4 {
10813+ struct nx_addr_v4 *next;
10814+ struct in_addr ip[2];
10815+ struct in_addr mask;
10816+ uint16_t type;
10817+ uint16_t flags;
10818+};
d337f35e 10819+
c2806d43
AM
10820+struct nx_addr_v6 {
10821+ struct nx_addr_v6 *next;
10822+ struct in6_addr ip;
10823+ struct in6_addr mask;
10824+ uint32_t prefix;
10825+ uint16_t type;
10826+ uint16_t flags;
10827+};
d337f35e 10828+
c2806d43
AM
10829+struct nx_info {
10830+ struct hlist_node nx_hlist; /* linked list of nxinfos */
10831+ vnid_t nx_id; /* vnet id */
10832+ atomic_t nx_usecnt; /* usage count */
10833+ atomic_t nx_tasks; /* tasks count */
10834+ int nx_state; /* context state */
d337f35e 10835+
c2806d43
AM
10836+ uint64_t nx_flags; /* network flag word */
10837+ uint64_t nx_ncaps; /* network capabilities */
d337f35e 10838+
c2806d43
AM
10839+ spinlock_t addr_lock; /* protect address changes */
10840+ struct in_addr v4_lback; /* Loopback address */
10841+ struct in_addr v4_bcast; /* Broadcast address */
10842+ struct nx_addr_v4 v4; /* First/Single ipv4 address */
10843+#ifdef CONFIG_IPV6
10844+ struct nx_addr_v6 v6; /* First/Single ipv6 address */
10845+#endif
10846+ char nx_name[65]; /* network context name */
10847+};
d337f35e 10848+
d337f35e 10849+
c2806d43 10850+/* status flags */
d337f35e 10851+
c2806d43
AM
10852+#define NXS_HASHED 0x0001
10853+#define NXS_SHUTDOWN 0x0100
10854+#define NXS_RELEASED 0x8000
d337f35e 10855+
c2806d43 10856+extern struct nx_info *lookup_nx_info(int);
d337f35e 10857+
c2806d43
AM
10858+extern int get_nid_list(int, unsigned int *, int);
10859+extern int nid_is_hashed(vnid_t);
d337f35e 10860+
c2806d43 10861+extern int nx_migrate_task(struct task_struct *, struct nx_info *);
d337f35e 10862+
c2806d43 10863+extern long vs_net_change(struct nx_info *, unsigned int);
d337f35e 10864+
c2806d43 10865+struct sock;
d337f35e 10866+
d337f35e 10867+
c2806d43
AM
10868+#define NX_IPV4(n) ((n)->v4.type != NXA_TYPE_NONE)
10869+#ifdef CONFIG_IPV6
10870+#define NX_IPV6(n) ((n)->v6.type != NXA_TYPE_NONE)
10871+#else
10872+#define NX_IPV6(n) (0)
10873+#endif
d337f35e 10874+
c2806d43
AM
10875+#endif /* _VSERVER_NETWORK_H */
10876diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/network_cmd.h linux-4.4/include/linux/vserver/network_cmd.h
10877--- linux-4.4/include/linux/vserver/network_cmd.h 1970-01-01 01:00:00.000000000 +0100
10878+++ linux-4.4/include/linux/vserver/network_cmd.h 2021-02-24 16:56:24.589489533 +0100
10879@@ -0,0 +1,37 @@
10880+#ifndef _VSERVER_NETWORK_CMD_H
10881+#define _VSERVER_NETWORK_CMD_H
d337f35e 10882+
c2806d43 10883+#include <uapi/vserver/network_cmd.h>
d337f35e 10884+
c2806d43 10885+extern int vc_task_nid(uint32_t);
d337f35e 10886+
c2806d43 10887+extern int vc_nx_info(struct nx_info *, void __user *);
d337f35e 10888+
c2806d43
AM
10889+extern int vc_net_create(uint32_t, void __user *);
10890+extern int vc_net_migrate(struct nx_info *, void __user *);
d33d7b00 10891+
c2806d43
AM
10892+extern int vc_net_add(struct nx_info *, void __user *);
10893+extern int vc_net_remove(struct nx_info *, void __user *);
d337f35e 10894+
c2806d43
AM
10895+extern int vc_net_add_ipv4_v1(struct nx_info *, void __user *);
10896+extern int vc_net_add_ipv4(struct nx_info *, void __user *);
d337f35e 10897+
c2806d43
AM
10898+extern int vc_net_rem_ipv4_v1(struct nx_info *, void __user *);
10899+extern int vc_net_rem_ipv4(struct nx_info *, void __user *);
d337f35e 10900+
c2806d43
AM
10901+extern int vc_net_add_ipv6(struct nx_info *, void __user *);
10902+extern int vc_net_remove_ipv6(struct nx_info *, void __user *);
d337f35e 10903+
c2806d43
AM
10904+extern int vc_add_match_ipv4(struct nx_info *, void __user *);
10905+extern int vc_get_match_ipv4(struct nx_info *, void __user *);
d337f35e 10906+
c2806d43
AM
10907+extern int vc_add_match_ipv6(struct nx_info *, void __user *);
10908+extern int vc_get_match_ipv6(struct nx_info *, void __user *);
9795bf04 10909+
c2806d43
AM
10910+extern int vc_get_nflags(struct nx_info *, void __user *);
10911+extern int vc_set_nflags(struct nx_info *, void __user *);
d337f35e 10912+
c2806d43
AM
10913+extern int vc_get_ncaps(struct nx_info *, void __user *);
10914+extern int vc_set_ncaps(struct nx_info *, void __user *);
d337f35e 10915+
c2806d43
AM
10916+#endif /* _VSERVER_NETWORK_CMD_H */
10917diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/percpu.h linux-4.4/include/linux/vserver/percpu.h
10918--- linux-4.4/include/linux/vserver/percpu.h 1970-01-01 01:00:00.000000000 +0100
10919+++ linux-4.4/include/linux/vserver/percpu.h 2021-02-24 16:56:24.589489533 +0100
10920@@ -0,0 +1,14 @@
10921+#ifndef _VSERVER_PERCPU_H
10922+#define _VSERVER_PERCPU_H
d337f35e 10923+
c2806d43
AM
10924+#include "cvirt_def.h"
10925+#include "sched_def.h"
d337f35e 10926+
c2806d43
AM
10927+struct _vx_percpu {
10928+ struct _vx_cvirt_pc cvirt;
10929+ struct _vx_sched_pc sched;
10930+};
d337f35e 10931+
c2806d43 10932+#define PERCPU_PERCTX (sizeof(struct _vx_percpu))
d337f35e 10933+
c2806d43
AM
10934+#endif /* _VSERVER_PERCPU_H */
10935diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/pid.h linux-4.4/include/linux/vserver/pid.h
10936--- linux-4.4/include/linux/vserver/pid.h 1970-01-01 01:00:00.000000000 +0100
10937+++ linux-4.4/include/linux/vserver/pid.h 2021-02-24 16:56:24.589489533 +0100
10938@@ -0,0 +1,51 @@
10939+#ifndef _VSERVER_PID_H
10940+#define _VSERVER_PID_H
d337f35e 10941+
c2806d43 10942+/* pid faking stuff */
d337f35e 10943+
c2806d43
AM
10944+#define vx_info_map_pid(v, p) \
10945+ __vx_info_map_pid((v), (p), __func__, __FILE__, __LINE__)
10946+#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p)
10947+#define vx_map_pid(p) vx_info_map_pid(current_vx_info(), p)
10948+#define vx_map_tgid(p) vx_map_pid(p)
d337f35e 10949+
c2806d43
AM
10950+static inline int __vx_info_map_pid(struct vx_info *vxi, int pid,
10951+ const char *func, const char *file, int line)
10952+{
10953+ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
10954+ vxfprintk(VXD_CBIT(cvirt, 2),
10955+ "vx_map_tgid: %p/%llx: %d -> %d",
10956+ vxi, (long long)vxi->vx_flags, pid,
10957+ (pid && pid == vxi->vx_initpid) ? 1 : pid,
10958+ func, file, line);
10959+ if (pid == 0)
10960+ return 0;
10961+ if (pid == vxi->vx_initpid)
10962+ return 1;
10963+ }
10964+ return pid;
10965+}
10966+
10967+#define vx_info_rmap_pid(v, p) \
10968+ __vx_info_rmap_pid((v), (p), __func__, __FILE__, __LINE__)
10969+#define vx_rmap_pid(p) vx_info_rmap_pid(current_vx_info(), p)
10970+#define vx_rmap_tgid(p) vx_rmap_pid(p)
d337f35e 10971+
c2806d43
AM
10972+static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid,
10973+ const char *func, const char *file, int line)
10974+{
10975+ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
10976+ vxfprintk(VXD_CBIT(cvirt, 2),
10977+ "vx_rmap_tgid: %p/%llx: %d -> %d",
10978+ vxi, (long long)vxi->vx_flags, pid,
10979+ (pid == 1) ? vxi->vx_initpid : pid,
10980+ func, file, line);
10981+ if ((pid == 1) && vxi->vx_initpid)
10982+ return vxi->vx_initpid;
10983+ if (pid == vxi->vx_initpid)
10984+ return ~0U;
10985+ }
10986+ return pid;
10987+}
d337f35e 10988+
c2806d43
AM
10989+#endif
10990diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/sched.h linux-4.4/include/linux/vserver/sched.h
10991--- linux-4.4/include/linux/vserver/sched.h 1970-01-01 01:00:00.000000000 +0100
10992+++ linux-4.4/include/linux/vserver/sched.h 2021-02-24 16:56:24.589489533 +0100
10993@@ -0,0 +1,23 @@
10994+#ifndef _VSERVER_SCHED_H
10995+#define _VSERVER_SCHED_H
d337f35e
JR
10996+
10997+
c2806d43 10998+#ifdef __KERNEL__
d337f35e 10999+
c2806d43 11000+struct timespec;
d337f35e 11001+
c2806d43 11002+void vx_vsi_uptime(struct timespec *, struct timespec *);
3cc86a71 11003+
d337f35e 11004+
c2806d43 11005+struct vx_info;
d337f35e 11006+
c2806d43 11007+void vx_update_load(struct vx_info *);
3cc86a71 11008+
3cc86a71 11009+
c2806d43
AM
11010+void vx_update_sched_param(struct _vx_sched *sched,
11011+ struct _vx_sched_pc *sched_pc);
3cc86a71 11012+
c2806d43
AM
11013+#endif /* __KERNEL__ */
11014+#else /* _VSERVER_SCHED_H */
4bf69007 11015+#warning duplicate inclusion
c2806d43
AM
11016+#endif /* _VSERVER_SCHED_H */
11017diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/sched_cmd.h linux-4.4/include/linux/vserver/sched_cmd.h
11018--- linux-4.4/include/linux/vserver/sched_cmd.h 1970-01-01 01:00:00.000000000 +0100
11019+++ linux-4.4/include/linux/vserver/sched_cmd.h 2021-02-24 16:56:24.589489533 +0100
11020@@ -0,0 +1,11 @@
11021+#ifndef _VSERVER_SCHED_CMD_H
11022+#define _VSERVER_SCHED_CMD_H
d337f35e 11023+
d337f35e 11024+
c2806d43
AM
11025+#include <linux/compiler.h>
11026+#include <uapi/vserver/sched_cmd.h>
d33d7b00 11027+
c2806d43
AM
11028+extern int vc_set_prio_bias(struct vx_info *, void __user *);
11029+extern int vc_get_prio_bias(struct vx_info *, void __user *);
d33d7b00 11030+
c2806d43
AM
11031+#endif /* _VSERVER_SCHED_CMD_H */
11032diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/sched_def.h linux-4.4/include/linux/vserver/sched_def.h
11033--- linux-4.4/include/linux/vserver/sched_def.h 1970-01-01 01:00:00.000000000 +0100
11034+++ linux-4.4/include/linux/vserver/sched_def.h 2021-02-24 16:56:24.589489533 +0100
11035@@ -0,0 +1,38 @@
11036+#ifndef _VSERVER_SCHED_DEF_H
11037+#define _VSERVER_SCHED_DEF_H
d337f35e 11038+
c2806d43
AM
11039+#include <linux/spinlock.h>
11040+#include <linux/jiffies.h>
11041+#include <linux/cpumask.h>
11042+#include <asm/atomic.h>
11043+#include <asm/param.h>
d337f35e 11044+
d337f35e 11045+
c2806d43 11046+/* context sub struct */
d337f35e 11047+
c2806d43
AM
11048+struct _vx_sched {
11049+ int prio_bias; /* bias offset for priority */
d337f35e 11050+
c2806d43
AM
11051+ cpumask_t update; /* CPUs which should update */
11052+};
d337f35e 11053+
c2806d43
AM
11054+struct _vx_sched_pc {
11055+ int prio_bias; /* bias offset for priority */
d337f35e 11056+
c2806d43
AM
11057+ uint64_t user_ticks; /* token tick events */
11058+ uint64_t sys_ticks; /* token tick events */
11059+ uint64_t hold_ticks; /* token ticks paused */
11060+};
d337f35e 11061+
3cc86a71 11062+
c2806d43 11063+#ifdef CONFIG_VSERVER_DEBUG
3cc86a71 11064+/* Debug helper: print the prio_bias field of *sched via printk. */
c2806d43 11065+static inline void __dump_vx_sched(struct _vx_sched *sched)
4bf69007 11066+{
c2806d43
AM
11067+ printk("\t_vx_sched:\n");
11068+ printk("\t priority = %4d\n", sched->prio_bias); /* the "priority" label shows prio_bias */
4bf69007 11069+}
d337f35e 11070+
c2806d43 11071+#endif /* CONFIG_VSERVER_DEBUG */
4bf69007 11072+
c2806d43
AM
11073+#endif /* _VSERVER_SCHED_DEF_H */
11074diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/signal.h linux-4.4/include/linux/vserver/signal.h
11075--- linux-4.4/include/linux/vserver/signal.h 1970-01-01 01:00:00.000000000 +0100
11076+++ linux-4.4/include/linux/vserver/signal.h 2021-02-24 16:56:24.589489533 +0100
11077@@ -0,0 +1,14 @@
11078+#ifndef _VSERVER_SIGNAL_H
11079+#define _VSERVER_SIGNAL_H
d337f35e 11080+
d337f35e 11081+
c2806d43 11082+#ifdef __KERNEL__
4bf69007 11083+
c2806d43 11084+struct vx_info;
4bf69007 11085+
c2806d43 11086+int vx_info_kill(struct vx_info *, int, int);
d337f35e 11087+
c2806d43
AM
11088+#endif /* __KERNEL__ */
11089+#else /* _VSERVER_SIGNAL_H */
11090+#warning duplicate inclusion
11091+#endif /* _VSERVER_SIGNAL_H */
11092diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/signal_cmd.h linux-4.4/include/linux/vserver/signal_cmd.h
11093--- linux-4.4/include/linux/vserver/signal_cmd.h 1970-01-01 01:00:00.000000000 +0100
11094+++ linux-4.4/include/linux/vserver/signal_cmd.h 2021-02-24 16:56:24.589489533 +0100
11095@@ -0,0 +1,14 @@
11096+#ifndef _VSERVER_SIGNAL_CMD_H
11097+#define _VSERVER_SIGNAL_CMD_H
d337f35e 11098+
c2806d43 11099+#include <uapi/vserver/signal_cmd.h>
d337f35e 11100+
d337f35e 11101+
c2806d43
AM
11102+extern int vc_ctx_kill(struct vx_info *, void __user *);
11103+extern int vc_wait_exit(struct vx_info *, void __user *);
d337f35e
JR
11104+
11105+
c2806d43
AM
11106+extern int vc_get_pflags(uint32_t pid, void __user *);
11107+extern int vc_set_pflags(uint32_t pid, void __user *);
adc1caaa 11108+
c2806d43
AM
11109+#endif /* _VSERVER_SIGNAL_CMD_H */
11110diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/space.h linux-4.4/include/linux/vserver/space.h
11111--- linux-4.4/include/linux/vserver/space.h 1970-01-01 01:00:00.000000000 +0100
11112+++ linux-4.4/include/linux/vserver/space.h 2021-02-24 16:56:24.589489533 +0100
11113@@ -0,0 +1,12 @@
11114+#ifndef _VSERVER_SPACE_H
11115+#define _VSERVER_SPACE_H
d337f35e 11116+
c2806d43 11117+#include <linux/types.h>
d337f35e 11118+
c2806d43 11119+struct vx_info;
d337f35e 11120+
c2806d43 11121+int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index);
9f7054f1 11122+
c2806d43
AM
11123+#else /* _VSERVER_SPACE_H */
11124+#warning duplicate inclusion
11125+#endif /* _VSERVER_SPACE_H */
11126diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/space_cmd.h linux-4.4/include/linux/vserver/space_cmd.h
11127--- linux-4.4/include/linux/vserver/space_cmd.h 1970-01-01 01:00:00.000000000 +0100
11128+++ linux-4.4/include/linux/vserver/space_cmd.h 2021-02-24 16:56:24.589489533 +0100
11129@@ -0,0 +1,13 @@
11130+#ifndef _VSERVER_SPACE_CMD_H
11131+#define _VSERVER_SPACE_CMD_H
9f7054f1 11132+
c2806d43 11133+#include <uapi/vserver/space_cmd.h>
d337f35e 11134+
d337f35e 11135+
c2806d43
AM
11136+extern int vc_enter_space_v1(struct vx_info *, void __user *);
11137+extern int vc_set_space_v1(struct vx_info *, void __user *);
11138+extern int vc_enter_space(struct vx_info *, void __user *);
11139+extern int vc_set_space(struct vx_info *, void __user *);
11140+extern int vc_get_space_mask(void __user *, int);
d337f35e 11141+
c2806d43
AM
11142+#endif /* _VSERVER_SPACE_CMD_H */
11143diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/switch.h linux-4.4/include/linux/vserver/switch.h
11144--- linux-4.4/include/linux/vserver/switch.h 1970-01-01 01:00:00.000000000 +0100
11145+++ linux-4.4/include/linux/vserver/switch.h 2021-02-24 16:56:24.589489533 +0100
11146@@ -0,0 +1,8 @@
11147+#ifndef _VSERVER_SWITCH_H
11148+#define _VSERVER_SWITCH_H
d337f35e 11149+
d337f35e 11150+
c2806d43
AM
11151+#include <linux/errno.h>
11152+#include <uapi/vserver/switch.h>
2380c486 11153+
c2806d43
AM
11154+#endif /* _VSERVER_SWITCH_H */
11155diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/tag.h linux-4.4/include/linux/vserver/tag.h
11156--- linux-4.4/include/linux/vserver/tag.h 1970-01-01 01:00:00.000000000 +0100
11157+++ linux-4.4/include/linux/vserver/tag.h 2021-02-24 16:56:24.589489533 +0100
11158@@ -0,0 +1,160 @@
11159+#ifndef _DX_TAG_H
11160+#define _DX_TAG_H
d337f35e 11161+
c2806d43
AM
11162+#include <linux/types.h>
11163+#include <linux/uidgid.h>
d337f35e 11164+
d337f35e 11165+
c2806d43 11166+#define DX_TAG(in) (IS_TAGGED(in))
9f7054f1 11167+
d337f35e 11168+
c2806d43
AM
11169+#ifdef CONFIG_TAG_NFSD
11170+#define DX_TAG_NFSD 1
4bf69007 11171+#else
c2806d43 11172+#define DX_TAG_NFSD 0
4bf69007 11173+#endif
2380c486 11174+
2380c486 11175+
c2806d43 11176+#ifdef CONFIG_TAGGING_NONE
d337f35e 11177+/* No tagging: uid and gid pass through unchanged and the extracted tag is always 0. */
c2806d43
AM
11178+#define MAX_UID 0xFFFFFFFF
11179+#define MAX_GID 0xFFFFFFFF
d337f35e 11180+
c2806d43 11181+#define INOTAG_TAG(cond, uid, gid, tag) (0)
d337f35e 11182+
c2806d43
AM
11183+#define TAGINO_UID(cond, uid, tag) (uid)
11184+#define TAGINO_GID(cond, gid, tag) (gid)
d337f35e 11185+
c2806d43 11186+#endif /* CONFIG_TAGGING_NONE */
d337f35e 11187+
d337f35e 11188+
c2806d43 11189+#ifdef CONFIG_TAGGING_GID16
d337f35e 11190+/* When cond is set, the tag lives in gid bits 16-31 and the gid keeps its low 16 bits; the uid is untouched. */
c2806d43
AM
11191+#define MAX_UID 0xFFFFFFFF
11192+#define MAX_GID 0x0000FFFF
d337f35e 11193+
c2806d43
AM
11194+#define INOTAG_TAG(cond, uid, gid, tag) \
11195+ ((cond) ? (((gid) >> 16) & 0xFFFF) : 0)
d337f35e 11196+
c2806d43
AM
11197+#define TAGINO_UID(cond, uid, tag) (uid)
11198+#define TAGINO_GID(cond, gid, tag) \
11199+ ((cond) ? (((gid) & 0xFFFF) | ((tag) << 16)) : (gid))
d337f35e 11200+
c2806d43 11201+#endif /* CONFIG_TAGGING_GID16 */
d337f35e 11202+
d337f35e 11203+
c2806d43 11204+#ifdef CONFIG_TAGGING_ID24
d337f35e 11205+/* When cond is set, a 16-bit tag is split: tag bits 8-15 go to uid bits 24-31, tag bits 0-7 to gid bits 24-31; each id keeps its low 24 bits. */
c2806d43
AM
11206+#define MAX_UID 0x00FFFFFF
11207+#define MAX_GID 0x00FFFFFF
d337f35e 11208+
c2806d43
AM
11209+#define INOTAG_TAG(cond, uid, gid, tag) \
11210+ ((cond) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0)
d337f35e 11211+
c2806d43
AM
11212+#define TAGINO_UID(cond, uid, tag) \
11213+ ((cond) ? (((uid) & 0xFFFFFF) | (((tag) & 0xFF00) << 16)) : (uid))
11214+#define TAGINO_GID(cond, gid, tag) \
11215+ ((cond) ? (((gid) & 0xFFFFFF) | (((tag) & 0x00FF) << 24)) : (gid))
d337f35e 11216+
3cc86a71 11217+#endif /* CONFIG_TAGGING_ID24 */
d337f35e 11218+
3bac966d 11219+
c2806d43 11220+#ifdef CONFIG_TAGGING_UID16
d337f35e 11221+/* When cond is set, the tag lives in uid bits 16-31 and the uid keeps its low 16 bits; the gid is untouched. */
c2806d43
AM
11222+#define MAX_UID 0x0000FFFF
11223+#define MAX_GID 0xFFFFFFFF
d337f35e 11224+
c2806d43
AM
11225+#define INOTAG_TAG(cond, uid, gid, tag) \
11226+ ((cond) ? (((uid) >> 16) & 0xFFFF) : 0)
3cc86a71 11227+
c2806d43
AM
11228+#define TAGINO_UID(cond, uid, tag) \
11229+ ((cond) ? (((uid) & 0xFFFF) | ((tag) << 16)) : (uid))
11230+#define TAGINO_GID(cond, gid, tag) (gid)
d337f35e 11231+
c2806d43 11232+#endif /* CONFIG_TAGGING_UID16 */
d337f35e 11233+
d337f35e 11234+
c2806d43 11235+#ifdef CONFIG_TAGGING_INTERN
d337f35e 11236+/* uid and gid pass through unchanged; INOTAG_TAG yields the supplied tag when cond is set, else 0. */
c2806d43
AM
11237+#define MAX_UID 0xFFFFFFFF
11238+#define MAX_GID 0xFFFFFFFF
d337f35e 11239+
c2806d43
AM
11240+#define INOTAG_TAG(cond, uid, gid, tag) \
11241+ ((cond) ? (tag) : 0)
d337f35e 11242+
c2806d43
AM
11243+#define TAGINO_UID(cond, uid, tag) (uid)
11244+#define TAGINO_GID(cond, gid, tag) (gid)
11245+
11246+#endif /* CONFIG_TAGGING_INTERN */
d337f35e 11247+
d337f35e 11248+
c2806d43
AM
11249+#ifndef CONFIG_TAGGING_NONE
11250+#define dx_current_fstag(sb) \
11251+ ((sb)->s_flags & MS_TAGGED ? dx_current_tag() : 0)
4bf69007 11252+#else
c2806d43 11253+#define dx_current_fstag(sb) (0)
4bf69007 11254+#endif
d337f35e 11255+
c2806d43
AM
11256+#ifndef CONFIG_TAGGING_INTERN
11257+#define TAGINO_TAG(cond, tag) (0)
11258+#else
11259+#define TAGINO_TAG(cond, tag) ((cond) ? (tag) : 0)
11260+#endif
d337f35e 11261+
c2806d43
AM
11262+#define TAGINO_KUID(cond, kuid, ktag) \
11263+ KUIDT_INIT(TAGINO_UID(cond, __kuid_val(kuid), __ktag_val(ktag)))
11264+#define TAGINO_KGID(cond, kgid, ktag) \
11265+ KGIDT_INIT(TAGINO_GID(cond, __kgid_val(kgid), __ktag_val(ktag)))
11266+#define TAGINO_KTAG(cond, ktag) \
11267+ KTAGT_INIT(TAGINO_TAG(cond, __ktag_val(ktag)))
a4a22af8
AM
11268+
11269+
c2806d43
AM
11270+#define INOTAG_UID(cond, uid, gid) \
11271+ ((cond) ? ((uid) & MAX_UID) : (uid))
11272+#define INOTAG_GID(cond, uid, gid) \
11273+ ((cond) ? ((gid) & MAX_GID) : (gid))
d337f35e 11274+
c2806d43
AM
11275+#define INOTAG_KUID(cond, kuid, kgid) \
11276+ KUIDT_INIT(INOTAG_UID(cond, __kuid_val(kuid), __kgid_val(kgid)))
11277+#define INOTAG_KGID(cond, kuid, kgid) \
11278+ KGIDT_INIT(INOTAG_GID(cond, __kuid_val(kuid), __kgid_val(kgid)))
11279+#define INOTAG_KTAG(cond, kuid, kgid, ktag) \
11280+ KTAGT_INIT(INOTAG_TAG(cond, \
11281+ __kuid_val(kuid), __kgid_val(kgid), __ktag_val(ktag)))
a4a22af8 11282+
d337f35e 11283+/* Map a uid into the MAX_UID range: values above MAX_UID other than -1 become -2, then the result is masked with MAX_UID. */
c2806d43
AM
11284+static inline uid_t dx_map_uid(uid_t uid)
11285+{
11286+ if ((uid > MAX_UID) && (uid != -1)) /* out of range, but not the -1 value */
11287+ uid = -2;
11288+ return (uid & MAX_UID); /* -1 and -2 are masked as well */
11289+}
d337f35e 11290+/* Map a gid into the MAX_GID range: values above MAX_GID other than -1 become -2, then the result is masked with MAX_GID. */
c2806d43
AM
11291+static inline gid_t dx_map_gid(gid_t gid)
11292+{
11293+ if ((gid > MAX_GID) && (gid != -1)) /* out of range, but not the -1 value */
11294+ gid = -2;
11295+ return (gid & MAX_GID); /* -1 and -2 are masked as well */
11296+}
d337f35e 11297+
c2806d43
AM
11298+struct peer_tag {
11299+ int32_t xid;
11300+ int32_t nid;
11301+};
d337f35e 11302+
c2806d43 11303+#define dx_notagcheck(sb) ((sb) && ((sb)->s_flags & MS_NOTAGCHECK))
2380c486 11304+
c2806d43
AM
11305+int dx_parse_tag(char *string, vtag_t *tag, int remove, int *mnt_flags,
11306+ unsigned long *flags);
d337f35e 11307+
c2806d43 11308+#ifdef CONFIG_PROPAGATE
d337f35e 11309+
c2806d43 11310+void __dx_propagate_tag(struct nameidata *nd, struct inode *inode);
d337f35e 11311+
c2806d43 11312+#define dx_propagate_tag(n, i) __dx_propagate_tag(n, i)
d337f35e 11313+
4bf69007 11314+#else
c2806d43
AM
11315+#define dx_propagate_tag(n, i) do { } while (0)
11316+#endif
d337f35e 11317+
c2806d43
AM
11318+#endif /* _DX_TAG_H */
11319diff -urNp -x '*.orig' linux-4.4/include/linux/vserver/tag_cmd.h linux-4.4/include/linux/vserver/tag_cmd.h
11320--- linux-4.4/include/linux/vserver/tag_cmd.h 1970-01-01 01:00:00.000000000 +0100
11321+++ linux-4.4/include/linux/vserver/tag_cmd.h 2021-02-24 16:56:24.589489533 +0100
11322@@ -0,0 +1,10 @@
11323+#ifndef _VSERVER_TAG_CMD_H
11324+#define _VSERVER_TAG_CMD_H
3cc86a71 11325+
c2806d43 11326+#include <uapi/vserver/tag_cmd.h>
d337f35e 11327+
c2806d43 11328+extern int vc_task_tag(uint32_t);
3bac966d 11329+
c2806d43 11330+extern int vc_tag_migrate(uint32_t);
3bac966d 11331+
c2806d43
AM
11332+#endif /* _VSERVER_TAG_CMD_H */
11333diff -urNp -x '*.orig' linux-4.4/include/net/addrconf.h linux-4.4/include/net/addrconf.h
11334--- linux-4.4/include/net/addrconf.h 2021-02-24 16:56:11.899089877 +0100
11335+++ linux-4.4/include/net/addrconf.h 2021-02-24 16:56:24.589489533 +0100
927ca606 11336@@ -84,7 +84,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(str
c2e5f7c8
JR
11337
11338 int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
11339 const struct in6_addr *daddr, unsigned int srcprefs,
11340- struct in6_addr *saddr);
11341+ struct in6_addr *saddr, struct nx_info *nxi);
11342 int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
bb20add7 11343 u32 banned_flags);
c2e5f7c8 11344 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
c2806d43
AM
11345diff -urNp -x '*.orig' linux-4.4/include/net/af_unix.h linux-4.4/include/net/af_unix.h
11346--- linux-4.4/include/net/af_unix.h 2021-02-24 16:56:11.899089877 +0100
11347+++ linux-4.4/include/net/af_unix.h 2021-02-24 16:56:24.589489533 +0100
4bf69007
AM
11348@@ -4,6 +4,7 @@
11349 #include <linux/socket.h>
11350 #include <linux/un.h>
11351 #include <linux/mutex.h>
927ca606 11352+// #include <linux/vs_base.h>
4bf69007
AM
11353 #include <net/sock.h>
11354
927ca606 11355 void unix_inflight(struct user_struct *user, struct file *fp);
c2806d43
AM
11356diff -urNp -x '*.orig' linux-4.4/include/net/inet_timewait_sock.h linux-4.4/include/net/inet_timewait_sock.h
11357--- linux-4.4/include/net/inet_timewait_sock.h 2021-02-24 16:56:11.905756754 +0100
11358+++ linux-4.4/include/net/inet_timewait_sock.h 2021-02-24 16:56:24.589489533 +0100
8931d859 11359@@ -72,6 +72,10 @@ struct inet_timewait_sock {
b00e13aa 11360 #define tw_num __tw_common.skc_num
927ca606
AM
11361 #define tw_cookie __tw_common.skc_cookie
11362 #define tw_dr __tw_common.skc_tw_dr
4bf69007
AM
11363+#define tw_xid __tw_common.skc_xid
11364+#define tw_vx_info __tw_common.skc_vx_info
11365+#define tw_nid __tw_common.skc_nid
11366+#define tw_nx_info __tw_common.skc_nx_info
b00e13aa 11367
4bf69007
AM
11368 int tw_timeout;
11369 volatile unsigned char tw_substate;
c2806d43
AM
11370diff -urNp -x '*.orig' linux-4.4/include/net/ip6_route.h linux-4.4/include/net/ip6_route.h
11371--- linux-4.4/include/net/ip6_route.h 2021-02-24 16:56:11.905756754 +0100
11372+++ linux-4.4/include/net/ip6_route.h 2021-02-24 16:56:24.592822971 +0100
927ca606 11373@@ -90,7 +90,7 @@ int ip6_del_rt(struct rt6_info *);
c2e5f7c8
JR
11374
11375 int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
11376 const struct in6_addr *daddr, unsigned int prefs,
11377- struct in6_addr *saddr);
11378+ struct in6_addr *saddr, struct nx_info *nxi);
11379
11380 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
11381 const struct in6_addr *saddr, int oif, int flags);
c2806d43
AM
11382diff -urNp -x '*.orig' linux-4.4/include/net/route.h linux-4.4/include/net/route.h
11383--- linux-4.4/include/net/route.h 2021-02-24 16:56:11.909090192 +0100
11384+++ linux-4.4/include/net/route.h 2021-02-24 16:56:24.592822971 +0100
11385@@ -227,6 +227,9 @@ static inline void ip_rt_put(struct rtab
b00e13aa 11386 dst_release(&rt->dst);
4bf69007
AM
11387 }
11388
11389+#include <linux/vs_base.h>
11390+#include <linux/vs_inet.h>
d337f35e 11391+
4bf69007
AM
11392 #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3)
11393
11394 extern const __u8 ip_tos2prio[16];
c2806d43 11395@@ -274,6 +277,9 @@ static inline void ip_route_connect_init
4bf69007
AM
11396 protocol, flow_flags, dst, src, dport, sport);
11397 }
11398
11399+extern struct rtable *ip_v4_find_src(struct net *net, struct nx_info *,
11400+ struct flowi4 *);
d337f35e 11401+
4bf69007
AM
11402 static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
11403 __be32 dst, __be32 src, u32 tos,
11404 int oif, u8 protocol,
c2806d43 11405@@ -282,6 +288,7 @@ static inline struct rtable *ip_route_co
4bf69007
AM
11406 {
11407 struct net *net = sock_net(sk);
11408 struct rtable *rt;
11409+ struct nx_info *nx_info = current_nx_info();
11410
11411 ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
f15949f2 11412 sport, dport, sk);
c2806d43 11413@@ -295,7 +302,21 @@ static inline struct rtable *ip_route_co
4bf69007 11414
927ca606
AM
11415 src = fl4->saddr;
11416 }
4bf69007 11417- if (!dst || !src) {
927ca606 11418+
4bf69007
AM
11419+ if (sk)
11420+ nx_info = sk->sk_nx_info;
d337f35e 11421+
4bf69007
AM
11422+ vxdprintk(VXD_CBIT(net, 4),
11423+ "ip_route_connect(%p) %p,%p;%lx",
11424+ sk, nx_info, sk->sk_socket,
11425+ (sk->sk_socket?sk->sk_socket->flags:0));
d337f35e 11426+
4bf69007
AM
11427+ rt = ip_v4_find_src(net, nx_info, fl4);
11428+ if (IS_ERR(rt))
11429+ return rt;
11430+ ip_rt_put(rt);
d337f35e 11431+
4bf69007
AM
11432+ if (!fl4->daddr || !fl4->saddr) {
11433 rt = __ip_route_output_key(net, fl4);
11434 if (IS_ERR(rt))
11435 return rt;
c2806d43
AM
11436diff -urNp -x '*.orig' linux-4.4/include/net/sock.h linux-4.4/include/net/sock.h
11437--- linux-4.4/include/net/sock.h 2021-02-24 16:56:11.912423630 +0100
11438+++ linux-4.4/include/net/sock.h 2021-02-24 16:56:24.592822971 +0100
927ca606
AM
11439@@ -201,6 +201,10 @@ struct sock_common {
11440 struct in6_addr skc_v6_daddr;
11441 struct in6_addr skc_v6_rcv_saddr;
4bf69007 11442 #endif
61333608 11443+ vxid_t skc_xid;
4bf69007 11444+ struct vx_info *skc_vx_info;
61333608 11445+ vnid_t skc_nid;
4bf69007 11446+ struct nx_info *skc_nx_info;
c2e5f7c8 11447
927ca606
AM
11448 atomic64_t skc_cookie;
11449
3cc86a71 11450@@ -350,8 +354,12 @@ struct sock {
4bf69007
AM
11451 #define sk_prot __sk_common.skc_prot
11452 #define sk_net __sk_common.skc_net
c2e5f7c8
JR
11453 #define sk_v6_daddr __sk_common.skc_v6_daddr
11454-#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
11455+#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
927ca606 11456 #define sk_cookie __sk_common.skc_cookie
4bf69007
AM
11457+#define sk_xid __sk_common.skc_xid
11458+#define sk_vx_info __sk_common.skc_vx_info
11459+#define sk_nid __sk_common.skc_nid
11460+#define sk_nx_info __sk_common.skc_nx_info
927ca606
AM
11461 #define sk_incoming_cpu __sk_common.skc_incoming_cpu
11462 #define sk_flags __sk_common.skc_flags
11463 #define sk_rxhash __sk_common.skc_rxhash
c2806d43
AM
11464diff -urNp -x '*.orig' linux-4.4/include/uapi/Kbuild linux-4.4/include/uapi/Kbuild
11465--- linux-4.4/include/uapi/Kbuild 2016-01-11 00:01:32.000000000 +0100
11466+++ linux-4.4/include/uapi/Kbuild 2021-02-24 16:56:24.592822971 +0100
bb20add7 11467@@ -13,3 +13,4 @@ header-y += drm/
4bf69007
AM
11468 header-y += xen/
11469 header-y += scsi/
bb20add7 11470 header-y += misc/
4bf69007 11471+header-y += vserver/
c2806d43
AM
11472diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/capability.h linux-4.4/include/uapi/linux/capability.h
11473--- linux-4.4/include/uapi/linux/capability.h 2016-01-11 00:01:32.000000000 +0100
11474+++ linux-4.4/include/uapi/linux/capability.h 2021-02-24 16:56:24.592822971 +0100
4bf69007
AM
11475@@ -259,6 +259,7 @@ struct vfs_cap_data {
11476 arbitrary SCSI commands */
11477 /* Allow setting encryption key on loopback filesystem */
11478 /* Allow setting zone reclaim policy */
11479+/* Allow the selection of a security context */
11480
11481 #define CAP_SYS_ADMIN 21
11482
bb20add7 11483@@ -354,7 +355,12 @@ struct vfs_cap_data {
4bf69007 11484
bb20add7 11485 #define CAP_LAST_CAP CAP_AUDIT_READ
4bf69007
AM
11486
11487-#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
11488+/* Allow context manipulations */
11489+/* Allow changing context info on files */
d337f35e 11490+
4bf69007 11491+#define CAP_CONTEXT 63
d337f35e 11492+/* cap_valid() accepts CAP_CONTEXT in addition to the 0..CAP_LAST_CAP range. */
4bf69007
AM
11493+#define cap_valid(x) ((x) >= 0 && ((x) <= CAP_LAST_CAP || (x) == CAP_CONTEXT))
11494
11495 /*
11496 * Bit location of each capability (used by user-space library and kernel)
c2806d43
AM
11497diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/fs.h linux-4.4/include/uapi/linux/fs.h
11498--- linux-4.4/include/uapi/linux/fs.h 2016-01-11 00:01:32.000000000 +0100
11499+++ linux-4.4/include/uapi/linux/fs.h 2021-02-24 16:56:24.592822971 +0100
927ca606 11500@@ -91,6 +91,9 @@ struct inodes_stat_t {
4bf69007
AM
11501 #define MS_I_VERSION (1<<23) /* Update inode I_version field */
11502 #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
927ca606 11503 #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
b00e13aa
AM
11504+#define MS_TAGGED (1<<8) /* use generic inode tagging */
11505+#define MS_NOTAGCHECK (1<<9) /* don't check tags */
927ca606 11506+#define MS_TAGID (1<<26) /* use specific tag for this mount */
b00e13aa
AM
11507
11508 /* These sb flags are internal to the kernel */
09be7631 11509 #define MS_NOSEC (1<<28)
927ca606 11510@@ -197,12 +200,15 @@ struct inodes_stat_t {
4bf69007
AM
11511 #define FS_EXTENT_FL 0x00080000 /* Extents */
11512 #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */
11513 #define FS_NOCOW_FL 0x00800000 /* Do not cow file */
11514+#define FS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
927ca606 11515 #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
4bf69007
AM
11516 #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
11517
11518-#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
11519-#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
11520+#define FS_BARRIER_FL 0x04000000 /* Barrier for chroot() */
11521+#define FS_COW_FL 0x20000000 /* Copy on Write marker; NOTE(review): same value as FS_PROJINHERIT_FL above -- confirm this is intended */
8931d859 11522
4bf69007
AM
11523+#define FS_FL_USER_VISIBLE 0x0103DFFF /* User visible flags; NOTE(review): adds bit 0x01000000, which matches no FS_*_FL added in this hunk -- confirm */
11524+#define FS_FL_USER_MODIFIABLE 0x010380FF /* User modifiable flags; NOTE(review): adds bit 0x01000000, which matches no FS_*_FL added in this hunk -- confirm */
11525
11526 #define SYNC_FILE_RANGE_WAIT_BEFORE 1
11527 #define SYNC_FILE_RANGE_WRITE 2
c2806d43
AM
11528diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/gfs2_ondisk.h linux-4.4/include/uapi/linux/gfs2_ondisk.h
11529--- linux-4.4/include/uapi/linux/gfs2_ondisk.h 2016-01-11 00:01:32.000000000 +0100
11530+++ linux-4.4/include/uapi/linux/gfs2_ondisk.h 2021-02-24 16:56:24.592822971 +0100
4bf69007
AM
11531@@ -225,6 +225,9 @@ enum {
11532 gfs2fl_Sync = 8,
11533 gfs2fl_System = 9,
11534 gfs2fl_TopLevel = 10,
11535+ gfs2fl_IXUnlink = 16,
11536+ gfs2fl_Barrier = 17,
11537+ gfs2fl_Cow = 18,
11538 gfs2fl_TruncInProg = 29,
11539 gfs2fl_InheritDirectio = 30,
11540 gfs2fl_InheritJdata = 31,
11541@@ -242,6 +245,9 @@ enum {
11542 #define GFS2_DIF_SYNC 0x00000100
11543 #define GFS2_DIF_SYSTEM 0x00000200 /* New in gfs2 */
11544 #define GFS2_DIF_TOPDIR 0x00000400 /* New in gfs2 */
11545+#define GFS2_DIF_IXUNLINK 0x00010000
11546+#define GFS2_DIF_BARRIER 0x00020000
11547+#define GFS2_DIF_COW 0x00040000
11548 #define GFS2_DIF_TRUNC_IN_PROG 0x20000000 /* New in gfs2 */
11549 #define GFS2_DIF_INHERIT_DIRECTIO 0x40000000 /* only in gfs1 */
11550 #define GFS2_DIF_INHERIT_JDATA 0x80000000
c2806d43
AM
11551diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/if_tun.h linux-4.4/include/uapi/linux/if_tun.h
11552--- linux-4.4/include/uapi/linux/if_tun.h 2016-01-11 00:01:32.000000000 +0100
11553+++ linux-4.4/include/uapi/linux/if_tun.h 2021-02-24 16:56:24.592822971 +0100
927ca606
AM
11554@@ -56,6 +56,7 @@
11555 */
11556 #define TUNSETVNETBE _IOW('T', 222, int)
11557 #define TUNGETVNETBE _IOR('T', 223, int)
11558+#define TUNSETNID _IOW('T', 224, int)
4bf69007
AM
11559
11560 /* TUNSETIFF ifr flags */
11561 #define IFF_TUN 0x0001
c2806d43
AM
11562diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/major.h linux-4.4/include/uapi/linux/major.h
11563--- linux-4.4/include/uapi/linux/major.h 2016-01-11 00:01:32.000000000 +0100
11564+++ linux-4.4/include/uapi/linux/major.h 2021-02-24 16:56:24.592822971 +0100
4bf69007
AM
11565@@ -15,6 +15,7 @@
11566 #define HD_MAJOR IDE0_MAJOR
11567 #define PTY_SLAVE_MAJOR 3
11568 #define TTY_MAJOR 4
11569+#define VROOT_MAJOR 4 /* NOTE(review): same number as TTY_MAJOR on the previous line -- confirm this is intended */
11570 #define TTYAUX_MAJOR 5
11571 #define LP_MAJOR 6
11572 #define VCS_MAJOR 7
c2806d43
AM
11573diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/nfs_mount.h linux-4.4/include/uapi/linux/nfs_mount.h
11574--- linux-4.4/include/uapi/linux/nfs_mount.h 2016-01-11 00:01:32.000000000 +0100
11575+++ linux-4.4/include/uapi/linux/nfs_mount.h 2021-02-24 16:56:24.592822971 +0100
4bf69007 11576@@ -63,7 +63,8 @@ struct nfs_mount_data {
c2e5f7c8 11577 #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 non-text parsed mount data only */
4bf69007
AM
11578 #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */
11579 #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */
11580-#define NFS_MOUNT_FLAGMASK 0xFFFF
11581+#define NFS_MOUNT_TAGGED 0x10000 /* context tagging; NOTE(review): same value as the internal NFS_MOUNT_LOOKUP_CACHE_NONEG below -- confirm */
11582+#define NFS_MOUNT_FLAGMASK 0x1FFFF /* widened from 0xFFFF so the mask covers NFS_MOUNT_TAGGED */
11583
11584 /* The following are for internal use only */
11585 #define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
c2806d43
AM
11586diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/reboot.h linux-4.4/include/uapi/linux/reboot.h
11587--- linux-4.4/include/uapi/linux/reboot.h 2016-01-11 00:01:32.000000000 +0100
11588+++ linux-4.4/include/uapi/linux/reboot.h 2021-02-24 16:56:24.592822971 +0100
4bf69007
AM
11589@@ -33,7 +33,7 @@
11590 #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4
11591 #define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2
11592 #define LINUX_REBOOT_CMD_KEXEC 0x45584543
11593-
11594+#define LINUX_REBOOT_CMD_OOM 0xDEADBEEF
11595
11596
11597 #endif /* _UAPI_LINUX_REBOOT_H */
c2806d43
AM
11598diff -urNp -x '*.orig' linux-4.4/include/uapi/linux/sysctl.h linux-4.4/include/uapi/linux/sysctl.h
11599--- linux-4.4/include/uapi/linux/sysctl.h 2021-02-24 16:56:11.925757384 +0100
11600+++ linux-4.4/include/uapi/linux/sysctl.h 2021-02-24 16:56:24.592822971 +0100
4bf69007
AM
11601@@ -60,6 +60,7 @@ enum
11602 CTL_ABI=9, /* Binary emulation */
11603 CTL_CPU=10, /* CPU stuff (speed scaling, etc) */
11604 CTL_ARLAN=254, /* arlan wireless driver */
11605+ CTL_VSERVER=4242, /* Linux-VServer debug */
11606 CTL_S390DBF=5677, /* s390 debug */
11607 CTL_SUNRPC=7249, /* sunrpc debug */
11608 CTL_PM=9899, /* frv power management */
11609@@ -94,6 +95,7 @@ enum
11610
11611 KERN_PANIC=15, /* int: panic timeout */
11612 KERN_REALROOTDEV=16, /* real root device to mount after initrd */
11613+ KERN_VSHELPER=17, /* string: path to vshelper policy agent */
11614
11615 KERN_SPARC_REBOOT=21, /* reboot command on Sparc */
11616 KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */
c2806d43
AM
11617diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/Kbuild linux-4.4/include/uapi/vserver/Kbuild
11618--- linux-4.4/include/uapi/vserver/Kbuild 1970-01-01 01:00:00.000000000 +0100
11619+++ linux-4.4/include/uapi/vserver/Kbuild 2021-02-24 16:56:24.596156409 +0100
11620@@ -0,0 +1,9 @@
11621+
11622+header-y += context_cmd.h network_cmd.h space_cmd.h \
11623+ cacct_cmd.h cvirt_cmd.h limit_cmd.h dlimit_cmd.h \
11624+ inode_cmd.h tag_cmd.h sched_cmd.h signal_cmd.h \
11625+ debug_cmd.h device_cmd.h
11626+
11627+header-y += switch.h context.h network.h monitor.h \
11628+ limit.h inode.h device.h
11629+
11630diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/cacct_cmd.h linux-4.4/include/uapi/vserver/cacct_cmd.h
11631--- linux-4.4/include/uapi/vserver/cacct_cmd.h 1970-01-01 01:00:00.000000000 +0100
11632+++ linux-4.4/include/uapi/vserver/cacct_cmd.h 2021-02-24 16:56:24.592822971 +0100
4bf69007
AM
11633@@ -0,0 +1,15 @@
11634+#ifndef _UAPI_VS_CACCT_CMD_H
11635+#define _UAPI_VS_CACCT_CMD_H
d337f35e
JR
11636+
11637+
4bf69007 11638+/* sock_stat command */
d337f35e 11639+
4bf69007 11640+#define VCMD_sock_stat VC_CMD(VSTAT, 5, 0)
d337f35e 11641+
4bf69007
AM
11642+struct vcmd_sock_stat_v0 {
11643+ uint32_t field;
11644+ uint32_t count[3];
11645+ uint64_t total[3];
11646+};
d337f35e 11647+
4bf69007 11648+#endif /* _UAPI_VS_CACCT_CMD_H */
c2806d43
AM
11649diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/context.h linux-4.4/include/uapi/vserver/context.h
11650--- linux-4.4/include/uapi/vserver/context.h 1970-01-01 01:00:00.000000000 +0100
11651+++ linux-4.4/include/uapi/vserver/context.h 2021-02-24 16:56:24.592822971 +0100
11652@@ -0,0 +1,81 @@
11653+#ifndef _UAPI_VS_CONTEXT_H
11654+#define _UAPI_VS_CONTEXT_H
11655+
11656+#include <linux/types.h>
11657+#include <linux/capability.h>
11658+
11659+
11660+/* context flags */
11661+
11662+#define VXF_INFO_SCHED 0x00000002
11663+#define VXF_INFO_NPROC 0x00000004
11664+#define VXF_INFO_PRIVATE 0x00000008
11665+
11666+#define VXF_INFO_INIT 0x00000010
11667+#define VXF_INFO_HIDE 0x00000020
11668+#define VXF_INFO_ULIMIT 0x00000040
11669+#define VXF_INFO_NSPACE 0x00000080
11670+
11671+#define VXF_SCHED_HARD 0x00000100
11672+#define VXF_SCHED_PRIO 0x00000200
11673+#define VXF_SCHED_PAUSE 0x00000400
11674+
11675+#define VXF_VIRT_MEM 0x00010000
11676+#define VXF_VIRT_UPTIME 0x00020000
11677+#define VXF_VIRT_CPU 0x00040000
11678+#define VXF_VIRT_LOAD 0x00080000
11679+#define VXF_VIRT_TIME 0x00100000
11680+
11681+#define VXF_HIDE_MOUNT 0x01000000
11682+/* was VXF_HIDE_NETIF 0x02000000 */
11683+#define VXF_HIDE_VINFO 0x04000000
11684+
11685+#define VXF_STATE_SETUP (1ULL << 32)
11686+#define VXF_STATE_INIT (1ULL << 33)
11687+#define VXF_STATE_ADMIN (1ULL << 34)
11688+
11689+#define VXF_SC_HELPER (1ULL << 36)
11690+#define VXF_REBOOT_KILL (1ULL << 37)
11691+#define VXF_PERSISTENT (1ULL << 38)
11692+
11693+#define VXF_FORK_RSS (1ULL << 48)
11694+#define VXF_PROLIFIC (1ULL << 49)
11695+
11696+#define VXF_IGNEG_NICE (1ULL << 52)
11697+
11698+#define VXF_ONE_TIME (0x0007ULL << 32) /* bits 32-34, the same bits as VXF_INIT_SET */
11699+
11700+#define VXF_INIT_SET (VXF_STATE_SETUP | VXF_STATE_INIT | VXF_STATE_ADMIN) /* bits 32, 33 and 34 */
11701+
11702+
11703+/* context migration */
11704+
11705+#define VXM_SET_INIT 0x00000001
11706+#define VXM_SET_REAPER 0x00000002
11707+
11708+/* context caps */
11709+
11710+#define VXC_SET_UTSNAME 0x00000001
11711+#define VXC_SET_RLIMIT 0x00000002
11712+#define VXC_FS_SECURITY 0x00000004
11713+#define VXC_FS_TRUSTED 0x00000008
11714+#define VXC_TIOCSTI 0x00000010
11715+
11716+/* was VXC_RAW_ICMP 0x00000100 */
11717+#define VXC_SYSLOG 0x00001000
11718+#define VXC_OOM_ADJUST 0x00002000
11719+#define VXC_AUDIT_CONTROL 0x00004000
11720+
11721+#define VXC_SECURE_MOUNT 0x00010000
11722+/* #define VXC_SECURE_REMOUNT 0x00020000 */
11723+#define VXC_BINARY_MOUNT 0x00040000
11724+#define VXC_DEV_MOUNT 0x00080000
11725+
11726+#define VXC_QUOTA_CTL 0x00100000
11727+#define VXC_ADMIN_MAPPER 0x00200000
11728+#define VXC_ADMIN_CLOOP 0x00400000
11729+
11730+#define VXC_KTHREAD 0x01000000
11731+#define VXC_NAMESPACE 0x02000000
11732+
11733+#endif /* _UAPI_VS_CONTEXT_H */
11734diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/context_cmd.h linux-4.4/include/uapi/vserver/context_cmd.h
11735--- linux-4.4/include/uapi/vserver/context_cmd.h 1970-01-01 01:00:00.000000000 +0100
11736+++ linux-4.4/include/uapi/vserver/context_cmd.h 2021-02-24 16:56:24.592822971 +0100
4bf69007
AM
11737@@ -0,0 +1,115 @@
11738+#ifndef _UAPI_VS_CONTEXT_CMD_H
11739+#define _UAPI_VS_CONTEXT_CMD_H
d33d7b00
AM
11740+
11741+
4bf69007 11742+/* vinfo commands */
3bac966d 11743+
4bf69007 11744+#define VCMD_task_xid VC_CMD(VINFO, 1, 0)
3bac966d 11745+
3bac966d 11746+
4bf69007 11747+#define VCMD_vx_info VC_CMD(VINFO, 5, 0)
3bac966d 11748+
4bf69007
AM
11749+struct vcmd_vx_info_v0 {
11750+ uint32_t xid;
11751+ uint32_t initpid;
11752+ /* more to come */
11753+};
3bac966d
AM
11754+
11755+
4bf69007 11756+#define VCMD_ctx_stat VC_CMD(VSTAT, 0, 0)
3bac966d 11757+
4bf69007
AM
11758+struct vcmd_ctx_stat_v0 {
11759+ uint32_t usecnt;
11760+ uint32_t tasks;
11761+ /* more to come */
11762+};
3bac966d 11763+
3bac966d 11764+
4bf69007 11765+/* context commands */
3bac966d 11766+
4bf69007
AM
11767+#define VCMD_ctx_create_v0 VC_CMD(VPROC, 1, 0)
11768+#define VCMD_ctx_create VC_CMD(VPROC, 1, 1)
3bac966d 11769+
4bf69007
AM
11770+struct vcmd_ctx_create {
11771+ uint64_t flagword;
11772+};
3bac966d 11773+
4bf69007
AM
11774+#define VCMD_ctx_migrate_v0 VC_CMD(PROCMIG, 1, 0)
11775+#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 1)
3bac966d 11776+
4bf69007
AM
11777+struct vcmd_ctx_migrate {
11778+ uint64_t flagword;
11779+};
3bac966d 11780+
d33d7b00 11781+
d33d7b00 11782+
4bf69007 11783+/* flag commands */
d33d7b00 11784+
4bf69007
AM
11785+#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0)
11786+#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0)
d33d7b00 11787+
4bf69007
AM
11788+struct vcmd_ctx_flags_v0 {
11789+ uint64_t flagword;
11790+ uint64_t mask;
11791+};
3bac966d
AM
11792+
11793+
3bac966d 11794+
4bf69007 11795+/* context caps commands */
3bac966d 11796+
4bf69007
AM
11797+#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 1)
11798+#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 1)
d33d7b00 11799+
4bf69007
AM
11800+struct vcmd_ctx_caps_v1 {
11801+ uint64_t ccaps;
11802+ uint64_t cmask;
11803+};
d33d7b00 11804+
d33d7b00
AM
11805+
11806+
4bf69007 11807+/* bcaps commands */
d33d7b00 11808+
4bf69007
AM
11809+#define VCMD_get_bcaps VC_CMD(FLAGS, 9, 0)
11810+#define VCMD_set_bcaps VC_CMD(FLAGS, 10, 0)
d33d7b00 11811+
4bf69007
AM
11812+struct vcmd_bcaps {
11813+ uint64_t bcaps;
11814+ uint64_t bmask;
11815+};
3bac966d 11816+
d33d7b00 11817+
d33d7b00 11818+
4bf69007 11819+/* umask commands */
d33d7b00 11820+
4bf69007
AM
11821+#define VCMD_get_umask VC_CMD(FLAGS, 13, 0)
11822+#define VCMD_set_umask VC_CMD(FLAGS, 14, 0)
3bac966d 11823+
4bf69007
AM
11824+struct vcmd_umask {
11825+ uint64_t umask;
11826+ uint64_t mask;
11827+};
d33d7b00 11828+
d33d7b00
AM
11829+
11830+
4bf69007 11831+/* wmask commands */
d33d7b00 11832+
4bf69007
AM
11833+#define VCMD_get_wmask VC_CMD(FLAGS, 15, 0)
11834+#define VCMD_set_wmask VC_CMD(FLAGS, 16, 0)
d33d7b00 11835+
4bf69007
AM
11836+struct vcmd_wmask {
11837+ uint64_t wmask;
11838+ uint64_t mask;
d33d7b00
AM
11839+};
11840+
d33d7b00 11841+
d33d7b00 11842+
4bf69007 11843+/* OOM badness */
d33d7b00 11844+
4bf69007
AM
11845+#define VCMD_get_badness VC_CMD(MEMCTRL, 5, 0)
11846+#define VCMD_set_badness VC_CMD(MEMCTRL, 6, 0)
d33d7b00 11847+
4bf69007
AM
11848+struct vcmd_badness_v0 {
11849+ int64_t bias;
11850+};
d33d7b00 11851+
4bf69007 11852+#endif /* _UAPI_VS_CONTEXT_CMD_H */
c2806d43
AM
11853diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/cvirt_cmd.h linux-4.4/include/uapi/vserver/cvirt_cmd.h
11854--- linux-4.4/include/uapi/vserver/cvirt_cmd.h 1970-01-01 01:00:00.000000000 +0100
11855+++ linux-4.4/include/uapi/vserver/cvirt_cmd.h 2021-02-24 16:56:24.592822971 +0100
4bf69007
AM
11856@@ -0,0 +1,41 @@
11857+#ifndef _UAPI_VS_CVIRT_CMD_H
11858+#define _UAPI_VS_CVIRT_CMD_H
d33d7b00 11859+
d33d7b00 11860+
4bf69007 11861+/* virtual host info name commands */
d33d7b00 11862+
4bf69007
AM
11863+#define VCMD_set_vhi_name VC_CMD(VHOST, 1, 0)
11864+#define VCMD_get_vhi_name VC_CMD(VHOST, 2, 0)
d33d7b00 11865+
4bf69007
AM
11866+struct vcmd_vhi_name_v0 {
11867+ uint32_t field;
11868+ char name[65];
11869+};
d33d7b00 11870+
d33d7b00 11871+
4bf69007
AM
11872+enum vhi_name_field {
11873+ VHIN_CONTEXT = 0,
11874+ VHIN_SYSNAME,
11875+ VHIN_NODENAME,
11876+ VHIN_RELEASE,
11877+ VHIN_VERSION,
11878+ VHIN_MACHINE,
11879+ VHIN_DOMAINNAME,
11880+};
d33d7b00 11881+
d33d7b00 11882+
d33d7b00 11883+
4bf69007 11884+#define VCMD_virt_stat VC_CMD(VSTAT, 3, 0)
d33d7b00 11885+
4bf69007
AM
11886+struct vcmd_virt_stat_v0 {
11887+ uint64_t offset;
11888+ uint64_t uptime;
11889+ uint32_t nr_threads;
11890+ uint32_t nr_running;
11891+ uint32_t nr_uninterruptible;
11892+ uint32_t nr_onhold;
11893+ uint32_t nr_forks;
11894+ uint32_t load[3];
11895+};
2380c486 11896+
4bf69007 11897+#endif /* _UAPI_VS_CVIRT_CMD_H */
c2806d43
AM
11898diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/debug_cmd.h linux-4.4/include/uapi/vserver/debug_cmd.h
11899--- linux-4.4/include/uapi/vserver/debug_cmd.h 1970-01-01 01:00:00.000000000 +0100
11900+++ linux-4.4/include/uapi/vserver/debug_cmd.h 2021-02-24 16:56:24.592822971 +0100
4bf69007
AM
11901@@ -0,0 +1,24 @@
11902+#ifndef _UAPI_VS_DEBUG_CMD_H
11903+#define _UAPI_VS_DEBUG_CMD_H
537831f9 11904+
537831f9 11905+
4bf69007 11906+/* debug commands */
537831f9 11907+
4bf69007 11908+#define VCMD_dump_history VC_CMD(DEBUG, 1, 0)
537831f9 11909+
4bf69007
AM
11910+#define VCMD_read_history VC_CMD(DEBUG, 5, 0)
11911+#define VCMD_read_monitor VC_CMD(DEBUG, 6, 0)
537831f9 11912+
4bf69007
AM
11913+struct vcmd_read_history_v0 {
11914+ uint32_t index;
11915+ uint32_t count;
11916+ char __user *data;
11917+};
537831f9 11918+
4bf69007
AM
11919+struct vcmd_read_monitor_v0 {
11920+ uint32_t index;
11921+ uint32_t count;
11922+ char __user *data;
11923+};
537831f9 11924+
4bf69007 11925+#endif /* _UAPI_VS_DEBUG_CMD_H */
c2806d43
AM
11926diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/device.h linux-4.4/include/uapi/vserver/device.h
11927--- linux-4.4/include/uapi/vserver/device.h 1970-01-01 01:00:00.000000000 +0100
11928+++ linux-4.4/include/uapi/vserver/device.h 2021-02-24 16:56:24.592822971 +0100
11929@@ -0,0 +1,12 @@
11930+#ifndef _UAPI_VS_DEVICE_H
11931+#define _UAPI_VS_DEVICE_H
11932+
11933+
11934+#define DATTR_CREATE 0x00000001
11935+#define DATTR_OPEN 0x00000002
11936+
11937+#define DATTR_REMAP 0x00000010
11938+
11939+#define DATTR_MASK 0x00000013
11940+
11941+#endif /* _UAPI_VS_DEVICE_H */
11942diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/device_cmd.h linux-4.4/include/uapi/vserver/device_cmd.h
11943--- linux-4.4/include/uapi/vserver/device_cmd.h 1970-01-01 01:00:00.000000000 +0100
11944+++ linux-4.4/include/uapi/vserver/device_cmd.h 2021-02-24 16:56:24.592822971 +0100
4bf69007
AM
11945@@ -0,0 +1,16 @@
11946+#ifndef _UAPI_VS_DEVICE_CMD_H
11947+#define _UAPI_VS_DEVICE_CMD_H
2380c486 11948+
1163e6ab 11949+
4bf69007 11950+/* device vserver commands */
1163e6ab 11951+
4bf69007
AM
11952+#define VCMD_set_mapping VC_CMD(DEVICE, 1, 0)
11953+#define VCMD_unset_mapping VC_CMD(DEVICE, 2, 0)
e915af4e 11954+
4bf69007
AM
11955+struct vcmd_set_mapping_v0 {
11956+ const char __user *device;
11957+ const char __user *target;
11958+ uint32_t flags;
11959+};
e915af4e 11960+
4bf69007 11961+#endif /* _UAPI_VS_DEVICE_CMD_H */
c2806d43
AM
11962diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/dlimit_cmd.h linux-4.4/include/uapi/vserver/dlimit_cmd.h
11963--- linux-4.4/include/uapi/vserver/dlimit_cmd.h 1970-01-01 01:00:00.000000000 +0100
11964+++ linux-4.4/include/uapi/vserver/dlimit_cmd.h 2021-02-24 16:56:24.592822971 +0100
4bf69007
AM
11965@@ -0,0 +1,67 @@
11966+#ifndef _UAPI_VS_DLIMIT_CMD_H
11967+#define _UAPI_VS_DLIMIT_CMD_H
e915af4e 11968+
42bc425c 11969+
4bf69007 11970+/* dlimit vserver commands */
d337f35e 11971+
4bf69007
AM
11972+#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0)
11973+#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0)
d337f35e 11974+
4bf69007
AM
11975+#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0)
11976+#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0)
d337f35e 11977+
4bf69007
AM
11978+struct vcmd_ctx_dlimit_base_v0 {
11979+ const char __user *name;
11980+ uint32_t flags;
11981+};
11982+
11983+struct vcmd_ctx_dlimit_v0 {
11984+ const char __user *name;
11985+ uint32_t space_used; /* used space in kbytes */
11986+ uint32_t space_total; /* maximum space in kbytes */
11987+ uint32_t inodes_used; /* used inodes */
11988+ uint32_t inodes_total; /* maximum inodes */
11989+ uint32_t reserved; /* reserved for root in % */
11990+ uint32_t flags;
11991+};
11992+
11993+#define CDLIM_UNSET ((uint32_t)0UL) /* sentinel value 0 */
11994+#define CDLIM_INFINITY ((uint32_t)~0UL) /* sentinel value 0xFFFFFFFF (all 32 bits set) */
11995+#define CDLIM_KEEP ((uint32_t)~1UL) /* sentinel value 0xFFFFFFFE */
11996+
11997+#define DLIME_UNIT 0
11998+#define DLIME_KILO 1
11999+#define DLIME_MEGA 2
12000+#define DLIME_GIGA 3
12001+
12002+#define DLIMF_SHIFT 0x10
12003+
12004+#define DLIMS_USED 0
12005+#define DLIMS_TOTAL 2
12006+
12007+static inline /* widen a 32-bit space value to 64 bits, scaled by 2^(10*exp) */
12008+uint64_t dlimit_space_32to64(uint32_t val, uint32_t flags, int shift)
2380c486 12009+{
4bf69007
AM
12010+ int exp = (flags & DLIMF_SHIFT) ? /* exponent is read from flags only when DLIMF_SHIFT is set */
12011+ (flags >> shift) & DLIME_GIGA : DLIME_KILO; /* 2-bit field at bit 'shift'; otherwise defaults to DLIME_KILO (1) */
12012+ return ((uint64_t)val) << (10 * exp); /* cast first so the shift is done in 64 bits */
2380c486
JR
12013+}
12014+
c2806d43
AM
12015+static inline /* pack a 64-bit space value into 32 bits; counterpart of dlimit_space_32to64() */
12016+uint32_t dlimit_space_64to32(uint64_t val, uint32_t *flags, int shift)
12017+{
12018+ int exp = 0; /* number of 10-bit right shifts applied to val (0..3) */
12019+
12020+ if (*flags & DLIMF_SHIFT) {
12021+ while (val >= (1ULL << 32) && (exp < 3)) { /* >= : 2^32 itself does not fit in uint32_t and would truncate to 0 */
12022+ val >>= 10;
12023+ exp++;
12024+ }
12025+ *flags &= ~(DLIME_GIGA << shift); /* clear the previous 2-bit exponent field */
12026+ *flags |= exp << shift; /* store the exponent chosen above for the caller */
12027+ } else
12028+ val >>= 10; /* DLIMF_SHIFT not set: fixed scaling by 2^10, flags left unchanged */
12029+ return val; /* implicitly narrowed to 32 bits; still truncates if val needs more than 3 shifts */
12030+}
12031+
12032+#endif /* _UAPI_VS_DLIMIT_CMD_H */
12033diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/inode.h linux-4.4/include/uapi/vserver/inode.h
12034--- linux-4.4/include/uapi/vserver/inode.h 1970-01-01 01:00:00.000000000 +0100
12035+++ linux-4.4/include/uapi/vserver/inode.h 2021-02-24 16:56:24.596156409 +0100
12036@@ -0,0 +1,23 @@
12037+#ifndef _UAPI_VS_INODE_H
12038+#define _UAPI_VS_INODE_H
12039+
12040+
12041+#define IATTR_TAG 0x01000000
12042+
12043+#define IATTR_ADMIN 0x00000001
12044+#define IATTR_WATCH 0x00000002
12045+#define IATTR_HIDE 0x00000004
12046+#define IATTR_FLAGS 0x00000007
12047+
12048+#define IATTR_BARRIER 0x00010000
12049+#define IATTR_IXUNLINK 0x00020000
12050+#define IATTR_IMMUTABLE 0x00040000
12051+#define IATTR_COW 0x00080000
12052+
12053+
12054+/* inode ioctls */
ec22aa5c 12055+
c2806d43
AM
12056+#define FIOC_GETXFLG _IOR('x', 5, long)
12057+#define FIOC_SETXFLG _IOW('x', 6, long)
2380c486 12058+
c2806d43
AM
12059+#endif /* _UAPI_VS_INODE_H */
12060diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/inode_cmd.h linux-4.4/include/uapi/vserver/inode_cmd.h
12061--- linux-4.4/include/uapi/vserver/inode_cmd.h 1970-01-01 01:00:00.000000000 +0100
12062+++ linux-4.4/include/uapi/vserver/inode_cmd.h 2021-02-24 16:56:24.596156409 +0100
3cc86a71
AM
12063@@ -0,0 +1,26 @@
12064+#ifndef _UAPI_VS_INODE_CMD_H
12065+#define _UAPI_VS_INODE_CMD_H
12066+
12067+
12068+/* inode vserver commands */
12069+
12070+#define VCMD_get_iattr VC_CMD(INODE, 1, 1)
12071+#define VCMD_set_iattr VC_CMD(INODE, 2, 1)
12072+
12073+#define VCMD_fget_iattr VC_CMD(INODE, 3, 0)
12074+#define VCMD_fset_iattr VC_CMD(INODE, 4, 0)
12075+
12076+struct vcmd_ctx_iattr_v1 {
12077+ const char __user *name;
12078+ uint32_t tag;
12079+ uint32_t flags;
12080+ uint32_t mask;
12081+};
12082+
12083+struct vcmd_ctx_fiattr_v0 {
12084+ uint32_t tag;
12085+ uint32_t flags;
12086+ uint32_t mask;
12087+};
12088+
12089+#endif /* _UAPI_VS_INODE_CMD_H */
c2806d43
AM
12090diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/limit.h linux-4.4/include/uapi/vserver/limit.h
12091--- linux-4.4/include/uapi/vserver/limit.h 1970-01-01 01:00:00.000000000 +0100
12092+++ linux-4.4/include/uapi/vserver/limit.h 2021-02-24 16:56:24.596156409 +0100
12093@@ -0,0 +1,14 @@
12094+#ifndef _UAPI_VS_LIMIT_H
12095+#define _UAPI_VS_LIMIT_H
4a036bed 12096+
42bc425c 12097+
c2806d43
AM
12098+#define VLIMIT_NSOCK 16
12099+#define VLIMIT_OPENFD 17
12100+#define VLIMIT_ANON 18
12101+#define VLIMIT_SHMEM 19
12102+#define VLIMIT_SEMARY 20
12103+#define VLIMIT_NSEMS 21
12104+#define VLIMIT_DENTRY 22
12105+#define VLIMIT_MAPPED 23
adc1caaa 12106+
c2806d43
AM
12107+#endif /* _UAPI_VS_LIMIT_H */
12108diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/limit_cmd.h linux-4.4/include/uapi/vserver/limit_cmd.h
12109--- linux-4.4/include/uapi/vserver/limit_cmd.h 1970-01-01 01:00:00.000000000 +0100
12110+++ linux-4.4/include/uapi/vserver/limit_cmd.h 2021-02-24 16:56:24.596156409 +0100
4bf69007
AM
12111@@ -0,0 +1,40 @@
12112+#ifndef _UAPI_VS_LIMIT_CMD_H
12113+#define _UAPI_VS_LIMIT_CMD_H
adc1caaa 12114+
adc1caaa 12115+
4bf69007 12116+/* rlimit vserver commands */
adc1caaa 12117+
4bf69007
AM
12118+#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0)
12119+#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0)
12120+#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0)
12121+#define VCMD_reset_hits VC_CMD(RLIMIT, 7, 0)
12122+#define VCMD_reset_minmax VC_CMD(RLIMIT, 9, 0)
adc1caaa 12123+
4bf69007
AM
12124+struct vcmd_ctx_rlimit_v0 {
12125+ uint32_t id;
12126+ uint64_t minimum;
12127+ uint64_t softlimit;
12128+ uint64_t maximum;
12129+};
d33d7b00 12130+
4bf69007
AM
12131+struct vcmd_ctx_rlimit_mask_v0 {
12132+ uint32_t minimum;
12133+ uint32_t softlimit;
12134+ uint32_t maximum;
12135+};
d33d7b00 12136+
4bf69007 12137+#define VCMD_rlimit_stat VC_CMD(VSTAT, 1, 0)
d33d7b00 12138+
4bf69007
AM
12139+struct vcmd_rlimit_stat_v0 {
12140+ uint32_t id;
12141+ uint32_t hits;
12142+ uint64_t value;
12143+ uint64_t minimum;
12144+ uint64_t maximum;
12145+};
d33d7b00 12146+
4bf69007
AM
12147+#define CRLIM_UNSET (0ULL) /* sentinel value 0 */
12148+#define CRLIM_INFINITY (~0ULL) /* sentinel: all bits of an unsigned long long set */
12149+#define CRLIM_KEEP (~1ULL) /* sentinel: all bits set except bit 0 */
d33d7b00 12150+
4bf69007 12151+#endif /* _UAPI_VS_LIMIT_CMD_H */
c2806d43
AM
12152diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/monitor.h linux-4.4/include/uapi/vserver/monitor.h
12153--- linux-4.4/include/uapi/vserver/monitor.h 1970-01-01 01:00:00.000000000 +0100
12154+++ linux-4.4/include/uapi/vserver/monitor.h 2021-02-24 16:56:24.596156409 +0100
4bf69007
AM
12155@@ -0,0 +1,96 @@
12156+#ifndef _UAPI_VS_MONITOR_H
12157+#define _UAPI_VS_MONITOR_H
d33d7b00 12158+
4bf69007 12159+#include <linux/types.h>
d33d7b00 12160+
d33d7b00 12161+
4bf69007
AM
12162+enum {
12163+ VXM_UNUSED = 0,
d33d7b00 12164+
4bf69007 12165+ VXM_SYNC = 0x10,
d33d7b00 12166+
4bf69007
AM
12167+ VXM_UPDATE = 0x20,
12168+ VXM_UPDATE_1,
12169+ VXM_UPDATE_2,
d33d7b00 12170+
4bf69007
AM
12171+ VXM_RQINFO_1 = 0x24,
12172+ VXM_RQINFO_2,
d33d7b00 12173+
4bf69007
AM
12174+ VXM_ACTIVATE = 0x40,
12175+ VXM_DEACTIVATE,
12176+ VXM_IDLE,
d33d7b00 12177+
4bf69007
AM
12178+ VXM_HOLD = 0x44,
12179+ VXM_UNHOLD,
d33d7b00 12180+
4bf69007
AM
12181+ VXM_MIGRATE = 0x48,
12182+ VXM_RESCHED,
d33d7b00 12183+
4bf69007
AM
12184+ /* all other bits are flags */
12185+ VXM_SCHED = 0x80,
12186+};
d33d7b00 12187+
4bf69007
AM
12188+struct _vxm_update_1 {
12189+ uint32_t tokens_max;
12190+ uint32_t fill_rate;
12191+ uint32_t interval;
12192+};
d33d7b00 12193+
4bf69007
AM
12194+struct _vxm_update_2 {
12195+ uint32_t tokens_min;
12196+ uint32_t fill_rate;
12197+ uint32_t interval;
12198+};
d33d7b00 12199+
4bf69007
AM
12200+struct _vxm_rqinfo_1 {
12201+ uint16_t running;
12202+ uint16_t onhold;
12203+ uint16_t iowait;
12204+ uint16_t uintr;
12205+ uint32_t idle_tokens;
12206+};
d33d7b00 12207+
4bf69007
AM
12208+struct _vxm_rqinfo_2 {
12209+ uint32_t norm_time;
12210+ uint32_t idle_time;
12211+ uint32_t idle_skip;
12212+};
d33d7b00 12213+
4bf69007
AM
12214+struct _vxm_sched {
12215+ uint32_t tokens;
12216+ uint32_t norm_time;
12217+ uint32_t idle_time;
12218+};
d33d7b00 12219+
4bf69007
AM
12220+struct _vxm_task {
12221+ uint16_t pid;
12222+ uint16_t state;
12223+};
d33d7b00 12224+
4bf69007
AM
12225+struct _vxm_event {
12226+ uint32_t jif;
12227+ union {
12228+ uint32_t seq;
12229+ uint32_t sec;
12230+ };
12231+ union {
12232+ uint32_t tokens;
12233+ uint32_t nsec;
12234+ struct _vxm_task tsk;
12235+ };
12236+};
61b0c03f 12237+
4bf69007
AM
12238+struct _vx_mon_entry {
12239+ uint16_t type;
12240+ uint16_t xid;
12241+ union {
12242+ struct _vxm_event ev;
12243+ struct _vxm_sched sd;
12244+ struct _vxm_update_1 u1;
12245+ struct _vxm_update_2 u2;
12246+ struct _vxm_rqinfo_1 q1;
12247+ struct _vxm_rqinfo_2 q2;
12248+ };
12249+};
d33d7b00 12250+
4bf69007 12251+#endif /* _UAPI_VS_MONITOR_H */
c2806d43
AM
12252diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/network.h linux-4.4/include/uapi/vserver/network.h
12253--- linux-4.4/include/uapi/vserver/network.h 1970-01-01 01:00:00.000000000 +0100
12254+++ linux-4.4/include/uapi/vserver/network.h 2021-02-24 16:56:24.596156409 +0100
12255@@ -0,0 +1,76 @@
12256+#ifndef _UAPI_VS_NETWORK_H
12257+#define _UAPI_VS_NETWORK_H
12258+
12259+#include <linux/types.h>
12260+
12261+
12262+#define MAX_N_CONTEXT 65535 /* Arbitrary limit */
12263+
12264+
12265+/* network flags */
12266+
12267+#define NXF_INFO_PRIVATE 0x00000008
12268+
12269+#define NXF_SINGLE_IP 0x00000100
12270+#define NXF_LBACK_REMAP 0x00000200
12271+#define NXF_LBACK_ALLOW 0x00000400
12272+
12273+#define NXF_HIDE_NETIF 0x02000000
12274+#define NXF_HIDE_LBACK 0x04000000
12275+
12276+#define NXF_STATE_SETUP (1ULL << 32)
12277+#define NXF_STATE_ADMIN (1ULL << 34)
12278+
12279+#define NXF_SC_HELPER (1ULL << 36)
12280+#define NXF_PERSISTENT (1ULL << 38)
12281+
12282+#define NXF_ONE_TIME (0x0005ULL << 32)
12283+
12284+
12285+#define NXF_INIT_SET (__nxf_init_set())
12286+
12287+static inline uint64_t __nxf_init_set(void) { /* computes the flag word that NXF_INIT_SET expands to */
12288+ return NXF_STATE_ADMIN
12289+#ifdef CONFIG_VSERVER_AUTO_LBACK
12290+ | NXF_LBACK_REMAP
12291+ | NXF_HIDE_LBACK
12292+#endif
12293+#ifdef CONFIG_VSERVER_AUTO_SINGLE
12294+ | NXF_SINGLE_IP
12295+#endif
12296+ | NXF_HIDE_NETIF; /* included unconditionally, like NXF_STATE_ADMIN */
12297+}
12298+
12299+
12300+/* network caps */
12301+
12302+#define NXC_TUN_CREATE 0x00000001
12303+
12304+#define NXC_RAW_ICMP 0x00000100
12305+
12306+#define NXC_MULTICAST 0x00001000
12307+
12308+
12309+/* address types */
12310+
12311+#define NXA_TYPE_IPV4 0x0001
12312+#define NXA_TYPE_IPV6 0x0002
12313+
12314+#define NXA_TYPE_NONE 0x0000
12315+#define NXA_TYPE_ANY 0x00FF
12316+
12317+#define NXA_TYPE_ADDR 0x0010
12318+#define NXA_TYPE_MASK 0x0020
12319+#define NXA_TYPE_RANGE 0x0040
12320+
12321+#define NXA_MASK_ALL (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE)
12322+
12323+#define NXA_MOD_BCAST 0x0100
12324+#define NXA_MOD_LBACK 0x0200
12325+
12326+#define NXA_LOOPBACK 0x1000
12327+
12328+#define NXA_MASK_BIND (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK)
12329+#define NXA_MASK_SHOW (NXA_MASK_ALL | NXA_LOOPBACK)
12330+
12331+#endif /* _UAPI_VS_NETWORK_H */
12332diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/network_cmd.h linux-4.4/include/uapi/vserver/network_cmd.h
12333--- linux-4.4/include/uapi/vserver/network_cmd.h 1970-01-01 01:00:00.000000000 +0100
12334+++ linux-4.4/include/uapi/vserver/network_cmd.h 2021-02-24 16:56:24.596156409 +0100
4bf69007
AM
12335@@ -0,0 +1,123 @@
12336+#ifndef _UAPI_VS_NETWORK_CMD_H
12337+#define _UAPI_VS_NETWORK_CMD_H
2380c486 12338+
2380c486 12339+
4bf69007 12340+/* vinfo commands */
2380c486 12341+
4bf69007 12342+#define VCMD_task_nid VC_CMD(VINFO, 2, 0)
2380c486 12343+
2380c486 12344+
4bf69007 12345+#define VCMD_nx_info VC_CMD(VINFO, 6, 0)
2380c486 12346+
4bf69007
AM
12347+struct vcmd_nx_info_v0 {
12348+ uint32_t nid;
12349+ /* more to come */
12350+};
2380c486 12351+
2380c486 12352+
4bf69007
AM
12353+#include <linux/in.h>
12354+#include <linux/in6.h>
2380c486 12355+
4bf69007
AM
12356+#define VCMD_net_create_v0 VC_CMD(VNET, 1, 0)
12357+#define VCMD_net_create VC_CMD(VNET, 1, 1)
2380c486 12358+
4bf69007
AM
12359+struct vcmd_net_create {
12360+ uint64_t flagword;
12361+};
2380c486 12362+
4bf69007 12363+#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0)
2380c486 12364+
4bf69007
AM
12365+#define VCMD_net_add VC_CMD(NETALT, 1, 0)
12366+#define VCMD_net_remove VC_CMD(NETALT, 2, 0)
2380c486 12367+
4bf69007
AM
12368+struct vcmd_net_addr_v0 {
12369+ uint16_t type;
12370+ uint16_t count;
12371+ struct in_addr ip[4];
12372+ struct in_addr mask[4];
12373+};
2380c486 12374+
4bf69007
AM
12375+#define VCMD_net_add_ipv4_v1 VC_CMD(NETALT, 1, 1)
12376+#define VCMD_net_rem_ipv4_v1 VC_CMD(NETALT, 2, 1)
2380c486 12377+
4bf69007
AM
12378+struct vcmd_net_addr_ipv4_v1 {
12379+ uint16_t type;
12380+ uint16_t flags;
12381+ struct in_addr ip;
12382+ struct in_addr mask;
12383+};
2380c486 12384+
4bf69007
AM
12385+#define VCMD_net_add_ipv4 VC_CMD(NETALT, 1, 2)
12386+#define VCMD_net_rem_ipv4 VC_CMD(NETALT, 2, 2)
2380c486 12387+
4bf69007
AM
12388+struct vcmd_net_addr_ipv4_v2 {
12389+ uint16_t type;
12390+ uint16_t flags;
12391+ struct in_addr ip;
12392+ struct in_addr ip2;
12393+ struct in_addr mask;
12394+};
2380c486 12395+
4bf69007
AM
12396+#define VCMD_net_add_ipv6 VC_CMD(NETALT, 3, 1)
12397+#define VCMD_net_remove_ipv6 VC_CMD(NETALT, 4, 1)
2380c486 12398+
4bf69007
AM
12399+struct vcmd_net_addr_ipv6_v1 {
12400+ uint16_t type;
12401+ uint16_t flags;
12402+ uint32_t prefix;
12403+ struct in6_addr ip;
12404+ struct in6_addr mask;
12405+};
2380c486 12406+
4bf69007
AM
12407+#define VCMD_add_match_ipv4 VC_CMD(NETALT, 5, 0)
12408+#define VCMD_get_match_ipv4 VC_CMD(NETALT, 6, 0)
2380c486 12409+
4bf69007
AM
12410+struct vcmd_match_ipv4_v0 {
12411+ uint16_t type;
12412+ uint16_t flags;
12413+ uint16_t parent;
12414+ uint16_t prefix;
12415+ struct in_addr ip;
12416+ struct in_addr ip2;
12417+ struct in_addr mask;
12418+};
2380c486 12419+
4bf69007
AM
12420+#define VCMD_add_match_ipv6 VC_CMD(NETALT, 7, 0)
12421+#define VCMD_get_match_ipv6 VC_CMD(NETALT, 8, 0)
2380c486 12422+
4bf69007
AM
12423+struct vcmd_match_ipv6_v0 {
12424+ uint16_t type;
12425+ uint16_t flags;
12426+ uint16_t parent;
12427+ uint16_t prefix;
12428+ struct in6_addr ip;
12429+ struct in6_addr ip2;
12430+ struct in6_addr mask;
12431+};
2380c486 12432+
2380c486 12433+
2380c486 12434+
2380c486 12435+
4bf69007 12436+/* flag commands */
2380c486 12437+
4bf69007
AM
12438+#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0)
12439+#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0)
2380c486 12440+
4bf69007
AM
12441+struct vcmd_net_flags_v0 {
12442+ uint64_t flagword;
12443+ uint64_t mask;
12444+};
2380c486 12445+
2380c486 12446+
ab30d09f 12447+
4bf69007 12448+/* network caps commands */
ab30d09f 12449+
4bf69007
AM
12450+#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0)
12451+#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0)
ec22aa5c 12452+
4bf69007
AM
12453+struct vcmd_net_caps_v0 {
12454+ uint64_t ncaps;
12455+ uint64_t cmask;
12456+};
3bac966d 12457+
4bf69007 12458+#endif /* _UAPI_VS_NETWORK_CMD_H */
c2806d43
AM
12459diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/sched_cmd.h linux-4.4/include/uapi/vserver/sched_cmd.h
12460--- linux-4.4/include/uapi/vserver/sched_cmd.h 1970-01-01 01:00:00.000000000 +0100
12461+++ linux-4.4/include/uapi/vserver/sched_cmd.h 2021-02-24 16:56:24.596156409 +0100
4bf69007
AM
12462@@ -0,0 +1,13 @@
12463+#ifndef _UAPI_VS_SCHED_CMD_H
12464+#define _UAPI_VS_SCHED_CMD_H
d337f35e 12465+
d337f35e 12466+
4bf69007
AM
12467+struct vcmd_prio_bias {
12468+ int32_t cpu_id;
12469+ int32_t prio_bias;
12470+};
2380c486 12471+
4bf69007
AM
12472+#define VCMD_set_prio_bias VC_CMD(SCHED, 4, 0)
12473+#define VCMD_get_prio_bias VC_CMD(SCHED, 5, 0)
d337f35e 12474+
4bf69007 12475+#endif /* _UAPI_VS_SCHED_CMD_H */
c2806d43
AM
12476diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/signal_cmd.h linux-4.4/include/uapi/vserver/signal_cmd.h
12477--- linux-4.4/include/uapi/vserver/signal_cmd.h 1970-01-01 01:00:00.000000000 +0100
12478+++ linux-4.4/include/uapi/vserver/signal_cmd.h 2021-02-24 16:56:24.596156409 +0100
4bf69007
AM
12479@@ -0,0 +1,31 @@
12480+#ifndef _UAPI_VS_SIGNAL_CMD_H
12481+#define _UAPI_VS_SIGNAL_CMD_H
d337f35e 12482+
d337f35e 12483+
4bf69007 12484+/* signalling vserver commands */
d337f35e 12485+
4bf69007
AM
12486+#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0)
12487+#define VCMD_wait_exit VC_CMD(EVENT, 99, 0)
d337f35e 12488+
4bf69007
AM
12489+struct vcmd_ctx_kill_v0 {
12490+ int32_t pid;
12491+ int32_t sig;
12492+};
d337f35e 12493+
4bf69007
AM
12494+struct vcmd_wait_exit_v0 {
12495+ int32_t reboot_cmd;
12496+ int32_t exit_code;
12497+};
d337f35e 12498+
d337f35e 12499+
4bf69007 12500+/* process alteration commands */
ab30d09f 12501+
4bf69007
AM
12502+#define VCMD_get_pflags VC_CMD(PROCALT, 5, 0)
12503+#define VCMD_set_pflags VC_CMD(PROCALT, 6, 0)
d337f35e 12504+
4bf69007
AM
12505+struct vcmd_pflags_v0 {
12506+ uint32_t flagword;
12507+ uint32_t mask;
12508+};
3bac966d 12509+
4bf69007 12510+#endif /* _UAPI_VS_SIGNAL_CMD_H */
c2806d43
AM
12511diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/space_cmd.h linux-4.4/include/uapi/vserver/space_cmd.h
12512--- linux-4.4/include/uapi/vserver/space_cmd.h 1970-01-01 01:00:00.000000000 +0100
12513+++ linux-4.4/include/uapi/vserver/space_cmd.h 2021-02-24 16:56:24.596156409 +0100
4bf69007
AM
12514@@ -0,0 +1,28 @@
12515+#ifndef _UAPI_VS_SPACE_CMD_H
12516+#define _UAPI_VS_SPACE_CMD_H
d337f35e 12517+
d337f35e 12518+
4bf69007
AM
12519+#define VCMD_enter_space_v0 VC_CMD(PROCALT, 1, 0)
12520+#define VCMD_enter_space_v1 VC_CMD(PROCALT, 1, 1)
12521+#define VCMD_enter_space VC_CMD(PROCALT, 1, 2)
2380c486 12522+
4bf69007
AM
12523+#define VCMD_set_space_v0 VC_CMD(PROCALT, 3, 0)
12524+#define VCMD_set_space_v1 VC_CMD(PROCALT, 3, 1)
12525+#define VCMD_set_space VC_CMD(PROCALT, 3, 2)
d337f35e 12526+
4bf69007 12527+#define VCMD_get_space_mask_v0 VC_CMD(PROCALT, 4, 0)
d337f35e 12528+
4bf69007
AM
12529+#define VCMD_get_space_mask VC_CMD(VSPACE, 0, 1)
12530+#define VCMD_get_space_default VC_CMD(VSPACE, 1, 0)
d337f35e 12531+
d337f35e 12532+
4bf69007
AM
12533+struct vcmd_space_mask_v1 {
12534+ uint64_t mask;
12535+};
d337f35e 12536+
4bf69007
AM
12537+struct vcmd_space_mask_v2 {
12538+ uint64_t mask;
12539+ uint32_t index;
12540+};
d337f35e 12541+
4bf69007 12542+#endif /* _UAPI_VS_SPACE_CMD_H */
c2806d43
AM
12543diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/switch.h linux-4.4/include/uapi/vserver/switch.h
12544--- linux-4.4/include/uapi/vserver/switch.h 1970-01-01 01:00:00.000000000 +0100
12545+++ linux-4.4/include/uapi/vserver/switch.h 2021-02-24 16:56:24.596156409 +0100
4bf69007
AM
12546@@ -0,0 +1,90 @@
12547+#ifndef _UAPI_VS_SWITCH_H
12548+#define _UAPI_VS_SWITCH_H
d337f35e 12549+
4bf69007 12550+#include <linux/types.h>
d337f35e 12551+
d337f35e 12552+
4bf69007
AM
12553+#define VC_CATEGORY(c) (((c) >> 24) & 0x3F) /* bits 24..29 of a command word: 6-bit category */
12554+#define VC_COMMAND(c) (((c) >> 16) & 0xFF) /* bits 16..23: 8-bit command index */
12555+#define VC_VERSION(c) ((c) & 0xFFF) /* bits 0..11: 12-bit version */
d337f35e 12556+
4bf69007
AM
12557+#define VC_CMD(c, i, v) ((((VC_CAT_ ## c) & 0x3F) << 24) \
12558+ | (((i) & 0xFF) << 16) | ((v) & 0xFFF)) /* c is token-pasted onto VC_CAT_, so pass the bare category name (e.g. FLAGS); each field is masked to its width */
d337f35e 12559+
4bf69007 12560+/*
d337f35e 12561+
4bf69007 12562+ Syscall Matrix V2.8
d337f35e 12563+
4bf69007
AM
12564+ |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
12565+ |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | |
12566+ |INFO |SETUP | |MOVE | | | | | |
12567+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12568+ SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICE | |
12569+ HOST | 00| 01| 02| 03| 04| 05| | 06| 07|
12570+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12571+ CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| | |SCHED. | |
12572+ PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15|
12573+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12574+ MEMORY | | | | |MEMCTRL| | |SWAP | |
12575+ | 16| 17| 18| 19| 20| 21| | 22| 23|
12576+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12577+ NETWORK| |VNET |NETALT |NETMIG |NETCTRL| | |SERIAL | |
12578+ | 24| 25| 26| 27| 28| 29| | 30| 31|
12579+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12580+ DISK | | | |TAGMIG |DLIMIT | | |INODE | |
12581+ VFS | 32| 33| 34| 35| 36| 37| | 38| 39|
12582+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12583+ OTHER |VSTAT | | | | | | |VINFO | |
12584+ | 40| 41| 42| 43| 44| 45| | 46| 47|
12585+ =======+=======+=======+=======+=======+=======+=======+ +=======+=======+
12586+ SPECIAL|EVENT | | | |FLAGS | | |VSPACE | |
12587+ | 48| 49| 50| 51| 52| 53| | 54| 55|
12588+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12589+ SPECIAL|DEBUG | | | |RLIMIT |SYSCALL| | |COMPAT |
12590+ | 56| 57| 58| 59| 60|TEST 61| | 62| 63|
12591+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
d337f35e 12592+
4bf69007 12593+*/
d337f35e 12594+
4bf69007 12595+#define VC_CAT_VERSION 0
d337f35e 12596+
4bf69007
AM
12597+#define VC_CAT_VSETUP 1
12598+#define VC_CAT_VHOST 2
d337f35e 12599+
4bf69007 12600+#define VC_CAT_DEVICE 6
d337f35e 12601+
4bf69007
AM
12602+#define VC_CAT_VPROC 9
12603+#define VC_CAT_PROCALT 10
12604+#define VC_CAT_PROCMIG 11
12605+#define VC_CAT_PROCTRL 12
d337f35e 12606+
4bf69007
AM
12607+#define VC_CAT_SCHED 14
12608+#define VC_CAT_MEMCTRL 20
d337f35e 12609+
4bf69007
AM
12610+#define VC_CAT_VNET 25
12611+#define VC_CAT_NETALT 26
12612+#define VC_CAT_NETMIG 27
12613+#define VC_CAT_NETCTRL 28
d337f35e 12614+
4bf69007
AM
12615+#define VC_CAT_TAGMIG 35
12616+#define VC_CAT_DLIMIT 36
12617+#define VC_CAT_INODE 38
d337f35e 12618+
4bf69007
AM
12619+#define VC_CAT_VSTAT 40
12620+#define VC_CAT_VINFO 46
12621+#define VC_CAT_EVENT 48
d337f35e 12622+
4bf69007
AM
12623+#define VC_CAT_FLAGS 52
12624+#define VC_CAT_VSPACE 54
12625+#define VC_CAT_DEBUG 56
12626+#define VC_CAT_RLIMIT 60
d337f35e 12627+
4bf69007
AM
12628+#define VC_CAT_SYSTEST 61
12629+#define VC_CAT_COMPAT 63
d337f35e 12630+
4bf69007 12631+/* query version */
d337f35e 12632+
4bf69007
AM
12633+#define VCMD_get_version VC_CMD(VERSION, 0, 0)
12634+#define VCMD_get_vci VC_CMD(VERSION, 1, 0)
2380c486 12635+
4bf69007 12636+#endif /* _UAPI_VS_SWITCH_H */
c2806d43
AM
12637diff -urNp -x '*.orig' linux-4.4/include/uapi/vserver/tag_cmd.h linux-4.4/include/uapi/vserver/tag_cmd.h
12638--- linux-4.4/include/uapi/vserver/tag_cmd.h 1970-01-01 01:00:00.000000000 +0100
12639+++ linux-4.4/include/uapi/vserver/tag_cmd.h 2021-02-24 16:56:24.596156409 +0100
4bf69007
AM
12640@@ -0,0 +1,14 @@
12641+#ifndef _UAPI_VS_TAG_CMD_H
12642+#define _UAPI_VS_TAG_CMD_H
d337f35e 12643+
d337f35e 12644+
4bf69007 12645+/* vinfo commands */
d337f35e 12646+
4bf69007 12647+#define VCMD_task_tag VC_CMD(VINFO, 3, 0)
d337f35e
JR
12648+
12649+
4bf69007 12650+/* context commands */
d337f35e 12651+
4bf69007 12652+#define VCMD_tag_migrate VC_CMD(TAGMIG, 1, 0)
2380c486 12653+
4bf69007 12654+#endif /* _UAPI_VS_TAG_CMD_H */
c2806d43
AM
12655diff -urNp -x '*.orig' linux-4.4/init/Kconfig linux-4.4/init/Kconfig
12656--- linux-4.4/init/Kconfig 2021-02-24 16:56:11.929090822 +0100
12657+++ linux-4.4/init/Kconfig 2021-02-24 16:56:24.596156409 +0100
12658@@ -938,6 +938,7 @@ config NUMA_BALANCING_DEFAULT_ENABLED
4bf69007 12659 menuconfig CGROUPS
927ca606 12660 bool "Control Group support"
265de2f7 12661 select KERNFS
4bf69007
AM
12662+ default y
12663 help
12664 This option adds support for grouping sets of processes together, for
12665 use with process control subsystems such as Cpusets, CFS, memory
c2806d43
AM
12666diff -urNp -x '*.orig' linux-4.4/init/main.c linux-4.4/init/main.c
12667--- linux-4.4/init/main.c 2021-02-24 16:56:11.929090822 +0100
12668+++ linux-4.4/init/main.c 2021-02-24 16:56:24.596156409 +0100
927ca606
AM
12669@@ -82,6 +82,7 @@
12670 #include <linux/proc_ns.h>
12671 #include <linux/io.h>
12672 #include <linux/kaiser.h>
4bf69007
AM
12673+#include <linux/vserver/percpu.h>
12674
12675 #include <asm/io.h>
12676 #include <asm/bugs.h>
c2806d43
AM
12677diff -urNp -x '*.orig' linux-4.4/ipc/mqueue.c linux-4.4/ipc/mqueue.c
12678--- linux-4.4/ipc/mqueue.c 2021-02-24 16:56:11.932424260 +0100
12679+++ linux-4.4/ipc/mqueue.c 2021-02-24 16:56:24.596156409 +0100
4bf69007
AM
12680@@ -35,6 +35,8 @@
12681 #include <linux/ipc_namespace.h>
12682 #include <linux/user_namespace.h>
12683 #include <linux/slab.h>
12684+#include <linux/vs_context.h>
12685+#include <linux/vs_limit.h>
12686
12687 #include <net/sock.h>
12688 #include "util.h"
927ca606 12689@@ -75,6 +77,7 @@ struct mqueue_inode_info {
bb20add7 12690 struct pid *notify_owner;
4bf69007
AM
12691 struct user_namespace *notify_user_ns;
12692 struct user_struct *user; /* user who created, for accounting */
12693+ struct vx_info *vxi;
12694 struct sock *notify_sock;
12695 struct sk_buff *notify_cookie;
12696
927ca606 12697@@ -230,6 +233,7 @@ static struct inode *mqueue_get_inode(st
4bf69007
AM
12698 if (S_ISREG(mode)) {
12699 struct mqueue_inode_info *info;
12700 unsigned long mq_bytes, mq_treesize;
12701+ struct vx_info *vxi = current_vx_info();
12702
12703 inode->i_fop = &mqueue_file_operations;
12704 inode->i_size = FILENT_SIZE;
927ca606 12705@@ -243,6 +247,7 @@ static struct inode *mqueue_get_inode(st
4bf69007
AM
12706 info->notify_user_ns = NULL;
12707 info->qsize = 0;
12708 info->user = NULL; /* set when all is ok */
12709+ info->vxi = NULL;
12710 info->msg_tree = RB_ROOT;
12711 info->node_cache = NULL;
12712 memset(&info->attr, 0, sizeof(info->attr));
927ca606 12713@@ -276,17 +281,20 @@ static struct inode *mqueue_get_inode(st
4bf69007
AM
12714
12715 spin_lock(&mq_lock);
12716 if (u->mq_bytes + mq_bytes < u->mq_bytes ||
12717- u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
12718+ u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE) ||
12719+ !vx_ipcmsg_avail(vxi, mq_bytes)) {
12720 spin_unlock(&mq_lock);
12721 /* mqueue_evict_inode() releases info->messages */
12722 ret = -EMFILE;
12723 goto out_inode;
12724 }
12725 u->mq_bytes += mq_bytes;
12726+ vx_ipcmsg_add(vxi, u, mq_bytes);
12727 spin_unlock(&mq_lock);
12728
12729 /* all is ok */
12730 info->user = get_uid(u);
12731+ info->vxi = get_vx_info(vxi);
12732 } else if (S_ISDIR(mode)) {
12733 inc_nlink(inode);
12734 /* Some things misbehave if size == 0 on a directory */
3cc86a71
AM
12735@@ -395,6 +403,7 @@ static void mqueue_evict_inode(struct in
12736
4bf69007
AM
12737 user = info->user;
12738 if (user) {
12739+ struct vx_info *vxi = info->vxi;
3cc86a71 12740 unsigned long mq_bytes, mq_treesize;
7c48c125
AM
12741
12742 /* Total amount of bytes accounted for the mqueue */
3cc86a71 12743@@ -407,6 +416,7 @@ static void mqueue_evict_inode(struct in
7c48c125 12744
4bf69007
AM
12745 spin_lock(&mq_lock);
12746 user->mq_bytes -= mq_bytes;
12747+ vx_ipcmsg_sub(vxi, user, mq_bytes);
12748 /*
12749 * get_ns_from_inode() ensures that the
12750 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
3cc86a71 12751@@ -416,6 +426,7 @@ static void mqueue_evict_inode(struct in
4bf69007
AM
12752 if (ipc_ns)
12753 ipc_ns->mq_queues_count--;
12754 spin_unlock(&mq_lock);
12755+ put_vx_info(vxi);
12756 free_uid(user);
12757 }
12758 if (ipc_ns)
c2806d43
AM
12759diff -urNp -x '*.orig' linux-4.4/ipc/msg.c linux-4.4/ipc/msg.c
12760--- linux-4.4/ipc/msg.c 2021-02-24 16:56:11.932424260 +0100
12761+++ linux-4.4/ipc/msg.c 2021-02-24 16:56:24.596156409 +0100
4bf69007
AM
12762@@ -37,6 +37,7 @@
12763 #include <linux/rwsem.h>
12764 #include <linux/nsproxy.h>
12765 #include <linux/ipc_namespace.h>
12766+#include <linux/vs_base.h>
12767
12768 #include <asm/current.h>
bb20add7
AM
12769 #include <linux/uaccess.h>
12770@@ -129,6 +130,7 @@ static int newque(struct ipc_namespace *
4bf69007
AM
12771
12772 msq->q_perm.mode = msgflg & S_IRWXUGO;
12773 msq->q_perm.key = key;
12774+ msq->q_perm.xid = vx_current_xid();
12775
12776 msq->q_perm.security = NULL;
12777 retval = security_msg_queue_alloc(msq);
c2806d43
AM
12778diff -urNp -x '*.orig' linux-4.4/ipc/sem.c linux-4.4/ipc/sem.c
12779--- linux-4.4/ipc/sem.c 2021-02-24 16:56:11.932424260 +0100
12780+++ linux-4.4/ipc/sem.c 2021-02-24 16:56:24.596156409 +0100
bb20add7 12781@@ -85,6 +85,8 @@
4bf69007
AM
12782 #include <linux/rwsem.h>
12783 #include <linux/nsproxy.h>
12784 #include <linux/ipc_namespace.h>
12785+#include <linux/vs_base.h>
12786+#include <linux/vs_limit.h>
12787
bb20add7 12788 #include <linux/uaccess.h>
4bf69007 12789 #include "util.h"
927ca606 12790@@ -533,6 +535,7 @@ static int newary(struct ipc_namespace *
4bf69007
AM
12791
12792 sma->sem_perm.mode = (semflg & S_IRWXUGO);
12793 sma->sem_perm.key = key;
12794+ sma->sem_perm.xid = vx_current_xid();
12795
12796 sma->sem_perm.security = NULL;
12797 retval = security_sem_alloc(sma);
927ca606 12798@@ -563,6 +566,9 @@ static int newary(struct ipc_namespace *
4bf69007
AM
12799 return id;
12800 }
12801 ns->used_sems += nsems;
12802+ /* FIXME: obsoleted? */
12803+ vx_semary_inc(sma);
12804+ vx_nsems_add(sma, nsems);
12805
bb20add7
AM
12806 sem_unlock(sma, -1);
12807 rcu_read_unlock();
927ca606 12808@@ -1151,6 +1157,9 @@ static void freeary(struct ipc_namespace
4bf69007
AM
12809
12810 wake_up_sem_queue_do(&tasks);
12811 ns->used_sems -= sma->sem_nsems;
12812+ /* FIXME: obsoleted? */
12813+ vx_nsems_sub(sma, sma->sem_nsems);
12814+ vx_semary_dec(sma);
926e38e0 12815 ipc_rcu_putref(sma, sem_rcu_free);
4bf69007 12816 }
926e38e0 12817
c2806d43
AM
12818diff -urNp -x '*.orig' linux-4.4/ipc/shm.c linux-4.4/ipc/shm.c
12819--- linux-4.4/ipc/shm.c 2021-02-24 16:56:11.932424260 +0100
12820+++ linux-4.4/ipc/shm.c 2021-02-24 16:56:24.599489848 +0100
c2e5f7c8 12821@@ -42,6 +42,8 @@
4bf69007
AM
12822 #include <linux/nsproxy.h>
12823 #include <linux/mount.h>
12824 #include <linux/ipc_namespace.h>
12825+#include <linux/vs_context.h>
12826+#include <linux/vs_limit.h>
12827
bb20add7 12828 #include <linux/uaccess.h>
4bf69007 12829
8931d859 12830@@ -234,10 +236,14 @@ static void shm_open(struct vm_area_stru
4bf69007
AM
12831 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
12832 {
c2e5f7c8 12833 struct file *shm_file;
4bf69007
AM
12834+ struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid);
12835+ int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
c2e5f7c8
JR
12836
12837 shm_file = shp->shm_file;
12838 shp->shm_file = NULL;
12839- ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
4bf69007
AM
12840+ vx_ipcshm_sub(vxi, shp, numpages);
12841+ ns->shm_tot -= numpages;
d337f35e 12842+
eb458c31 12843 shm_rmid(shp);
4bf69007 12844 shm_unlock(shp);
c2e5f7c8 12845 if (!is_file_hugepages(shm_file))
8931d859 12846@@ -246,6 +252,7 @@ static void shm_destroy(struct ipc_names
927ca606
AM
12847 user_shm_unlock(i_size_read(file_inode(shm_file)),
12848 shp->mlock_user);
c2e5f7c8 12849 fput(shm_file);
4bf69007 12850+ put_vx_info(vxi);
926e38e0 12851 ipc_rcu_putref(shp, shm_rcu_free);
4bf69007
AM
12852 }
12853
8931d859 12854@@ -545,11 +552,15 @@ static int newseg(struct ipc_namespace *
bb20add7 12855 ns->shm_tot + numpages > ns->shm_ctlall)
4bf69007
AM
12856 return -ENOSPC;
12857
12858+ if (!vx_ipcshm_avail(current_vx_info(), numpages))
12859+ return -ENOSPC;
d337f35e 12860+
4bf69007
AM
12861 shp = ipc_rcu_alloc(sizeof(*shp));
12862 if (!shp)
12863 return -ENOMEM;
12864
12865 shp->shm_perm.key = key;
12866+ shp->shm_perm.xid = vx_current_xid();
12867 shp->shm_perm.mode = (shmflg & S_IRWXUGO);
12868 shp->mlock_user = NULL;
12869
8931d859 12870@@ -620,6 +631,7 @@ static int newseg(struct ipc_namespace *
926e38e0
JR
12871
12872 ipc_unlock_object(&shp->shm_perm);
12873 rcu_read_unlock();
4bf69007
AM
12874+ vx_ipcshm_add(current_vx_info(), key, numpages);
12875 return error;
12876
12877 no_id:
c2806d43
AM
12878diff -urNp -x '*.orig' linux-4.4/kernel/Makefile linux-4.4/kernel/Makefile
12879--- linux-4.4/kernel/Makefile 2021-02-24 16:56:11.932424260 +0100
12880+++ linux-4.4/kernel/Makefile 2021-02-24 16:56:24.599489848 +0100
12881@@ -29,6 +29,7 @@ obj-y += printk/
12882 obj-y += irq/
12883 obj-y += rcu/
12884 obj-y += livepatch/
12885+obj-y += vserver/
12886
12887 obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
12888 obj-$(CONFIG_FREEZER) += freezer.o
12889diff -urNp -x '*.orig' linux-4.4/kernel/auditsc.c linux-4.4/kernel/auditsc.c
12890--- linux-4.4/kernel/auditsc.c 2021-02-24 16:56:11.935757699 +0100
12891+++ linux-4.4/kernel/auditsc.c 2021-02-24 16:56:24.599489848 +0100
8931d859 12892@@ -1964,7 +1964,7 @@ static int audit_set_loginuid_perm(kuid_
c2e5f7c8 12893 if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
4bf69007 12894 return -EPERM;
c2e5f7c8 12895 /* it is set, you need permission */
4bf69007
AM
12896- if (!capable(CAP_AUDIT_CONTROL))
12897+ if (!vx_capable(CAP_AUDIT_CONTROL, VXC_AUDIT_CONTROL))
12898 return -EPERM;
c2e5f7c8
JR
12899 /* reject if this is not an unset and we don't allow that */
12900 if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid))
c2806d43
AM
12901diff -urNp -x '*.orig' linux-4.4/kernel/capability.c linux-4.4/kernel/capability.c
12902--- linux-4.4/kernel/capability.c 2021-02-24 16:56:11.935757699 +0100
12903+++ linux-4.4/kernel/capability.c 2021-02-24 16:56:24.599489848 +0100
bb20add7 12904@@ -17,6 +17,7 @@
4bf69007
AM
12905 #include <linux/syscalls.h>
12906 #include <linux/pid_namespace.h>
12907 #include <linux/user_namespace.h>
12908+#include <linux/vs_context.h>
12909 #include <asm/uaccess.h>
12910
12911 /*
927ca606 12912@@ -107,6 +108,7 @@ static int cap_validate_magic(cap_user_h
4bf69007
AM
12913 return 0;
12914 }
12915
2380c486 12916+
4bf69007
AM
12917 /*
12918 * The only thing that can change the capabilities of the current
12919 * process is the current process. As such, we can't be in this code
927ca606 12920@@ -344,6 +346,8 @@ bool has_ns_capability_noaudit(struct ta
4bf69007
AM
12921 return (ret == 0);
12922 }
12923
12924+#include <linux/vserver/base.h>
d337f35e 12925+
4bf69007
AM
12926 /**
12927 * has_capability_noaudit - Does a task have a capability (unaudited) in the
12928 * initial user ns
c2806d43
AM
12929diff -urNp -x '*.orig' linux-4.4/kernel/compat.c linux-4.4/kernel/compat.c
12930--- linux-4.4/kernel/compat.c 2016-01-11 00:01:32.000000000 +0100
12931+++ linux-4.4/kernel/compat.c 2021-02-24 16:56:24.599489848 +0100
4bf69007
AM
12932@@ -27,6 +27,7 @@
12933 #include <linux/times.h>
12934 #include <linux/ptrace.h>
12935 #include <linux/gfp.h>
12936+#include <linux/vs_time.h>
12937
12938 #include <asm/uaccess.h>
12939
927ca606 12940@@ -1059,7 +1060,7 @@ COMPAT_SYSCALL_DEFINE1(stime, compat_tim
4bf69007
AM
12941 if (err)
12942 return err;
12943
12944- do_settimeofday(&tv);
12945+ vx_settimeofday(&tv);
12946 return 0;
12947 }
12948
c2806d43
AM
12949diff -urNp -x '*.orig' linux-4.4/kernel/cred.c linux-4.4/kernel/cred.c
12950--- linux-4.4/kernel/cred.c 2021-02-24 16:56:11.939091137 +0100
12951+++ linux-4.4/kernel/cred.c 2021-02-24 16:56:24.599489848 +0100
927ca606 12952@@ -64,31 +64,6 @@ struct cred init_cred = {
b00e13aa 12953 .group_info = &init_groups,
4bf69007
AM
12954 };
12955
12956-static inline void set_cred_subscribers(struct cred *cred, int n)
12957-{
12958-#ifdef CONFIG_DEBUG_CREDENTIALS
12959- atomic_set(&cred->subscribers, n);
12960-#endif
12961-}
12962-
12963-static inline int read_cred_subscribers(const struct cred *cred)
12964-{
12965-#ifdef CONFIG_DEBUG_CREDENTIALS
12966- return atomic_read(&cred->subscribers);
12967-#else
12968- return 0;
12969-#endif
12970-}
12971-
12972-static inline void alter_cred_subscribers(const struct cred *_cred, int n)
12973-{
12974-#ifdef CONFIG_DEBUG_CREDENTIALS
12975- struct cred *cred = (struct cred *) _cred;
12976-
12977- atomic_add(n, &cred->subscribers);
12978-#endif
12979-}
12980-
12981 /*
b00e13aa 12982 * The RCU callback to actually dispose of a set of credentials
4bf69007 12983 */
3cc86a71 12984@@ -243,21 +218,16 @@ error:
4bf69007
AM
12985 *
12986 * Call commit_creds() or abort_creds() to clean up.
12987 */
12988-struct cred *prepare_creds(void)
12989+struct cred *__prepare_creds(const struct cred *old)
12990 {
12991- struct task_struct *task = current;
12992- const struct cred *old;
12993 struct cred *new;
12994
12995- validate_process_creds();
12996-
12997 new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
12998 if (!new)
12999 return NULL;
13000
13001 kdebug("prepare_creds() alloc %p", new);
13002
13003- old = task->cred;
13004 memcpy(new, old, sizeof(struct cred));
13005
3cc86a71
AM
13006 new->non_rcu = 0;
13007@@ -287,6 +257,13 @@ error:
4bf69007
AM
13008 abort_creds(new);
13009 return NULL;
13010 }
d337f35e 13011+
4bf69007 13012+struct cred *prepare_creds(void)
2380c486 13013+{
4bf69007 13014+ validate_process_creds();
d337f35e 13015+
4bf69007 13016+ return __prepare_creds(current->cred);
2380c486 13017+}
4bf69007
AM
13018 EXPORT_SYMBOL(prepare_creds);
13019
13020 /*
c2806d43
AM
13021diff -urNp -x '*.orig' linux-4.4/kernel/exit.c linux-4.4/kernel/exit.c
13022--- linux-4.4/kernel/exit.c 2021-02-24 16:56:11.945758014 +0100
13023+++ linux-4.4/kernel/exit.c 2021-02-24 16:56:24.599489848 +0100
4bf69007
AM
13024@@ -48,6 +48,10 @@
13025 #include <linux/fs_struct.h>
13026 #include <linux/init_task.h>
13027 #include <linux/perf_event.h>
13028+#include <linux/vs_limit.h>
13029+#include <linux/vs_context.h>
13030+#include <linux/vs_network.h>
13031+#include <linux/vs_pid.h>
13032 #include <trace/events/sched.h>
13033 #include <linux/hw_breakpoint.h>
13034 #include <linux/oom.h>
c2806d43 13035@@ -460,15 +464,25 @@ static struct task_struct *find_child_re
3cc86a71 13036 {
4bf69007 13037 struct pid_namespace *pid_ns = task_active_pid_ns(father);
927ca606 13038 struct task_struct *reaper = pid_ns->child_reaper;
4bf69007 13039+ struct vx_info *vxi = task_get_vx_info(father);
3cc86a71
AM
13040 struct task_struct *p, *n;
13041
4bf69007
AM
13042+ if (vxi) {
13043+ BUG_ON(!vxi->vx_reaper);
13044+ if (vxi->vx_reaper != init_pid_ns.child_reaper &&
927ca606 13045+ vxi->vx_reaper != father) {
4bf69007 13046+ reaper = vxi->vx_reaper;
927ca606
AM
13047+ goto out_put;
13048+ }
13049+ }
3cc86a71 13050+
927ca606
AM
13051 if (likely(reaper != father))
13052- return reaper;
13053+ goto out_put;
13054
13055 reaper = find_alive_thread(father);
13056 if (reaper) {
13057 pid_ns->child_reaper = reaper;
13058- return reaper;
13059+ goto out_put;
4bf69007
AM
13060 }
13061
927ca606 13062 write_unlock_irq(&tasklist_lock);
c2806d43 13063@@ -485,7 +499,10 @@ static struct task_struct *find_child_re
927ca606
AM
13064 zap_pid_ns_processes(pid_ns);
13065 write_lock_irq(&tasklist_lock);
13066
13067- return father;
13068+ reaper = father;
4bf69007
AM
13069+out_put:
13070+ put_vx_info(vxi);
13071+ return reaper;
13072 }
13073
13074 /*
c2806d43 13075@@ -573,9 +590,13 @@ static void forget_original_parent(struc
927ca606 13076 return;
bb20add7 13077
927ca606
AM
13078 reaper = find_new_reaper(father, reaper);
13079- list_for_each_entry(p, &father->children, sibling) {
13080+ for (p = list_first_entry(&father->children, struct task_struct, sibling);
13081+ &p->sibling != &father->children; ) {
13082+ struct task_struct *next, *this_reaper = reaper;
13083+ if (p == reaper)
13084+ this_reaper = task_active_pid_ns(reaper)->child_reaper;
13085 for_each_thread(p, t) {
4bf69007 13086- t->real_parent = reaper;
927ca606
AM
13087+ t->real_parent = this_reaper;
13088 BUG_ON((!t->ptrace) != (t->parent == father));
13089 if (likely(!t->ptrace))
13090 t->parent = t->real_parent;
c2806d43 13091@@ -587,10 +608,13 @@ static void forget_original_parent(struc
927ca606
AM
13092 * If this is a threaded reparent there is no need to
13093 * notify anyone anything has happened.
13094 */
13095- if (!same_thread_group(reaper, father))
13096+ if (!same_thread_group(this_reaper, father))
13097 reparent_leader(father, p, dead);
13098+ next = list_next_entry(p, sibling);
13099+ list_add(&p->sibling, &this_reaper->children);
13100+ p = next;
13101 }
13102- list_splice_tail_init(&father->children, &reaper->children);
13103+ INIT_LIST_HEAD(&father->children);
13104 }
13105
13106 /*
c2806d43 13107@@ -759,6 +783,9 @@ void do_exit(long code)
4bf69007 13108 */
c2e5f7c8 13109 flush_ptrace_hw_breakpoint(tsk);
4bf69007
AM
13110
13111+ /* needs to stay before exit_notify() */
13112+ exit_vx_info_early(tsk, code);
d337f35e 13113+
927ca606 13114 TASKS_RCU(preempt_disable());
bb20add7 13115 TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
927ca606 13116 TASKS_RCU(preempt_enable());
c2806d43 13117@@ -790,6 +817,10 @@ void do_exit(long code)
4bf69007 13118
0e1bbc97
AM
13119 validate_creds_for_do_exit(tsk);
13120
13121+ /* needs to stay after exit_notify() and before preempt_disable() */
4bf69007
AM
13122+ exit_vx_info(tsk, code);
13123+ exit_nx_info(tsk);
d337f35e 13124+
0e1bbc97
AM
13125 check_stack_usage();
13126 preempt_disable();
13127 if (tsk->nr_dirtied)
c2806d43 13128@@ -816,6 +847,7 @@ void do_exit(long code)
4bf69007
AM
13129 tsk->state = TASK_DEAD;
13130 tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
13131 schedule();
13132+ printk("bad task: %p [%lx]\n", current, current->state);
13133 BUG();
13134 /* Avoid "noreturn function does return". */
13135 for (;;)
c2806d43
AM
13136diff -urNp -x '*.orig' linux-4.4/kernel/fork.c linux-4.4/kernel/fork.c
13137--- linux-4.4/kernel/fork.c 2021-02-24 16:56:11.945758014 +0100
13138+++ linux-4.4/kernel/fork.c 2021-02-24 16:56:24.599489848 +0100
927ca606 13139@@ -76,6 +76,9 @@
09be7631 13140 #include <linux/aio.h>
265de2f7 13141 #include <linux/compiler.h>
927ca606 13142 #include <linux/sysctl.h>
4bf69007
AM
13143+#include <linux/vs_context.h>
13144+#include <linux/vs_network.h>
13145+#include <linux/vs_limit.h>
13146
13147 #include <asm/pgtable.h>
13148 #include <asm/pgalloc.h>
927ca606 13149@@ -227,6 +230,8 @@ void free_task(struct task_struct *tsk)
4bf69007
AM
13150 arch_release_thread_info(tsk->stack);
13151 free_thread_info(tsk->stack);
13152 rt_mutex_debug_task_free(tsk);
13153+ clr_vx_info(&tsk->vx_info);
13154+ clr_nx_info(&tsk->nx_info);
13155 ftrace_graph_exit_task(tsk);
13156 put_seccomp_filter(tsk);
13157 arch_release_task_struct(tsk);
c2806d43 13158@@ -1280,6 +1285,8 @@ static struct task_struct *copy_process(
8d50a2ea 13159 {
4bf69007
AM
13160 int retval;
13161 struct task_struct *p;
4bf69007
AM
13162+ struct vx_info *vxi;
13163+ struct nx_info *nxi;
927ca606 13164 void *cgrp_ss_priv[CGROUP_CANFORK_COUNT] = {};
4bf69007
AM
13165
13166 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
c2806d43 13167@@ -1353,7 +1360,12 @@ static struct task_struct *copy_process(
4bf69007
AM
13168 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
13169 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
13170 #endif
13171+ init_vx_info(&p->vx_info, current_vx_info());
13172+ init_nx_info(&p->nx_info, current_nx_info());
13173+
13174 retval = -EAGAIN;
13175+ if (!vx_nproc_avail(1))
13176+ goto bad_fork_free;
13177 if (atomic_read(&p->real_cred->user->processes) >=
13178 task_rlimit(p, RLIMIT_NPROC)) {
c2e5f7c8 13179 if (p->real_cred->user != INIT_USER &&
c2806d43 13180@@ -1648,6 +1660,18 @@ static struct task_struct *copy_process(
4bf69007
AM
13181 total_forks++;
13182 spin_unlock(&current->sighand->siglock);
bb20add7 13183 syscall_tracepoint_update(p);
4bf69007
AM
13184+
13185+ /* p is copy of current */
13186+ vxi = p->vx_info;
13187+ if (vxi) {
13188+ claim_vx_info(vxi, p);
13189+ atomic_inc(&vxi->cvirt.nr_threads);
13190+ atomic_inc(&vxi->cvirt.total_forks);
13191+ vx_nproc_inc(p);
2380c486 13192+ }
4bf69007
AM
13193+ nxi = p->nx_info;
13194+ if (nxi)
13195+ claim_nx_info(nxi, p);
13196 write_unlock_irq(&tasklist_lock);
bb20add7 13197
4bf69007 13198 proc_fork_connector(p);
c2806d43
AM
13199diff -urNp -x '*.orig' linux-4.4/kernel/kthread.c linux-4.4/kernel/kthread.c
13200--- linux-4.4/kernel/kthread.c 2021-02-24 16:56:11.949091452 +0100
13201+++ linux-4.4/kernel/kthread.c 2021-02-24 16:56:24.599489848 +0100
927ca606 13202@@ -19,6 +19,7 @@
4bf69007 13203 #include <linux/ptrace.h>
09be7631 13204 #include <linux/uaccess.h>
927ca606 13205 #include <linux/cgroup.h>
4bf69007
AM
13206+#include <linux/vs_pid.h>
13207 #include <trace/events/sched.h>
13208
13209 static DEFINE_SPINLOCK(kthread_create_lock);
c2806d43
AM
13210diff -urNp -x '*.orig' linux-4.4/kernel/nsproxy.c linux-4.4/kernel/nsproxy.c
13211--- linux-4.4/kernel/nsproxy.c 2016-01-11 00:01:32.000000000 +0100
13212+++ linux-4.4/kernel/nsproxy.c 2021-02-24 16:56:24.599489848 +0100
4bf69007
AM
13213@@ -20,11 +20,14 @@
13214 #include <linux/mnt_namespace.h>
13215 #include <linux/utsname.h>
13216 #include <linux/pid_namespace.h>
13217+#include <linux/vserver/global.h>
13218+#include <linux/vserver/debug.h>
13219 #include <net/net_namespace.h>
13220 #include <linux/ipc_namespace.h>
09be7631 13221 #include <linux/proc_ns.h>
4bf69007
AM
13222 #include <linux/file.h>
13223 #include <linux/syscalls.h>
13224+#include "../fs/mount.h"
13225
13226 static struct kmem_cache *nsproxy_cachep;
13227
13228@@ -46,8 +49,11 @@ static inline struct nsproxy *create_nsp
13229 struct nsproxy *nsproxy;
13230
13231 nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
13232- if (nsproxy)
13233+ if (nsproxy) {
13234 atomic_set(&nsproxy->count, 1);
13235+ atomic_inc(&vs_global_nsproxy);
13236+ }
13237+ vxdprintk(VXD_CBIT(space, 2), "create_nsproxy = %p[1]", nsproxy);
13238 return nsproxy;
13239 }
13240
b00e13aa 13241@@ -56,9 +62,12 @@ static inline struct nsproxy *create_nsp
4bf69007
AM
13242 * Return the newly created nsproxy. Do not attach this to the task,
13243 * leave it to the caller to do proper locking and attach it to task.
13244 */
13245-static struct nsproxy *create_new_namespaces(unsigned long flags,
b00e13aa
AM
13246- struct task_struct *tsk, struct user_namespace *user_ns,
13247- struct fs_struct *new_fs)
13248+static struct nsproxy *unshare_namespaces(
13249+ unsigned long flags,
13250+ struct nsproxy *orig,
13251+ struct fs_struct *new_fs,
13252+ struct user_namespace *new_user,
13253+ struct pid_namespace *new_pid)
4bf69007
AM
13254 {
13255 struct nsproxy *new_nsp;
13256 int err;
c2e5f7c8 13257@@ -67,32 +76,31 @@ static struct nsproxy *create_new_namesp
4bf69007
AM
13258 if (!new_nsp)
13259 return ERR_PTR(-ENOMEM);
13260
b00e13aa
AM
13261- new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
13262+ new_nsp->mnt_ns = copy_mnt_ns(flags, orig->mnt_ns, new_user, new_fs);
4bf69007
AM
13263 if (IS_ERR(new_nsp->mnt_ns)) {
13264 err = PTR_ERR(new_nsp->mnt_ns);
13265 goto out_ns;
13266 }
13267
b00e13aa
AM
13268- new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
13269+ new_nsp->uts_ns = copy_utsname(flags, new_user, orig->uts_ns);
4bf69007
AM
13270 if (IS_ERR(new_nsp->uts_ns)) {
13271 err = PTR_ERR(new_nsp->uts_ns);
13272 goto out_uts;
13273 }
13274
b00e13aa
AM
13275- new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
13276+ new_nsp->ipc_ns = copy_ipcs(flags, new_user, orig->ipc_ns);
4bf69007
AM
13277 if (IS_ERR(new_nsp->ipc_ns)) {
13278 err = PTR_ERR(new_nsp->ipc_ns);
13279 goto out_ipc;
13280 }
13281
c2e5f7c8
JR
13282- new_nsp->pid_ns_for_children =
13283- copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);
13284+ new_nsp->pid_ns_for_children = copy_pid_ns(flags, new_user, new_pid);
13285 if (IS_ERR(new_nsp->pid_ns_for_children)) {
13286 err = PTR_ERR(new_nsp->pid_ns_for_children);
4bf69007
AM
13287 goto out_pid;
13288 }
13289
b00e13aa
AM
13290- new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
13291+ new_nsp->net_ns = copy_net_ns(flags, new_user, orig->net_ns);
4bf69007
AM
13292 if (IS_ERR(new_nsp->net_ns)) {
13293 err = PTR_ERR(new_nsp->net_ns);
13294 goto out_net;
c2e5f7c8 13295@@ -117,6 +125,41 @@ out_ns:
4bf69007
AM
13296 return ERR_PTR(err);
13297 }
13298
13299+static struct nsproxy *create_new_namespaces(unsigned long flags,
b00e13aa
AM
13300+ struct task_struct *tsk, struct user_namespace *user_ns,
13301+ struct fs_struct *new_fs)
13302+
4bf69007
AM
13303+{
13304+ return unshare_namespaces(flags, tsk->nsproxy,
b00e13aa 13305+ new_fs, user_ns, task_active_pid_ns(tsk));
2380c486 13306+}
d337f35e 13307+
4bf69007
AM
13308+/*
13309+ * copies the nsproxy, setting refcount to 1, and grabbing a
13310+ * reference to all contained namespaces.
13311+ */
13312+struct nsproxy *copy_nsproxy(struct nsproxy *orig)
2380c486 13313+{
4bf69007 13314+ struct nsproxy *ns = create_nsproxy();
d337f35e 13315+
4bf69007
AM
13316+ if (ns) {
13317+ memcpy(ns, orig, sizeof(struct nsproxy));
13318+ atomic_set(&ns->count, 1);
d337f35e 13319+
4bf69007
AM
13320+ if (ns->mnt_ns)
13321+ get_mnt_ns(ns->mnt_ns);
13322+ if (ns->uts_ns)
13323+ get_uts_ns(ns->uts_ns);
13324+ if (ns->ipc_ns)
13325+ get_ipc_ns(ns->ipc_ns);
c2e5f7c8
JR
13326+ if (ns->pid_ns_for_children)
13327+ get_pid_ns(ns->pid_ns_for_children);
4bf69007
AM
13328+ if (ns->net_ns)
13329+ get_net(ns->net_ns);
13330+ }
13331+ return ns;
13332+}
d337f35e 13333+
4bf69007
AM
13334 /*
13335 * called from clone. This now handles copy for nsproxy and all
13336 * namespaces therein.
c2e5f7c8 13337@@ -125,7 +168,10 @@ int copy_namespaces(unsigned long flags,
4bf69007
AM
13338 {
13339 struct nsproxy *old_ns = tsk->nsproxy;
b00e13aa 13340 struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
4bf69007
AM
13341- struct nsproxy *new_ns;
13342+ struct nsproxy *new_ns = NULL;
c2e5f7c8 13343+
4bf69007
AM
13344+ vxdprintk(VXD_CBIT(space, 7), "copy_namespaces(0x%08lx,%p[%p])",
13345+ flags, tsk, old_ns);
4bf69007 13346
c2e5f7c8
JR
13347 if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
13348 CLONE_NEWPID | CLONE_NEWNET)))) {
13349@@ -133,7 +179,7 @@ int copy_namespaces(unsigned long flags,
4bf69007 13350 return 0;
4bf69007 13351 }
4bf69007 13352
c2e5f7c8
JR
13353- if (!ns_capable(user_ns, CAP_SYS_ADMIN))
13354+ if (!vx_ns_can_unshare(user_ns, CAP_SYS_ADMIN, flags))
13355 return -EPERM;
13356
13357 /*
13358@@ -152,6 +198,9 @@ int copy_namespaces(unsigned long flags,
13359 return PTR_ERR(new_ns);
13360
13361 tsk->nsproxy = new_ns;
4bf69007 13362+ vxdprintk(VXD_CBIT(space, 3),
c2e5f7c8
JR
13363+ "copy_namespaces(0x%08lx,%p[%p]) = [%p]",
13364+ flags, tsk, old_ns, new_ns);
13365 return 0;
4bf69007
AM
13366 }
13367
c2e5f7c8 13368@@ -165,7 +214,9 @@ void free_nsproxy(struct nsproxy *ns)
4bf69007 13369 put_ipc_ns(ns->ipc_ns);
c2e5f7c8
JR
13370 if (ns->pid_ns_for_children)
13371 put_pid_ns(ns->pid_ns_for_children);
4bf69007
AM
13372- put_net(ns->net_ns);
13373+ if (ns->net_ns)
13374+ put_net(ns->net_ns);
13375+ atomic_dec(&vs_global_nsproxy);
13376 kmem_cache_free(nsproxy_cachep, ns);
13377 }
13378
c2e5f7c8 13379@@ -179,12 +230,16 @@ int unshare_nsproxy_namespaces(unsigned
b00e13aa 13380 struct user_namespace *user_ns;
4bf69007
AM
13381 int err = 0;
13382
13383+ vxdprintk(VXD_CBIT(space, 4),
13384+ "unshare_nsproxy_namespaces(0x%08lx,[%p])",
13385+ unshare_flags, current->nsproxy);
d337f35e 13386+
4bf69007 13387 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
b00e13aa 13388 CLONE_NEWNET | CLONE_NEWPID)))
4bf69007
AM
13389 return 0;
13390
b00e13aa
AM
13391 user_ns = new_cred ? new_cred->user_ns : current_user_ns();
13392- if (!ns_capable(user_ns, CAP_SYS_ADMIN))
13393+ if (!vx_ns_can_unshare(user_ns, CAP_SYS_ADMIN, unshare_flags))
4bf69007
AM
13394 return -EPERM;
13395
b00e13aa 13396 *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
c2806d43
AM
13397diff -urNp -x '*.orig' linux-4.4/kernel/pid.c linux-4.4/kernel/pid.c
13398--- linux-4.4/kernel/pid.c 2021-02-24 16:56:11.955758328 +0100
13399+++ linux-4.4/kernel/pid.c 2021-02-24 16:56:24.599489848 +0100
09be7631 13400@@ -38,6 +38,7 @@
4bf69007 13401 #include <linux/syscalls.h>
09be7631 13402 #include <linux/proc_ns.h>
b00e13aa 13403 #include <linux/proc_fs.h>
4bf69007
AM
13404+#include <linux/vs_pid.h>
13405
13406 #define pid_hashfn(nr, ns) \
13407 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
8931d859 13408@@ -381,7 +382,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns);
4bf69007
AM
13409
13410 struct pid *find_vpid(int nr)
13411 {
b00e13aa
AM
13412- return find_pid_ns(nr, task_active_pid_ns(current));
13413+ return find_pid_ns(vx_rmap_pid(nr), task_active_pid_ns(current));
4bf69007
AM
13414 }
13415 EXPORT_SYMBOL_GPL(find_vpid);
13416
8931d859 13417@@ -437,6 +438,9 @@ void transfer_pid(struct task_struct *ol
4bf69007
AM
13418 struct task_struct *pid_task(struct pid *pid, enum pid_type type)
13419 {
13420 struct task_struct *result = NULL;
d337f35e 13421+
927ca606 13422+ if (type == __PIDTYPE_REALPID)
4bf69007
AM
13423+ type = PIDTYPE_PID;
13424 if (pid) {
13425 struct hlist_node *first;
13426 first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
8931d859 13427@@ -455,7 +459,7 @@ struct task_struct *find_task_by_pid_ns(
927ca606
AM
13428 {
13429 RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
13430 "find_task_by_pid_ns() needs rcu_read_lock() protection");
4bf69007
AM
13431- return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
13432+ return pid_task(find_pid_ns(vx_rmap_pid(nr), ns), PIDTYPE_PID);
13433 }
13434
13435 struct task_struct *find_task_by_vpid(pid_t vnr)
8931d859 13436@@ -499,7 +503,7 @@ struct pid *find_get_pid(pid_t nr)
4bf69007
AM
13437 }
13438 EXPORT_SYMBOL_GPL(find_get_pid);
13439
13440-pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
13441+pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns)
13442 {
13443 struct upid *upid;
13444 pid_t nr = 0;
8931d859 13445@@ -513,6 +517,11 @@ pid_t pid_nr_ns(struct pid *pid, struct
4bf69007
AM
13446 }
13447 EXPORT_SYMBOL_GPL(pid_nr_ns);
13448
13449+pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
2380c486 13450+{
4bf69007
AM
13451+ return vx_map_pid(pid_unmapped_nr_ns(pid, ns));
13452+}
d337f35e 13453+
4bf69007
AM
13454 pid_t pid_vnr(struct pid *pid)
13455 {
b00e13aa 13456 return pid_nr_ns(pid, task_active_pid_ns(current));
c2806d43
AM
13457diff -urNp -x '*.orig' linux-4.4/kernel/pid_namespace.c linux-4.4/kernel/pid_namespace.c
13458--- linux-4.4/kernel/pid_namespace.c 2021-02-24 16:56:11.955758328 +0100
13459+++ linux-4.4/kernel/pid_namespace.c 2021-02-24 16:56:24.599489848 +0100
b00e13aa 13460@@ -18,6 +18,7 @@
09be7631 13461 #include <linux/proc_ns.h>
4bf69007
AM
13462 #include <linux/reboot.h>
13463 #include <linux/export.h>
13464+#include <linux/vserver/global.h>
13465
09be7631
JR
13466 struct pid_cache {
13467 int nr_ids;
927ca606
AM
13468@@ -111,6 +112,7 @@ static struct pid_namespace *create_pid_
13469 ns->ns.ops = &pidns_operations;
4bf69007
AM
13470
13471 kref_init(&ns->kref);
13472+ atomic_inc(&vs_global_pid_ns);
13473 ns->level = level;
13474 ns->parent = get_pid_ns(parent_pid_ns);
b00e13aa 13475 ns->user_ns = get_user_ns(user_ns);
927ca606 13476@@ -128,6 +130,7 @@ static struct pid_namespace *create_pid_
c2e5f7c8
JR
13477 out_free_map:
13478 kfree(ns->pidmap[0].page);
13479 out_free:
4bf69007
AM
13480+ atomic_dec(&vs_global_pid_ns);
13481 kmem_cache_free(pid_ns_cachep, ns);
c2e5f7c8
JR
13482 out:
13483 return ERR_PTR(err);
c2806d43
AM
13484diff -urNp -x '*.orig' linux-4.4/kernel/printk/printk.c linux-4.4/kernel/printk/printk.c
13485--- linux-4.4/kernel/printk/printk.c 2021-02-24 16:56:11.959091767 +0100
13486+++ linux-4.4/kernel/printk/printk.c 2021-02-24 16:56:24.602823286 +0100
bb20add7 13487@@ -46,6 +46,7 @@
09be7631 13488 #include <linux/utsname.h>
bb20add7 13489 #include <linux/ctype.h>
927ca606 13490 #include <linux/uio.h>
4bf69007
AM
13491+#include <linux/vs_cvirt.h>
13492
13493 #include <asm/uaccess.h>
13494
48cb6a3c 13495@@ -503,7 +504,7 @@ int check_syslog_permissions(int type, i
927ca606 13496 goto ok;
4bf69007
AM
13497
13498 if (syslog_action_restricted(type)) {
13499- if (capable(CAP_SYSLOG))
13500+ if (vx_capable(CAP_SYSLOG, VXC_SYSLOG))
927ca606 13501 goto ok;
092a4f51
JR
13502 /*
13503 * For historical reasons, accept CAP_SYS_ADMIN too, with
48cb6a3c 13504@@ -1315,12 +1316,9 @@ int do_syslog(int type, char __user *buf
4bf69007 13505 if (error)
927ca606 13506 goto out;
4bf69007
AM
13507
13508- switch (type) {
13509- case SYSLOG_ACTION_CLOSE: /* Close log */
13510- break;
13511- case SYSLOG_ACTION_OPEN: /* Open log */
13512- break;
13513- case SYSLOG_ACTION_READ: /* Read from log */
13514+ if ((type == SYSLOG_ACTION_READ) ||
13515+ (type == SYSLOG_ACTION_READ_ALL) ||
13516+ (type == SYSLOG_ACTION_READ_CLEAR)) {
13517 error = -EINVAL;
13518 if (!buf || len < 0)
13519 goto out;
48cb6a3c 13520@@ -1331,6 +1329,16 @@ int do_syslog(int type, char __user *buf
4bf69007
AM
13521 error = -EFAULT;
13522 goto out;
13523 }
13524+ }
13525+ if (!vx_check(0, VS_ADMIN|VS_WATCH))
13526+ return vx_do_syslog(type, buf, len);
d337f35e 13527+
4bf69007
AM
13528+ switch (type) {
13529+ case SYSLOG_ACTION_CLOSE: /* Close log */
13530+ break;
13531+ case SYSLOG_ACTION_OPEN: /* Open log */
13532+ break;
13533+ case SYSLOG_ACTION_READ: /* Read from log */
13534 error = wait_event_interruptible(log_wait,
13535 syslog_seq != log_next_seq);
13536 if (error)
48cb6a3c 13537@@ -1343,16 +1351,6 @@ int do_syslog(int type, char __user *buf
4bf69007
AM
13538 /* FALL THRU */
13539 /* Read last kernel messages */
13540 case SYSLOG_ACTION_READ_ALL:
13541- error = -EINVAL;
13542- if (!buf || len < 0)
13543- goto out;
13544- error = 0;
13545- if (!len)
13546- goto out;
13547- if (!access_ok(VERIFY_WRITE, buf, len)) {
13548- error = -EFAULT;
13549- goto out;
13550- }
13551 error = syslog_print_all(buf, len, clear);
13552 break;
13553 /* Clear ring buffer */
c2806d43
AM
13554diff -urNp -x '*.orig' linux-4.4/kernel/ptrace.c linux-4.4/kernel/ptrace.c
13555--- linux-4.4/kernel/ptrace.c 2021-02-24 16:56:11.959091767 +0100
13556+++ linux-4.4/kernel/ptrace.c 2021-02-24 16:56:24.602823286 +0100
09be7631 13557@@ -23,6 +23,7 @@
4bf69007
AM
13558 #include <linux/syscalls.h>
13559 #include <linux/uaccess.h>
13560 #include <linux/regset.h>
13561+#include <linux/vs_context.h>
13562 #include <linux/hw_breakpoint.h>
13563 #include <linux/cn_proc.h>
09be7631 13564 #include <linux/compat.h>
3cc86a71
AM
13565@@ -306,6 +307,11 @@ ok:
13566 !ptrace_has_cap(mm->user_ns, mode)))
13567 return -EPERM;
b00e13aa 13568
4bf69007
AM
13569+ if (!vx_check(task->xid, VS_ADMIN_P|VS_WATCH_P|VS_IDENT))
13570+ return -EPERM;
13571+ if (!vx_check(task->xid, VS_IDENT) &&
3cc86a71 13572+ !task_vx_flags(task, VXF_STATE_ADMIN, 0))
4bf69007 13573+ return -EACCES;
3cc86a71
AM
13574 if (mode & PTRACE_MODE_SCHED)
13575 return 0;
4bf69007 13576 return security_ptrace_access_check(task, mode);
c2806d43
AM
13577diff -urNp -x '*.orig' linux-4.4/kernel/reboot.c linux-4.4/kernel/reboot.c
13578--- linux-4.4/kernel/reboot.c 2021-02-24 16:56:11.959091767 +0100
13579+++ linux-4.4/kernel/reboot.c 2021-02-24 16:56:24.602823286 +0100
c2e5f7c8
JR
13580@@ -16,6 +16,7 @@
13581 #include <linux/syscalls.h>
13582 #include <linux/syscore_ops.h>
13583 #include <linux/uaccess.h>
13584+#include <linux/vs_pid.h>
13585
13586 /*
13587 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
bb20add7 13588@@ -269,6 +270,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off);
c2e5f7c8
JR
13589
13590 static DEFINE_MUTEX(reboot_mutex);
13591
13592+long vs_reboot(unsigned int, void __user *);
13593+
13594 /*
13595 * Reboot system call: for obvious reasons only root may call it,
13596 * and even root needs to set up some magic numbers in the registers
bb20add7 13597@@ -311,6 +314,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int
c2e5f7c8
JR
13598 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
13599 cmd = LINUX_REBOOT_CMD_HALT;
13600
13601+ if (!vx_check(0, VS_ADMIN|VS_WATCH))
13602+ return vs_reboot(cmd, arg);
13603+
13604 mutex_lock(&reboot_mutex);
13605 switch (cmd) {
13606 case LINUX_REBOOT_CMD_RESTART:
c2806d43
AM
13607diff -urNp -x '*.orig' linux-4.4/kernel/sched/core.c linux-4.4/kernel/sched/core.c
13608--- linux-4.4/kernel/sched/core.c 2021-02-24 16:56:11.962425205 +0100
13609+++ linux-4.4/kernel/sched/core.c 2021-02-24 16:56:24.602823286 +0100
bb20add7 13610@@ -74,6 +74,8 @@
4bf69007 13611 #include <linux/binfmts.h>
b00e13aa 13612 #include <linux/context_tracking.h>
265de2f7 13613 #include <linux/compiler.h>
4bf69007
AM
13614+#include <linux/vs_sched.h>
13615+#include <linux/vs_cvirt.h>
13616
13617 #include <asm/switch_to.h>
13618 #include <asm/tlb.h>
c2806d43 13619@@ -3561,7 +3563,7 @@ SYSCALL_DEFINE1(nice, int, increment)
4bf69007 13620
bb20add7 13621 nice = clamp_val(nice, MIN_NICE, MAX_NICE);
4bf69007
AM
13622 if (increment < 0 && !can_nice(current, nice))
13623- return -EPERM;
13624+ return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM;
13625
13626 retval = security_task_setnice(current, nice);
13627 if (retval)
c2806d43
AM
13628diff -urNp -x '*.orig' linux-4.4/kernel/sched/cputime.c linux-4.4/kernel/sched/cputime.c
13629--- linux-4.4/kernel/sched/cputime.c 2021-02-24 16:56:11.962425205 +0100
13630+++ linux-4.4/kernel/sched/cputime.c 2021-02-24 16:56:24.602823286 +0100
b00e13aa 13631@@ -4,6 +4,7 @@
4bf69007
AM
13632 #include <linux/kernel_stat.h>
13633 #include <linux/static_key.h>
b00e13aa 13634 #include <linux/context_tracking.h>
4bf69007
AM
13635+#include <linux/vs_sched.h>
13636 #include "sched.h"
13637
13638
bb20add7 13639@@ -135,14 +136,17 @@ static inline void task_group_account_fi
4bf69007
AM
13640 void account_user_time(struct task_struct *p, cputime_t cputime,
13641 cputime_t cputime_scaled)
13642 {
13643+ struct vx_info *vxi = p->vx_info; /* p is _always_ current */
ca5d134c 13644+ int nice = (task_nice(p) > 0);
4bf69007
AM
13645 int index;
13646
13647 /* Add user time to process. */
13648 p->utime += cputime;
13649 p->utimescaled += cputime_scaled;
13650+ vx_account_user(vxi, cputime, nice);
13651 account_group_user_time(p, cputime);
13652
ca5d134c 13653- index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
4bf69007
AM
13654+ index = (nice) ? CPUTIME_NICE : CPUTIME_USER;
13655
13656 /* Add user time to cpustat. */
13657 task_group_account_field(p, index, (__force u64) cputime);
bb20add7 13658@@ -189,9 +193,12 @@ static inline
ca5d134c
JR
13659 void __account_system_time(struct task_struct *p, cputime_t cputime,
13660 cputime_t cputime_scaled, int index)
13661 {
13662+ struct vx_info *vxi = p->vx_info; /* p is _always_ current */
13663+
13664 /* Add system time to process. */
13665 p->stime += cputime;
13666 p->stimescaled += cputime_scaled;
13667+ vx_account_system(vxi, cputime, 0 /* do we have idle time? */);
13668 account_group_system_time(p, cputime);
13669
13670 /* Add system time to cpustat. */
c2806d43
AM
13671diff -urNp -x '*.orig' linux-4.4/kernel/sched/fair.c linux-4.4/kernel/sched/fair.c
13672--- linux-4.4/kernel/sched/fair.c 2021-02-24 16:56:11.965758643 +0100
13673+++ linux-4.4/kernel/sched/fair.c 2021-02-24 16:56:24.602823286 +0100
bb20add7 13674@@ -30,6 +30,7 @@
b00e13aa
AM
13675 #include <linux/mempolicy.h>
13676 #include <linux/migrate.h>
13677 #include <linux/task_work.h>
4bf69007
AM
13678+#include <linux/vs_cvirt.h>
13679
13680 #include <trace/events/sched.h>
13681
c2806d43 13682@@ -3089,6 +3090,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
4bf69007
AM
13683 __enqueue_entity(cfs_rq, se);
13684 se->on_rq = 1;
13685
13686+ if (entity_is_task(se))
13687+ vx_activate_task(task_of(se));
13688 if (cfs_rq->nr_running == 1) {
13689 list_add_leaf_cfs_rq(cfs_rq);
13690 check_enqueue_throttle(cfs_rq);
c2806d43 13691@@ -3170,6 +3173,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
4bf69007
AM
13692 if (se != cfs_rq->curr)
13693 __dequeue_entity(cfs_rq, se);
13694 se->on_rq = 0;
13695+ if (entity_is_task(se))
13696+ vx_deactivate_task(task_of(se));
4bf69007
AM
13697 account_entity_dequeue(cfs_rq, se);
13698
b00e13aa 13699 /*
c2806d43
AM
13700diff -urNp -x '*.orig' linux-4.4/kernel/sched/loadavg.c linux-4.4/kernel/sched/loadavg.c
13701--- linux-4.4/kernel/sched/loadavg.c 2021-02-24 16:56:11.965758643 +0100
13702+++ linux-4.4/kernel/sched/loadavg.c 2021-02-24 16:56:24.606156724 +0100
1d9ad342
AM
13703@@ -73,9 +73,16 @@ EXPORT_SYMBOL(avenrun); /* should be rem
13704 */
13705 void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
13706 {
13707- loads[0] = (avenrun[0] + offset) << shift;
13708- loads[1] = (avenrun[1] + offset) << shift;
13709- loads[2] = (avenrun[2] + offset) << shift;
13710+ if (vx_flags(VXF_VIRT_LOAD, 0)) {
13711+ struct vx_info *vxi = current_vx_info();
13712+ loads[0] = (vxi->cvirt.load[0] + offset) << shift;
13713+ loads[1] = (vxi->cvirt.load[1] + offset) << shift;
13714+ loads[2] = (vxi->cvirt.load[2] + offset) << shift;
13715+ } else {
13716+ loads[0] = (avenrun[0] + offset) << shift;
13717+ loads[1] = (avenrun[1] + offset) << shift;
13718+ loads[2] = (avenrun[2] + offset) << shift;
13719+ }
13720 }
13721
13722 long calc_load_fold_active(struct rq *this_rq)
c2806d43
AM
13723diff -urNp -x '*.orig' linux-4.4/kernel/signal.c linux-4.4/kernel/signal.c
13724--- linux-4.4/kernel/signal.c 2021-02-24 16:56:11.965758643 +0100
13725+++ linux-4.4/kernel/signal.c 2021-02-24 16:56:24.606156724 +0100
bb20add7 13726@@ -34,6 +34,8 @@
b00e13aa 13727 #include <linux/compat.h>
09be7631 13728 #include <linux/cn_proc.h>
265de2f7 13729 #include <linux/compiler.h>
4bf69007
AM
13730+#include <linux/vs_context.h>
13731+#include <linux/vs_pid.h>
265de2f7 13732
4bf69007
AM
13733 #define CREATE_TRACE_POINTS
13734 #include <trace/events/signal.h>
48cb6a3c 13735@@ -782,9 +784,18 @@ static int check_kill_permission(int sig
4bf69007
AM
13736 struct pid *sid;
13737 int error;
13738
13739+ vxdprintk(VXD_CBIT(misc, 7),
13740+ "check_kill_permission(%d,%p,%p[#%u,%u])",
13741+ sig, info, t, vx_task_xid(t), t->pid);
d337f35e 13742+
4bf69007
AM
13743 if (!valid_signal(sig))
13744 return -EINVAL;
13745
13746+/* FIXME: needed? if so, why?
13747+ if ((info != SEND_SIG_NOINFO) &&
13748+ (is_si_special(info) || !si_fromuser(info)))
13749+ goto skip; */
d337f35e 13750+
4bf69007
AM
13751 if (!si_fromuser(info))
13752 return 0;
13753
48cb6a3c 13754@@ -808,6 +819,20 @@ static int check_kill_permission(int sig
4bf69007
AM
13755 }
13756 }
13757
13758+ error = -EPERM;
13759+ if (t->pid == 1 && current->xid)
13760+ return error;
d337f35e 13761+
4bf69007
AM
13762+ error = -ESRCH;
13763+ /* FIXME: we shouldn't return ESRCH ever, to avoid
13764+ loops, maybe ENOENT or EACCES? */
13765+ if (!vx_check(vx_task_xid(t), VS_WATCH_P | VS_IDENT)) {
13766+ vxdprintk(current->xid || VXD_CBIT(misc, 7),
13767+ "signal %d[%p] xid mismatch %p[#%u,%u] xid=#%u",
13768+ sig, info, t, vx_task_xid(t), t->pid, current->xid);
13769+ return error;
2380c486 13770+ }
4bf69007
AM
13771+/* skip: */
13772 return security_task_kill(t, info, sig, 0);
13773 }
13774
48cb6a3c 13775@@ -1359,8 +1384,14 @@ int kill_pid_info(int sig, struct siginf
927ca606
AM
13776 for (;;) {
13777 rcu_read_lock();
13778 p = pid_task(pid, PIDTYPE_PID);
13779- if (p)
13780- error = group_send_sig_info(sig, info, p);
13781+ if (p) {
13782+ if (vx_check(vx_task_xid(p), VS_IDENT))
13783+ error = group_send_sig_info(sig, info, p);
13784+ else {
13785+ rcu_read_unlock();
13786+ return -ESRCH;
13787+ }
13788+ }
13789 rcu_read_unlock();
13790 if (likely(!p || error != -ESRCH))
13791 return error;
48cb6a3c 13792@@ -1405,7 +1436,7 @@ int kill_pid_info_as_cred(int sig, struc
4bf69007
AM
13793
13794 rcu_read_lock();
13795 p = pid_task(pid, PIDTYPE_PID);
13796- if (!p) {
13797+ if (!p || !vx_check(vx_task_xid(p), VS_IDENT)) {
13798 ret = -ESRCH;
13799 goto out_unlock;
13800 }
48cb6a3c 13801@@ -1461,8 +1492,10 @@ static int kill_something_info(int sig,
4bf69007
AM
13802 struct task_struct * p;
13803
13804 for_each_process(p) {
13805- if (task_pid_vnr(p) > 1 &&
13806- !same_thread_group(p, current)) {
13807+ if (vx_check(vx_task_xid(p), VS_ADMIN|VS_IDENT) &&
13808+ task_pid_vnr(p) > 1 &&
13809+ !same_thread_group(p, current) &&
13810+ !vx_current_initpid(p->pid)) {
13811 int err = group_send_sig_info(sig, info, p);
13812 ++count;
13813 if (err != -EPERM)
48cb6a3c 13814@@ -2333,6 +2366,11 @@ relock:
4bf69007
AM
13815 !sig_kernel_only(signr))
13816 continue;
13817
13818+ /* virtual init is protected against user signals */
bb20add7 13819+ if ((ksig->info.si_code == SI_USER) &&
4bf69007
AM
13820+ vx_current_initpid(current->pid))
13821+ continue;
d337f35e 13822+
4bf69007
AM
13823 if (sig_kernel_stop(signr)) {
13824 /*
13825 * The default action is to stop all threads in
c2806d43
AM
13826diff -urNp -x '*.orig' linux-4.4/kernel/softirq.c linux-4.4/kernel/softirq.c
13827--- linux-4.4/kernel/softirq.c 2016-01-11 00:01:32.000000000 +0100
13828+++ linux-4.4/kernel/softirq.c 2021-02-24 16:56:24.606156724 +0100
bb20add7 13829@@ -26,6 +26,7 @@
4bf69007
AM
13830 #include <linux/smpboot.h>
13831 #include <linux/tick.h>
265de2f7 13832 #include <linux/irq.h>
4bf69007
AM
13833+#include <linux/vs_context.h>
13834
13835 #define CREATE_TRACE_POINTS
13836 #include <trace/events/irq.h>
c2806d43
AM
13837diff -urNp -x '*.orig' linux-4.4/kernel/sys.c linux-4.4/kernel/sys.c
13838--- linux-4.4/kernel/sys.c 2021-02-24 16:56:11.965758643 +0100
13839+++ linux-4.4/kernel/sys.c 2021-02-24 16:56:24.606156724 +0100
8931d859
AM
13840@@ -56,6 +56,7 @@
13841 #include <linux/nospec.h>
4bf69007
AM
13842
13843 #include <linux/kmsg_dump.h>
b00e13aa 13844+#include <linux/vs_pid.h>
4bf69007 13845 /* Move somewhere else to avoid recompiling? */
b00e13aa
AM
13846 #include <generated/utsrelease.h>
13847
8931d859 13848@@ -159,7 +160,10 @@ static int set_one_prio(struct task_stru
4bf69007
AM
13849 goto out;
13850 }
13851 if (niceval < task_nice(p) && !can_nice(p, niceval)) {
13852- error = -EACCES;
13853+ if (vx_flags(VXF_IGNEG_NICE, 0))
13854+ error = 0;
13855+ else
13856+ error = -EACCES;
13857 goto out;
13858 }
13859 no_nice = security_task_setnice(p, niceval);
8931d859 13860@@ -210,6 +214,8 @@ SYSCALL_DEFINE3(setpriority, int, which,
bb20add7
AM
13861 else
13862 pgrp = task_pgrp(current);
13863 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
13864+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13865+ continue;
13866 error = set_one_prio(p, niceval, error);
13867 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
13868 break;
8931d859 13869@@ -276,6 +282,8 @@ SYSCALL_DEFINE2(getpriority, int, which,
bb20add7
AM
13870 else
13871 pgrp = task_pgrp(current);
13872 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
13873+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13874+ continue;
13875 niceval = nice_to_rlimit(task_nice(p));
13876 if (niceval > retval)
13877 retval = niceval;
8931d859 13878@@ -292,6 +300,8 @@ SYSCALL_DEFINE2(getpriority, int, which,
bb20add7
AM
13879 goto out_unlock; /* No processes for this user */
13880 }
13881 do_each_thread(g, p) {
13882+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13883+ continue;
927ca606 13884 if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
bb20add7 13885 niceval = nice_to_rlimit(task_nice(p));
4bf69007 13886 if (niceval > retval)
c2806d43 13887@@ -1213,7 +1223,8 @@ SYSCALL_DEFINE2(sethostname, char __user
4bf69007
AM
13888 int errno;
13889 char tmp[__NEW_UTS_LEN];
13890
13891- if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
13892+ if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns,
13893+ CAP_SYS_ADMIN, VXC_SET_UTSNAME))
13894 return -EPERM;
13895
13896 if (len < 0 || len > __NEW_UTS_LEN)
c2806d43 13897@@ -1266,7 +1277,8 @@ SYSCALL_DEFINE2(setdomainname, char __us
4bf69007
AM
13898 int errno;
13899 char tmp[__NEW_UTS_LEN];
13900
13901- if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
13902+ if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns,
13903+ CAP_SYS_ADMIN, VXC_SET_UTSNAME))
13904 return -EPERM;
13905 if (len < 0 || len > __NEW_UTS_LEN)
13906 return -EINVAL;
c2806d43 13907@@ -1386,7 +1398,7 @@ int do_prlimit(struct task_struct *tsk,
4bf69007
AM
13908 /* Keep the capable check against init_user_ns until
13909 cgroups can contain all limits */
13910 if (new_rlim->rlim_max > rlim->rlim_max &&
13911- !capable(CAP_SYS_RESOURCE))
13912+ !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
13913 retval = -EPERM;
13914 if (!retval)
13915 retval = security_task_setrlimit(tsk->group_leader,
c2806d43 13916@@ -1439,7 +1451,8 @@ static int check_prlimit_permission(stru
4bf69007
AM
13917 gid_eq(cred->gid, tcred->sgid) &&
13918 gid_eq(cred->gid, tcred->gid))
13919 return 0;
13920- if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
13921+ if (vx_ns_capable(tcred->user_ns,
13922+ CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
13923 return 0;
13924
13925 return -EPERM;
c2806d43
AM
13926diff -urNp -x '*.orig' linux-4.4/kernel/sysctl.c linux-4.4/kernel/sysctl.c
13927--- linux-4.4/kernel/sysctl.c 2021-02-24 16:56:11.969092082 +0100
13928+++ linux-4.4/kernel/sysctl.c 2021-02-24 16:56:24.606156724 +0100
927ca606 13929@@ -87,6 +87,7 @@
4bf69007
AM
13930 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
13931 #include <linux/lockdep.h>
13932 #endif
13933+extern char vshelper_path[];
13934 #ifdef CONFIG_CHR_DEV_SG
13935 #include <scsi/sg.h>
13936 #endif
3cc86a71 13937@@ -281,6 +282,13 @@ static int max_extfrag_threshold = 1000;
bb20add7
AM
13938
13939 static struct ctl_table kern_table[] = {
13940 {
4bf69007
AM
13941+ .procname = "vshelper",
13942+ .data = &vshelper_path,
13943+ .maxlen = 256,
13944+ .mode = 0644,
bb20add7 13945+ .proc_handler = proc_dostring,
4bf69007 13946+ },
bb20add7
AM
13947+ {
13948 .procname = "sched_child_runs_first",
13949 .data = &sysctl_sched_child_runs_first,
13950 .maxlen = sizeof(unsigned int),
3cc86a71 13951@@ -1388,7 +1396,6 @@ static struct ctl_table vm_table[] = {
927ca606
AM
13952 .extra1 = &zero,
13953 .extra2 = &one,
bb20add7
AM
13954 },
13955-
13956 #endif /* CONFIG_COMPACTION */
3cc86a71
AM
13957 {
13958 .procname = "min_free_kbytes",
c2806d43
AM
13959diff -urNp -x '*.orig' linux-4.4/kernel/sysctl_binary.c linux-4.4/kernel/sysctl_binary.c
13960--- linux-4.4/kernel/sysctl_binary.c 2021-02-24 16:56:11.969092082 +0100
13961+++ linux-4.4/kernel/sysctl_binary.c 2021-02-24 16:56:24.606156724 +0100
13962@@ -73,6 +73,7 @@ static const struct bin_table bin_kern_t
13963
13964 { CTL_INT, KERN_PANIC, "panic" },
13965 { CTL_INT, KERN_REALROOTDEV, "real-root-dev" },
13966+ { CTL_STR, KERN_VSHELPER, "vshelper" },
13967
13968 { CTL_STR, KERN_SPARC_REBOOT, "reboot-cmd" },
13969 { CTL_INT, KERN_CTLALTDEL, "ctrl-alt-del" },
13970diff -urNp -x '*.orig' linux-4.4/kernel/time/posix-timers.c linux-4.4/kernel/time/posix-timers.c
13971--- linux-4.4/kernel/time/posix-timers.c 2021-02-24 16:56:11.969092082 +0100
13972+++ linux-4.4/kernel/time/posix-timers.c 2021-02-24 16:56:24.606156724 +0100
bb20add7
AM
13973@@ -48,6 +48,7 @@
13974 #include <linux/workqueue.h>
13975 #include <linux/export.h>
13976 #include <linux/hashtable.h>
13977+#include <linux/vs_context.h>
4bf69007 13978
bb20add7 13979 #include "timekeeping.h"
4bf69007 13980
3cc86a71 13981@@ -417,6 +418,7 @@ int posix_timer_event(struct k_itimer *t
bb20add7
AM
13982 {
13983 struct task_struct *task;
13984 int shared, ret = -1;
13985+
13986 /*
13987 * FIXME: if ->sigq is queued we can race with
13988 * dequeue_signal()->do_schedule_next_timer().
3cc86a71 13989@@ -433,10 +435,18 @@ int posix_timer_event(struct k_itimer *t
bb20add7
AM
13990 rcu_read_lock();
13991 task = pid_task(timr->it_pid, PIDTYPE_PID);
13992 if (task) {
13993+ struct vx_info_save vxis;
13994+ struct vx_info *vxi;
13995+
13996+ vxi = get_vx_info(task->vx_info);
13997+ enter_vx_info(vxi, &vxis);
13998 shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
13999 ret = send_sigqueue(timr->sigq, task, shared);
14000+ leave_vx_info(&vxis);
14001+ put_vx_info(vxi);
14002 }
14003 rcu_read_unlock();
14004+
14005 /* If we failed to send the signal the timer stops. */
14006 return ret > 0;
4bf69007 14007 }
c2806d43
AM
14008diff -urNp -x '*.orig' linux-4.4/kernel/time/time.c linux-4.4/kernel/time/time.c
14009--- linux-4.4/kernel/time/time.c 2021-02-24 16:56:11.972425520 +0100
14010+++ linux-4.4/kernel/time/time.c 2021-02-24 16:56:24.606156724 +0100
8931d859 14011@@ -38,6 +38,7 @@
4bf69007
AM
14012 #include <linux/fs.h>
14013 #include <linux/math64.h>
14014 #include <linux/ptrace.h>
14015+#include <linux/vs_time.h>
14016
14017 #include <asm/uaccess.h>
14018 #include <asm/unistd.h>
8931d859 14019@@ -94,7 +95,7 @@ SYSCALL_DEFINE1(stime, time_t __user *,
4bf69007
AM
14020 if (err)
14021 return err;
14022
14023- do_settimeofday(&tv);
14024+ vx_settimeofday(&tv);
14025 return 0;
14026 }
14027
8931d859 14028@@ -187,7 +188,7 @@ int do_sys_settimeofday(const struct tim
4bf69007
AM
14029 }
14030 }
14031 if (tv)
14032- return do_settimeofday(tv);
14033+ return vx_settimeofday(tv);
14034 return 0;
14035 }
14036
c2806d43
AM
14037diff -urNp -x '*.orig' linux-4.4/kernel/time/timekeeping.c linux-4.4/kernel/time/timekeeping.c
14038--- linux-4.4/kernel/time/timekeeping.c 2021-02-24 16:56:11.972425520 +0100
14039+++ linux-4.4/kernel/time/timekeeping.c 2021-02-24 16:56:24.606156724 +0100
bb20add7
AM
14040@@ -23,6 +23,7 @@
14041 #include <linux/stop_machine.h>
14042 #include <linux/pvclock_gtod.h>
14043 #include <linux/compiler.h>
14044+#include <linux/vs_time.h>
14045
14046 #include "tick-internal.h"
14047 #include "ntp_internal.h"
3cc86a71 14048@@ -922,7 +923,9 @@ void ktime_get_raw_and_real_ts64(struct
bb20add7
AM
14049 } while (read_seqcount_retry(&tk_core.seq, seq));
14050
927ca606 14051 timespec64_add_ns(ts_raw, nsecs_raw);
bb20add7 14052+ vx_adjust_timespec(ts_raw);
927ca606 14053 timespec64_add_ns(ts_real, nsecs_real);
bb20add7
AM
14054+ vx_adjust_timespec(ts_real);
14055 }
927ca606 14056 EXPORT_SYMBOL(ktime_get_raw_and_real_ts64);
bb20add7 14057
c2806d43
AM
14058diff -urNp -x '*.orig' linux-4.4/kernel/time/timer.c linux-4.4/kernel/time/timer.c
14059--- linux-4.4/kernel/time/timer.c 2021-02-24 16:56:11.972425520 +0100
14060+++ linux-4.4/kernel/time/timer.c 2021-02-24 16:56:24.609490163 +0100
14061@@ -43,6 +43,10 @@
4bf69007 14062 #include <linux/slab.h>
09be7631 14063 #include <linux/compat.h>
b4d4647f 14064 #include <linux/random.h>
4bf69007
AM
14065+#include <linux/vs_base.h>
14066+#include <linux/vs_cvirt.h>
14067+#include <linux/vs_pid.h>
14068+#include <linux/vserver/sched.h>
14069
14070 #include <asm/uaccess.h>
14071 #include <asm/unistd.h>
c2806d43
AM
14072diff -urNp -x '*.orig' linux-4.4/kernel/user_namespace.c linux-4.4/kernel/user_namespace.c
14073--- linux-4.4/kernel/user_namespace.c 2021-02-24 16:56:11.982425835 +0100
14074+++ linux-4.4/kernel/user_namespace.c 2021-02-24 16:56:24.609490163 +0100
b00e13aa 14075@@ -22,6 +22,7 @@
4bf69007
AM
14076 #include <linux/ctype.h>
14077 #include <linux/projid.h>
b00e13aa 14078 #include <linux/fs_struct.h>
4bf69007
AM
14079+#include <linux/vserver/global.h>
14080
14081 static struct kmem_cache *user_ns_cachep __read_mostly;
bb20add7 14082 static DEFINE_MUTEX(userns_state_mutex);
927ca606 14083@@ -97,6 +98,7 @@ int create_user_ns(struct cred *new)
4bf69007 14084
b00e13aa
AM
14085 atomic_set(&ns->count, 1);
14086 /* Leave the new->user_ns reference with the new user namespace. */
4bf69007
AM
14087+ atomic_inc(&vs_global_user_ns);
14088 ns->parent = parent_ns;
09be7631 14089 ns->level = parent_ns->level + 1;
4bf69007 14090 ns->owner = owner;
927ca606
AM
14091@@ -145,6 +147,7 @@ void free_user_ns(struct user_namespace
14092 key_put(ns->persistent_keyring_register);
14093 #endif
14094 ns_free_inum(&ns->ns);
14095+ atomic_dec(&vs_global_user_ns);
14096 kmem_cache_free(user_ns_cachep, ns);
14097 ns = parent;
14098 } while (atomic_dec_and_test(&parent->count));
14099@@ -358,6 +361,18 @@ gid_t from_kgid_munged(struct user_names
bb20add7
AM
14100 }
14101 EXPORT_SYMBOL(from_kgid_munged);
14102
14103+ktag_t make_ktag(struct user_namespace *from, vtag_t tag)
14104+{
14105+ return KTAGT_INIT(tag);
14106+}
14107+EXPORT_SYMBOL(make_ktag);
14108+
14109+vtag_t from_ktag(struct user_namespace *to, ktag_t tag)
14110+{
14111+ return __ktag_val(tag);
14112+}
14113+EXPORT_SYMBOL(from_ktag);
14114+
14115 /**
14116 * make_kprojid - Map a user-namespace projid pair into a kprojid.
14117 * @ns: User namespace that the projid is in
c2806d43
AM
14118diff -urNp -x '*.orig' linux-4.4/kernel/utsname.c linux-4.4/kernel/utsname.c
14119--- linux-4.4/kernel/utsname.c 2016-01-11 00:01:32.000000000 +0100
14120+++ linux-4.4/kernel/utsname.c 2021-02-24 16:56:24.609490163 +0100
4bf69007
AM
14121@@ -16,14 +16,17 @@
14122 #include <linux/slab.h>
14123 #include <linux/user_namespace.h>
09be7631 14124 #include <linux/proc_ns.h>
4bf69007
AM
14125+#include <linux/vserver/global.h>
14126
14127 static struct uts_namespace *create_uts_ns(void)
14128 {
14129 struct uts_namespace *uts_ns;
14130
14131 uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
14132- if (uts_ns)
14133+ if (uts_ns) {
c2e5f7c8 14134 kref_init(&uts_ns->kref);
4bf69007
AM
14135+ atomic_inc(&vs_global_uts_ns);
14136+ }
14137 return uts_ns;
14138 }
14139
927ca606 14140@@ -87,6 +90,7 @@ void free_uts_ns(struct kref *kref)
4bf69007
AM
14141 ns = container_of(kref, struct uts_namespace, kref);
14142 put_user_ns(ns->user_ns);
927ca606 14143 ns_free_inum(&ns->ns);
3cc86a71
AM
14144+ atomic_dec(&vs_global_uts_ns);
14145 kfree(ns);
14146 }
14147
c2806d43
AM
14148diff -urNp -x '*.orig' linux-4.4/kernel/vserver/Kconfig linux-4.4/kernel/vserver/Kconfig
14149--- linux-4.4/kernel/vserver/Kconfig 1970-01-01 01:00:00.000000000 +0100
14150+++ linux-4.4/kernel/vserver/Kconfig 2021-02-24 16:56:24.612823601 +0100
14151@@ -0,0 +1,230 @@
14152+#
14153+# Linux VServer configuration
14154+#
14155+
14156+menu "Linux VServer"
14157+
14158+config VSERVER_AUTO_LBACK
14159+ bool "Automatically Assign Loopback IP"
14160+ default y
14161+ help
14162+ Automatically assign a guest specific loopback
14163+ IP and add it to the kernel network stack on
14164+ startup.
14165+
14166+config VSERVER_AUTO_SINGLE
14167+ bool "Automatic Single IP Special Casing"
14168+ default n
14169+ help
14170+ This allows network contexts with a single IP to
14171+ automatically remap 0.0.0.0 bindings to that IP,
14172+ avoiding further network checks and improving
14173+ performance.
14174+
14175+ (note: such guests do not allow changing the IP
14176+ on the fly and do not show loopback addresses)
14177+
14178+config VSERVER_COWBL
14179+ bool "Enable COW Immutable Link Breaking"
14180+ default y
14181+ help
14182+ This enables the COW (Copy-On-Write) link break code.
14183+ It allows you to treat unified files like normal files
14184+ when writing to them (which will implicitly break the
14185+ link and create a copy of the unified file)
14186+
14187+config VSERVER_VTIME
14188+ bool "Enable Virtualized Guest Time (EXPERIMENTAL)"
14189+ default n
14190+ help
14191+ This enables per guest time offsets to allow for
14192+ adjusting the system clock individually per guest.
14193+ This adds some overhead to the time functions and
14194+ therefore should not be enabled without good reason.
14195+
14196+config VSERVER_DEVICE
14197+ bool "Enable Guest Device Mapping (EXPERIMENTAL)"
14198+ default n
14199+ help
14200+ This enables generic device remapping.
14201+
14202+config VSERVER_PROC_SECURE
14203+ bool "Enable Proc Security"
14204+ depends on PROC_FS
14205+ default y
14206+ help
14207+ This configures ProcFS security to initially hide
14208+ non-process entries for all contexts except the main and
14209+ spectator context (i.e. for all guests), which is a secure
14210+ default.
14211+
14212+ (note: on 1.2x the entries were visible by default)
14213+
14214+choice
14215+ prompt "Persistent Inode Tagging"
14216+ default TAGGING_ID24
14217+ help
14218+ This adds persistent context information to filesystems
14219+ mounted with the tagxid option. Tagging is a requirement
14220+ for per-context disk limits and per-context quota.
14221+
14222+
14223+config TAGGING_NONE
14224+ bool "Disabled"
14225+ help
14226+ do not store per-context information in inodes.
14227+
14228+config TAGGING_UID16
14229+ bool "UID16/GID32"
14230+ help
14231+ reduces UID to 16 bit, but leaves GID at 32 bit.
14232+
14233+config TAGGING_GID16
14234+ bool "UID32/GID16"
14235+ help
14236+ reduces GID to 16 bit, but leaves UID at 32 bit.
14237+
14238+config TAGGING_ID24
14239+ bool "UID24/GID24"
14240+ help
14241+ uses the upper 8bit from UID and GID for XID tagging
14242+ which leaves 24bit for UID/GID each, which should be
14243+ more than sufficient for normal use.
14244+
14245+config TAGGING_INTERN
14246+ bool "UID32/GID32"
14247+ help
14248+ this uses otherwise reserved inode fields in the on
14249+ disk representation, which limits the use to a few
14250+ filesystems (currently ext2 and ext3)
14251+
14252+endchoice
14253+
14254+config TAG_NFSD
14255+ bool "Tag NFSD User Auth and Files"
14256+ default n
14257+ help
14258+ Enable this if you do want the in-kernel NFS
14259+ Server to use the tagging specified above.
14260+ (will require patched clients too)
14261+
14262+config VSERVER_PRIVACY
14263+ bool "Honor Privacy Aspects of Guests"
14264+ default n
14265+ help
14266+ When enabled, most context checks will disallow
14267+ access to structures assigned to a specific context,
14268+ like ptys or loop devices.
14269+
14270+config VSERVER_CONTEXTS
14271+ int "Maximum number of Contexts (1-65533)" if EMBEDDED
14272+ range 1 65533
14273+ default "768" if 64BIT
14274+ default "256"
14275+ help
14276+ This setting will optimize certain data structures
14277+ and memory allocations according to the expected
14278+ maximum.
14279+
14280+ note: this is not a strict upper limit.
14281+
14282+config VSERVER_WARN
14283+ bool "VServer Warnings"
14284+ default y
14285+ help
14286+ This enables various runtime warnings, which will
14287+ notify about potential manipulation attempts or
14288+ resource shortage. It is generally considered to
14289+ be a good idea to have that enabled.
14290+
14291+config VSERVER_WARN_DEVPTS
14292+ bool "VServer DevPTS Warnings"
14293+ depends on VSERVER_WARN
14294+ default y
14295+ help
14296+ This enables DevPTS related warnings, issued when a
14297+ process inside a context tries to lookup or access
14298+ a dynamic pts from the host or a different context.
14299+
14300+config VSERVER_DEBUG
14301+ bool "VServer Debugging Code"
14302+ default n
14303+ help
14304+ Set this to yes if you want to be able to activate
14305+ debugging output at runtime. It adds a very small
14306+ overhead to all vserver related functions and
14307+ increases the kernel size by about 20k.
14308+
14309+config VSERVER_HISTORY
14310+ bool "VServer History Tracing"
14311+ depends on VSERVER_DEBUG
14312+ default n
14313+ help
14314+ Set this to yes if you want to record the history of
14315+ linux-vserver activities, so they can be replayed in
14316+ the event of a kernel panic or oops.
14317+
14318+config VSERVER_HISTORY_SIZE
14319+ int "Per-CPU History Size (32-65536)"
14320+ depends on VSERVER_HISTORY
14321+ range 32 65536
14322+ default 64
14323+ help
14324+ This allows you to specify the number of entries in
14325+ the per-CPU history buffer.
14326+
14327+config VSERVER_EXTRA_MNT_CHECK
14328+ bool "Extra Checks for Reachability"
14329+ default n
14330+ help
14331+ Set this to yes if you want to do extra checks for
14332+ vfsmount reachability in the proc filesystem code.
14333+ This shouldn't be required on any setup utilizing
14334+ mnt namespaces.
14335+
14336+choice
14337+ prompt "Quotes used in debug and warn messages"
14338+ default QUOTES_ISO8859
14339+
14340+config QUOTES_ISO8859
14341+ bool "Extended ASCII (ISO 8859) angle quotes"
14342+ help
14343+ This uses the extended ASCII characters \xbb
14344+ and \xab for quoting file and process names.
14345+
14346+config QUOTES_UTF8
14347+ bool "UTF-8 angle quotes"
14348+ help
14349+ This uses the UTF-8 sequences for angle
14350+ quotes to quote file and process names.
14351+
14352+config QUOTES_ASCII
14353+ bool "ASCII single quotes"
14354+ help
14355+ This uses the ASCII single quote character
14356+ (\x27) to quote file and process names.
14357+
14358+endchoice
14359+
14360+endmenu
14361+
14362+
14363+config VSERVER
14364+ bool
14365+ default y
14366+ select NAMESPACES
14367+ select UTS_NS
14368+ select IPC_NS
14369+# select USER_NS
14370+ select SYSVIPC
14371+
14372+config VSERVER_SECURITY
14373+ bool
14374+ depends on SECURITY
14375+ default y
14376+ select SECURITY_CAPABILITIES
14377+
14378+config VSERVER_DISABLED
14379+ bool
14380+ default n
14381+
14382diff -urNp -x '*.orig' linux-4.4/kernel/vserver/Makefile linux-4.4/kernel/vserver/Makefile
14383--- linux-4.4/kernel/vserver/Makefile 1970-01-01 01:00:00.000000000 +0100
14384+++ linux-4.4/kernel/vserver/Makefile 2021-02-24 16:56:24.612823601 +0100
14385@@ -0,0 +1,18 @@
14386+#
14387+# Makefile for the Linux vserver routines.
14388+#
14389+
14390+
14391+obj-y += vserver.o
14392+
14393+vserver-y := switch.o context.o space.o sched.o network.o inode.o \
14394+ limit.o cvirt.o cacct.o signal.o helper.o init.o \
14395+ dlimit.o tag.o
14396+
14397+vserver-$(CONFIG_INET) += inet.o
14398+vserver-$(CONFIG_PROC_FS) += proc.o
14399+vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o
14400+vserver-$(CONFIG_VSERVER_HISTORY) += history.o
14401+vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o
14402+vserver-$(CONFIG_VSERVER_DEVICE) += device.o
14403+
14404diff -urNp -x '*.orig' linux-4.4/kernel/vserver/cacct.c linux-4.4/kernel/vserver/cacct.c
14405--- linux-4.4/kernel/vserver/cacct.c 1970-01-01 01:00:00.000000000 +0100
14406+++ linux-4.4/kernel/vserver/cacct.c 2021-02-24 16:56:24.609490163 +0100
4bf69007
AM
14407@@ -0,0 +1,42 @@
14408+/*
14409+ * linux/kernel/vserver/cacct.c
14410+ *
14411+ * Virtual Server: Context Accounting
14412+ *
c2806d43 14413+ * Copyright (C) 2006-2007 Herbert Pötzl
4bf69007
AM
14414+ *
14415+ * V0.01 added accounting stats
14416+ *
14417+ */
d337f35e 14418+
4bf69007
AM
14419+#include <linux/types.h>
14420+#include <linux/vs_context.h>
14421+#include <linux/vserver/cacct_cmd.h>
14422+#include <linux/vserver/cacct_int.h>
d337f35e 14423+
4bf69007
AM
14424+#include <asm/errno.h>
14425+#include <asm/uaccess.h>
14426+
14427+
14428+int vc_sock_stat(struct vx_info *vxi, void __user *data)
d337f35e 14429+{
4bf69007
AM
14430+ struct vcmd_sock_stat_v0 vc_data;
14431+ int j, field;
d337f35e 14432+
2380c486
JR
14433+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
14434+ return -EFAULT;
14435+
4bf69007
AM
14436+ field = vc_data.field;
14437+ if ((field < 0) || (field >= VXA_SOCK_SIZE))
14438+ return -EINVAL;
7e46296a 14439+
4bf69007
AM
14440+ for (j = 0; j < 3; j++) {
14441+ vc_data.count[j] = vx_sock_count(&vxi->cacct, field, j);
14442+ vc_data.total[j] = vx_sock_total(&vxi->cacct, field, j);
14443+ }
7e46296a
AM
14444+
14445+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
14446+ return -EFAULT;
14447+ return 0;
14448+}
14449+
c2806d43
AM
14450diff -urNp -x '*.orig' linux-4.4/kernel/vserver/cacct_init.h linux-4.4/kernel/vserver/cacct_init.h
14451--- linux-4.4/kernel/vserver/cacct_init.h 1970-01-01 01:00:00.000000000 +0100
14452+++ linux-4.4/kernel/vserver/cacct_init.h 2021-02-24 16:56:24.609490163 +0100
4bf69007 14453@@ -0,0 +1,25 @@
7e46296a
AM
14454+
14455+
4bf69007 14456+static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
265d6dcc 14457+{
4bf69007 14458+ int i, j;
265d6dcc 14459+
265d6dcc 14460+
4bf69007
AM
14461+ for (i = 0; i < VXA_SOCK_SIZE; i++) {
14462+ for (j = 0; j < 3; j++) {
14463+ atomic_long_set(&cacct->sock[i][j].count, 0);
14464+ atomic_long_set(&cacct->sock[i][j].total, 0);
14465+ }
14466+ }
14467+ for (i = 0; i < 8; i++)
14468+ atomic_set(&cacct->slab[i], 0);
14469+ for (i = 0; i < 5; i++)
14470+ for (j = 0; j < 4; j++)
14471+ atomic_set(&cacct->page[i][j], 0);
265d6dcc
JR
14472+}
14473+
4bf69007 14474+static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
265d6dcc 14475+{
4bf69007 14476+ return;
265d6dcc
JR
14477+}
14478+
c2806d43
AM
14479diff -urNp -x '*.orig' linux-4.4/kernel/vserver/cacct_proc.h linux-4.4/kernel/vserver/cacct_proc.h
14480--- linux-4.4/kernel/vserver/cacct_proc.h 1970-01-01 01:00:00.000000000 +0100
14481+++ linux-4.4/kernel/vserver/cacct_proc.h 2021-02-24 16:56:24.609490163 +0100
4bf69007
AM
14482@@ -0,0 +1,53 @@
14483+#ifndef _VX_CACCT_PROC_H
14484+#define _VX_CACCT_PROC_H
265d6dcc 14485+
4bf69007 14486+#include <linux/vserver/cacct_int.h>
d337f35e 14487+
d337f35e 14488+
4bf69007
AM
14489+#define VX_SOCKA_TOP \
14490+ "Type\t recv #/bytes\t\t send #/bytes\t\t fail #/bytes\n"
d337f35e 14491+
4bf69007 14492+static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
d337f35e 14493+{
4bf69007
AM
14494+ int i, j, length = 0;
14495+ static char *type[VXA_SOCK_SIZE] = {
14496+ "UNSPEC", "UNIX", "INET", "INET6", "PACKET", "OTHER"
14497+ };
d337f35e 14498+
4bf69007
AM
14499+ length += sprintf(buffer + length, VX_SOCKA_TOP);
14500+ for (i = 0; i < VXA_SOCK_SIZE; i++) {
14501+ length += sprintf(buffer + length, "%s:", type[i]);
14502+ for (j = 0; j < 3; j++) {
14503+ length += sprintf(buffer + length,
14504+ "\t%10lu/%-10lu",
14505+ vx_sock_count(cacct, i, j),
14506+ vx_sock_total(cacct, i, j));
14507+ }
14508+ buffer[length++] = '\n';
14509+ }
d337f35e 14510+
4bf69007
AM
14511+ length += sprintf(buffer + length, "\n");
14512+ length += sprintf(buffer + length,
14513+ "slab:\t %8u %8u %8u %8u\n",
14514+ atomic_read(&cacct->slab[1]),
14515+ atomic_read(&cacct->slab[4]),
14516+ atomic_read(&cacct->slab[0]),
14517+ atomic_read(&cacct->slab[2]));
d337f35e 14518+
4bf69007
AM
14519+ length += sprintf(buffer + length, "\n");
14520+ for (i = 0; i < 5; i++) {
14521+ length += sprintf(buffer + length,
14522+ "page[%d]: %8u %8u %8u %8u\t %8u %8u %8u %8u\n", i,
14523+ atomic_read(&cacct->page[i][0]),
14524+ atomic_read(&cacct->page[i][1]),
14525+ atomic_read(&cacct->page[i][2]),
14526+ atomic_read(&cacct->page[i][3]),
14527+ atomic_read(&cacct->page[i][4]),
14528+ atomic_read(&cacct->page[i][5]),
14529+ atomic_read(&cacct->page[i][6]),
14530+ atomic_read(&cacct->page[i][7]));
14531+ }
14532+ return length;
14533+}
d337f35e 14534+
4bf69007 14535+#endif /* _VX_CACCT_PROC_H */
c2806d43
AM
14536diff -urNp -x '*.orig' linux-4.4/kernel/vserver/context.c linux-4.4/kernel/vserver/context.c
14537--- linux-4.4/kernel/vserver/context.c 1970-01-01 01:00:00.000000000 +0100
14538+++ linux-4.4/kernel/vserver/context.c 2021-02-24 16:56:24.609490163 +0100
4bf69007 14539@@ -0,0 +1,1119 @@
2380c486 14540+/*
4bf69007 14541+ * linux/kernel/vserver/context.c
2380c486 14542+ *
4bf69007 14543+ * Virtual Server: Context Support
2380c486 14544+ *
c2806d43 14545+ * Copyright (C) 2003-2011 Herbert Pötzl
2380c486 14546+ *
4bf69007
AM
14547+ * V0.01 context helper
14548+ * V0.02 vx_ctx_kill syscall command
14549+ * V0.03 replaced context_info calls
14550+ * V0.04 redesign of struct (de)alloc
14551+ * V0.05 rlimit basic implementation
14552+ * V0.06 task_xid and info commands
14553+ * V0.07 context flags and caps
14554+ * V0.08 switch to RCU based hash
14555+ * V0.09 revert to non RCU for now
14556+ * V0.10 and back to working RCU hash
14557+ * V0.11 and back to locking again
14558+ * V0.12 referenced context store
14559+ * V0.13 separate per cpu data
14560+ * V0.14 changed vcmds to vxi arg
14561+ * V0.15 added context stat
14562+ * V0.16 have __create claim() the vxi
14563+ * V0.17 removed older and legacy stuff
14564+ * V0.18 added user credentials
14565+ * V0.19 added warn mask
2380c486
JR
14566+ *
14567+ */
d337f35e 14568+
4bf69007 14569+#include <linux/slab.h>
2380c486 14570+#include <linux/types.h>
4bf69007
AM
14571+#include <linux/security.h>
14572+#include <linux/pid_namespace.h>
14573+#include <linux/capability.h>
1e8b8f9b 14574+
4bf69007
AM
14575+#include <linux/vserver/context.h>
14576+#include <linux/vserver/network.h>
14577+#include <linux/vserver/debug.h>
14578+#include <linux/vserver/limit.h>
14579+#include <linux/vserver/limit_int.h>
14580+#include <linux/vserver/space.h>
14581+#include <linux/init_task.h>
14582+#include <linux/fs_struct.h>
14583+#include <linux/cred.h>
1e8b8f9b 14584+
4bf69007
AM
14585+#include <linux/vs_context.h>
14586+#include <linux/vs_limit.h>
14587+#include <linux/vs_pid.h>
14588+#include <linux/vserver/context_cmd.h>
d337f35e 14589+
4bf69007
AM
14590+#include "cvirt_init.h"
14591+#include "cacct_init.h"
14592+#include "limit_init.h"
14593+#include "sched_init.h"
d337f35e 14594+
d337f35e 14595+
4bf69007
AM
14596+atomic_t vx_global_ctotal = ATOMIC_INIT(0);
14597+atomic_t vx_global_cactive = ATOMIC_INIT(0);
d337f35e 14598+
d337f35e 14599+
4bf69007 14600+/* now inactive context structures */
d337f35e 14601+
4bf69007 14602+static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT;
2380c486 14603+
4bf69007 14604+static DEFINE_SPINLOCK(vx_info_inactive_lock);
d337f35e 14605+
2380c486 14606+
4bf69007 14607+/* __alloc_vx_info()
d337f35e 14608+
4bf69007
AM
14609+ * allocate an initialized vx_info struct
14610+ * doesn't make it visible (hash) */
d337f35e 14611+
61333608 14612+static struct vx_info *__alloc_vx_info(vxid_t xid)
4bf69007
AM
14613+{
14614+ struct vx_info *new = NULL;
14615+ int cpu, index;
d337f35e 14616+
4bf69007 14617+ vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
d337f35e 14618+
4bf69007
AM
14619+ /* would this benefit from a slab cache? */
14620+ new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
14621+ if (!new)
14622+ return 0;
2380c486 14623+
4bf69007
AM
14624+ memset(new, 0, sizeof(struct vx_info));
14625+#ifdef CONFIG_SMP
14626+ new->ptr_pc = alloc_percpu(struct _vx_info_pc);
14627+ if (!new->ptr_pc)
14628+ goto error;
14629+#endif
14630+ new->vx_id = xid;
14631+ INIT_HLIST_NODE(&new->vx_hlist);
14632+ atomic_set(&new->vx_usecnt, 0);
14633+ atomic_set(&new->vx_tasks, 0);
14634+ new->vx_parent = NULL;
14635+ new->vx_state = 0;
14636+ init_waitqueue_head(&new->vx_wait);
2380c486 14637+
4bf69007
AM
14638+ /* prepare reaper */
14639+ get_task_struct(init_pid_ns.child_reaper);
14640+ new->vx_reaper = init_pid_ns.child_reaper;
14641+ new->vx_badness_bias = 0;
d337f35e 14642+
4bf69007
AM
14643+ /* rest of init goes here */
14644+ vx_info_init_limit(&new->limit);
14645+ vx_info_init_sched(&new->sched);
14646+ vx_info_init_cvirt(&new->cvirt);
14647+ vx_info_init_cacct(&new->cacct);
d337f35e 14648+
4bf69007
AM
14649+ /* per cpu data structures */
14650+ for_each_possible_cpu(cpu) {
14651+ vx_info_init_sched_pc(
14652+ &vx_per_cpu(new, sched_pc, cpu), cpu);
14653+ vx_info_init_cvirt_pc(
14654+ &vx_per_cpu(new, cvirt_pc, cpu), cpu);
14655+ }
d337f35e 14656+
4bf69007
AM
14657+ new->vx_flags = VXF_INIT_SET;
14658+ new->vx_bcaps = CAP_FULL_SET; // maybe ~CAP_SETPCAP
14659+ new->vx_ccaps = 0;
14660+ new->vx_umask = 0;
14661+ new->vx_wmask = 0;
d337f35e 14662+
4bf69007
AM
14663+ new->reboot_cmd = 0;
14664+ new->exit_code = 0;
d337f35e 14665+
4bf69007
AM
14666+ // preconfig spaces
14667+ for (index = 0; index < VX_SPACES; index++) {
14668+ struct _vx_space *space = &new->space[index];
d337f35e 14669+
4bf69007
AM
14670+ // filesystem
14671+ spin_lock(&init_fs.lock);
14672+ init_fs.users++;
14673+ spin_unlock(&init_fs.lock);
14674+ space->vx_fs = &init_fs;
2380c486 14675+
4bf69007
AM
14676+ /* FIXME: do we want defaults? */
14677+ // space->vx_real_cred = 0;
14678+ // space->vx_cred = 0;
2380c486 14679+ }
4bf69007
AM
14680+
14681+
14682+ vxdprintk(VXD_CBIT(xid, 0),
14683+ "alloc_vx_info(%d) = %p", xid, new);
14684+ vxh_alloc_vx_info(new);
14685+ atomic_inc(&vx_global_ctotal);
14686+ return new;
14687+#ifdef CONFIG_SMP
14688+error:
14689+ kfree(new);
14690+ return 0;
14691+#endif
d337f35e
JR
14692+}
14693+
4bf69007 14694+/* __dealloc_vx_info()
d337f35e 14695+
4bf69007 14696+ * final disposal of vx_info */
d337f35e 14697+
4bf69007 14698+static void __dealloc_vx_info(struct vx_info *vxi)
d337f35e 14699+{
4bf69007
AM
14700+#ifdef CONFIG_VSERVER_WARN
14701+ struct vx_info_save vxis;
14702+ int cpu;
14703+#endif
14704+ vxdprintk(VXD_CBIT(xid, 0),
14705+ "dealloc_vx_info(%p)", vxi);
14706+ vxh_dealloc_vx_info(vxi);
d337f35e 14707+
4bf69007
AM
14708+#ifdef CONFIG_VSERVER_WARN
14709+ enter_vx_info(vxi, &vxis);
14710+ vx_info_exit_limit(&vxi->limit);
14711+ vx_info_exit_sched(&vxi->sched);
14712+ vx_info_exit_cvirt(&vxi->cvirt);
14713+ vx_info_exit_cacct(&vxi->cacct);
d337f35e 14714+
4bf69007
AM
14715+ for_each_possible_cpu(cpu) {
14716+ vx_info_exit_sched_pc(
14717+ &vx_per_cpu(vxi, sched_pc, cpu), cpu);
14718+ vx_info_exit_cvirt_pc(
14719+ &vx_per_cpu(vxi, cvirt_pc, cpu), cpu);
14720+ }
14721+ leave_vx_info(&vxis);
14722+#endif
d337f35e 14723+
4bf69007
AM
14724+ vxi->vx_id = -1;
14725+ vxi->vx_state |= VXS_RELEASED;
d337f35e 14726+
4bf69007
AM
14727+#ifdef CONFIG_SMP
14728+ free_percpu(vxi->ptr_pc);
14729+#endif
14730+ kfree(vxi);
14731+ atomic_dec(&vx_global_ctotal);
d337f35e
JR
14732+}
14733+
4bf69007 14734+static void __shutdown_vx_info(struct vx_info *vxi)
d337f35e 14735+{
4bf69007
AM
14736+ struct nsproxy *nsproxy;
14737+ struct fs_struct *fs;
14738+ struct cred *cred;
14739+ int index, kill;
d337f35e 14740+
4bf69007 14741+ might_sleep();
d337f35e 14742+
4bf69007
AM
14743+ vxi->vx_state |= VXS_SHUTDOWN;
14744+ vs_state_change(vxi, VSC_SHUTDOWN);
d337f35e 14745+
4bf69007
AM
14746+ for (index = 0; index < VX_SPACES; index++) {
14747+ struct _vx_space *space = &vxi->space[index];
d337f35e 14748+
4bf69007
AM
14749+ nsproxy = xchg(&space->vx_nsproxy, NULL);
14750+ if (nsproxy)
14751+ put_nsproxy(nsproxy);
2380c486 14752+
4bf69007
AM
14753+ fs = xchg(&space->vx_fs, NULL);
14754+ spin_lock(&fs->lock);
14755+ kill = !--fs->users;
14756+ spin_unlock(&fs->lock);
14757+ if (kill)
14758+ free_fs_struct(fs);
d337f35e 14759+
4bf69007
AM
14760+ cred = (struct cred *)xchg(&space->vx_cred, NULL);
14761+ if (cred)
14762+ abort_creds(cred);
14763+ }
d337f35e
JR
14764+}
14765+
4bf69007 14766+/* exported stuff */
d337f35e 14767+
4bf69007 14768+void free_vx_info(struct vx_info *vxi)
d337f35e 14769+{
4bf69007
AM
14770+ unsigned long flags;
14771+ unsigned index;
d337f35e 14772+
4bf69007
AM
14773+ /* check for reference counts first */
14774+ BUG_ON(atomic_read(&vxi->vx_usecnt));
14775+ BUG_ON(atomic_read(&vxi->vx_tasks));
2380c486 14776+
4bf69007
AM
14777+ /* context must not be hashed */
14778+ BUG_ON(vx_info_state(vxi, VXS_HASHED));
d337f35e 14779+
4bf69007
AM
14780+ /* context shutdown is mandatory */
14781+ BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN));
d337f35e 14782+
4bf69007
AM
14783+ /* spaces check */
14784+ for (index = 0; index < VX_SPACES; index++) {
14785+ struct _vx_space *space = &vxi->space[index];
d337f35e 14786+
4bf69007
AM
14787+ BUG_ON(space->vx_nsproxy);
14788+ BUG_ON(space->vx_fs);
14789+ // BUG_ON(space->vx_real_cred);
14790+ // BUG_ON(space->vx_cred);
14791+ }
d337f35e 14792+
4bf69007
AM
14793+ spin_lock_irqsave(&vx_info_inactive_lock, flags);
14794+ hlist_del(&vxi->vx_hlist);
14795+ spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
d337f35e 14796+
4bf69007
AM
14797+ __dealloc_vx_info(vxi);
14798+}
eab5a9a6 14799+
d337f35e 14800+
4bf69007 14801+/* hash table for vx_info hash */
93de0823 14802+
4bf69007 14803+#define VX_HASH_SIZE 13
d337f35e 14804+
4bf69007
AM
14805+static struct hlist_head vx_info_hash[VX_HASH_SIZE] =
14806+ { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT };
d337f35e 14807+
4bf69007 14808+static DEFINE_SPINLOCK(vx_info_hash_lock);
d337f35e 14809+
93de0823 14810+
61333608 14811+static inline unsigned int __hashval(vxid_t xid)
4bf69007
AM
14812+{
14813+ return (xid % VX_HASH_SIZE);
d337f35e
JR
14814+}
14815+
14816+
d337f35e 14817+
4bf69007 14818+/* __hash_vx_info()
d337f35e 14819+
4bf69007
AM
14820+ * add the vxi to the global hash table
14821+ * requires the hash_lock to be held */
d337f35e 14822+
4bf69007 14823+static inline void __hash_vx_info(struct vx_info *vxi)
d337f35e 14824+{
4bf69007 14825+ struct hlist_head *head;
d337f35e 14826+
4bf69007
AM
14827+ vxd_assert_lock(&vx_info_hash_lock);
14828+ vxdprintk(VXD_CBIT(xid, 4),
14829+ "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
14830+ vxh_hash_vx_info(vxi);
d337f35e 14831+
4bf69007
AM
14832+ /* context must not be hashed */
14833+ BUG_ON(vx_info_state(vxi, VXS_HASHED));
d337f35e 14834+
4bf69007
AM
14835+ vxi->vx_state |= VXS_HASHED;
14836+ head = &vx_info_hash[__hashval(vxi->vx_id)];
14837+ hlist_add_head(&vxi->vx_hlist, head);
14838+ atomic_inc(&vx_global_cactive);
2380c486 14839+}
d337f35e 14840+
4bf69007 14841+/* __unhash_vx_info()
d337f35e 14842+
4bf69007
AM
14843+ * remove the vxi from the global hash table
14844+ * requires the hash_lock to be held */
d337f35e 14845+
4bf69007 14846+static inline void __unhash_vx_info(struct vx_info *vxi)
d337f35e 14847+{
4bf69007
AM
14848+ unsigned long flags;
14849+
14850+ vxd_assert_lock(&vx_info_hash_lock);
14851+ vxdprintk(VXD_CBIT(xid, 4),
14852+ "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id,
14853+ atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks));
14854+ vxh_unhash_vx_info(vxi);
14855+
14856+ /* context must be hashed */
14857+ BUG_ON(!vx_info_state(vxi, VXS_HASHED));
14858+ /* but without tasks */
14859+ BUG_ON(atomic_read(&vxi->vx_tasks));
14860+
14861+ vxi->vx_state &= ~VXS_HASHED;
14862+ hlist_del_init(&vxi->vx_hlist);
14863+ spin_lock_irqsave(&vx_info_inactive_lock, flags);
14864+ hlist_add_head(&vxi->vx_hlist, &vx_info_inactive);
14865+ spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
14866+ atomic_dec(&vx_global_cactive);
2380c486 14867+}
d337f35e 14868+
d337f35e 14869+
4bf69007 14870+/* __lookup_vx_info()
d337f35e 14871+
4bf69007
AM
14872+ * requires the hash_lock to be held
14873+ * doesn't increment the vx_refcnt */
2380c486 14874+
61333608 14875+static inline struct vx_info *__lookup_vx_info(vxid_t xid)
d337f35e 14876+{
4bf69007
AM
14877+ struct hlist_head *head = &vx_info_hash[__hashval(xid)];
14878+ struct hlist_node *pos;
14879+ struct vx_info *vxi;
d337f35e 14880+
4bf69007
AM
14881+ vxd_assert_lock(&vx_info_hash_lock);
14882+ hlist_for_each(pos, head) {
14883+ vxi = hlist_entry(pos, struct vx_info, vx_hlist);
d337f35e 14884+
4bf69007
AM
14885+ if (vxi->vx_id == xid)
14886+ goto found;
14887+ }
14888+ vxi = NULL;
14889+found:
14890+ vxdprintk(VXD_CBIT(xid, 0),
14891+ "__lookup_vx_info(#%u): %p[#%u]",
14892+ xid, vxi, vxi ? vxi->vx_id : 0);
14893+ vxh_lookup_vx_info(vxi, xid);
14894+ return vxi;
14895+}
d337f35e 14896+
d337f35e 14897+
4bf69007 14898+/* __create_vx_info()
d337f35e 14899+
4bf69007
AM
14900+ * create the requested context
14901+ * get(), claim() and hash it */
2380c486 14902+
4bf69007
AM
14903+static struct vx_info *__create_vx_info(int id)
14904+{
14905+ struct vx_info *new, *vxi = NULL;
2380c486 14906+
4bf69007 14907+ vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id);
d337f35e 14908+
4bf69007
AM
14909+ if (!(new = __alloc_vx_info(id)))
14910+ return ERR_PTR(-ENOMEM);
d337f35e 14911+
4bf69007
AM
14912+ /* required to make dynamic xids unique */
14913+ spin_lock(&vx_info_hash_lock);
d337f35e 14914+
4bf69007
AM
14915+ /* static context requested */
14916+ if ((vxi = __lookup_vx_info(id))) {
14917+ vxdprintk(VXD_CBIT(xid, 0),
14918+ "create_vx_info(%d) = %p (already there)", id, vxi);
14919+ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0))
14920+ vxi = ERR_PTR(-EBUSY);
14921+ else
14922+ vxi = ERR_PTR(-EEXIST);
14923+ goto out_unlock;
14924+ }
14925+ /* new context */
14926+ vxdprintk(VXD_CBIT(xid, 0),
14927+ "create_vx_info(%d) = %p (new)", id, new);
14928+ claim_vx_info(new, NULL);
14929+ __hash_vx_info(get_vx_info(new));
14930+ vxi = new, new = NULL;
d337f35e 14931+
4bf69007
AM
14932+out_unlock:
14933+ spin_unlock(&vx_info_hash_lock);
14934+ vxh_create_vx_info(IS_ERR(vxi) ? NULL : vxi, id);
14935+ if (new)
14936+ __dealloc_vx_info(new);
14937+ return vxi;
14938+}
d337f35e 14939+
d337f35e 14940+
4bf69007 14941+/* exported stuff */
d337f35e 14942+
d337f35e 14943+
4bf69007 14944+void unhash_vx_info(struct vx_info *vxi)
d337f35e 14945+{
4bf69007
AM
14946+ spin_lock(&vx_info_hash_lock);
14947+ __unhash_vx_info(vxi);
14948+ spin_unlock(&vx_info_hash_lock);
14949+ __shutdown_vx_info(vxi);
14950+ __wakeup_vx_info(vxi);
2380c486 14951+}
d337f35e 14952+
2380c486 14953+
4bf69007 14954+/* lookup_vx_info()
2380c486 14955+
4bf69007
AM
14956+ * search for a vx_info and get() it
14957+ * negative id means current */
2380c486 14958+
4bf69007 14959+struct vx_info *lookup_vx_info(int id)
2380c486 14960+{
4bf69007
AM
14961+ struct vx_info *vxi = NULL;
14962+
14963+ if (id < 0) {
14964+ vxi = get_vx_info(current_vx_info());
14965+ } else if (id > 1) {
14966+ spin_lock(&vx_info_hash_lock);
14967+ vxi = get_vx_info(__lookup_vx_info(id));
14968+ spin_unlock(&vx_info_hash_lock);
2380c486 14969+ }
4bf69007 14970+ return vxi;
d337f35e
JR
14971+}
14972+
4bf69007 14973+/* xid_is_hashed()
d337f35e 14974+
4bf69007 14975+ * verify that xid is still hashed */
d337f35e 14976+
61333608 14977+int xid_is_hashed(vxid_t xid)
4bf69007
AM
14978+{
14979+ int hashed;
d337f35e 14980+
4bf69007
AM
14981+ spin_lock(&vx_info_hash_lock);
14982+ hashed = (__lookup_vx_info(xid) != NULL);
14983+ spin_unlock(&vx_info_hash_lock);
14984+ return hashed;
14985+}
d337f35e 14986+
4bf69007 14987+#ifdef CONFIG_PROC_FS
d337f35e 14988+
4bf69007 14989+/* get_xid_list()
d337f35e 14990+
4bf69007
AM
14991+ * get a subset of hashed xids for proc
14992+ * assumes size is at least one */
d337f35e 14993+
4bf69007
AM
14994+int get_xid_list(int index, unsigned int *xids, int size)
14995+{
14996+ int hindex, nr_xids = 0;
d337f35e 14997+
4bf69007
AM
14998+ /* only show current and children */
14999+ if (!vx_check(0, VS_ADMIN | VS_WATCH)) {
15000+ if (index > 0)
15001+ return 0;
15002+ xids[nr_xids] = vx_current_xid();
15003+ return 1;
15004+ }
d337f35e 15005+
4bf69007
AM
15006+ for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
15007+ struct hlist_head *head = &vx_info_hash[hindex];
15008+ struct hlist_node *pos;
d337f35e 15009+
4bf69007
AM
15010+ spin_lock(&vx_info_hash_lock);
15011+ hlist_for_each(pos, head) {
15012+ struct vx_info *vxi;
d337f35e 15013+
4bf69007
AM
15014+ if (--index > 0)
15015+ continue;
d337f35e 15016+
4bf69007
AM
15017+ vxi = hlist_entry(pos, struct vx_info, vx_hlist);
15018+ xids[nr_xids] = vxi->vx_id;
15019+ if (++nr_xids >= size) {
15020+ spin_unlock(&vx_info_hash_lock);
15021+ goto out;
15022+ }
15023+ }
15024+ /* keep the lock time short */
15025+ spin_unlock(&vx_info_hash_lock);
15026+ }
15027+out:
15028+ return nr_xids;
15029+}
15030+#endif
d337f35e 15031+
4bf69007 15032+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 15033+
4bf69007 15034+void dump_vx_info_inactive(int level)
d337f35e 15035+{
4bf69007 15036+ struct hlist_node *entry, *next;
d337f35e 15037+
4bf69007
AM
15038+ hlist_for_each_safe(entry, next, &vx_info_inactive) {
15039+ struct vx_info *vxi =
15040+ list_entry(entry, struct vx_info, vx_hlist);
d337f35e 15041+
4bf69007
AM
15042+ dump_vx_info(vxi, level);
15043+ }
d337f35e
JR
15044+}
15045+
4bf69007 15046+#endif
d337f35e 15047+
4bf69007
AM
15048+#if 0
15049+int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
d337f35e 15050+{
4bf69007 15051+ struct user_struct *new_user, *old_user;
d337f35e 15052+
4bf69007
AM
15053+ if (!p || !vxi)
15054+ BUG();
d337f35e 15055+
4bf69007
AM
15056+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
15057+ return -EACCES;
d337f35e 15058+
4bf69007
AM
15059+ new_user = alloc_uid(vxi->vx_id, p->uid);
15060+ if (!new_user)
15061+ return -ENOMEM;
d337f35e 15062+
4bf69007
AM
15063+ old_user = p->user;
15064+ if (new_user != old_user) {
15065+ atomic_inc(&new_user->processes);
15066+ atomic_dec(&old_user->processes);
15067+ p->user = new_user;
d337f35e 15068+ }
4bf69007
AM
15069+ free_uid(old_user);
15070+ return 0;
d337f35e 15071+}
4bf69007 15072+#endif
d337f35e 15073+
4bf69007
AM
15074+#if 0
15075+void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p)
d337f35e 15076+{
4bf69007
AM
15077+ // p->cap_effective &= vxi->vx_cap_bset;
15078+ p->cap_effective =
15079+ cap_intersect(p->cap_effective, vxi->cap_bset);
15080+ // p->cap_inheritable &= vxi->vx_cap_bset;
15081+ p->cap_inheritable =
15082+ cap_intersect(p->cap_inheritable, vxi->cap_bset);
15083+ // p->cap_permitted &= vxi->vx_cap_bset;
15084+ p->cap_permitted =
15085+ cap_intersect(p->cap_permitted, vxi->cap_bset);
15086+}
15087+#endif
d337f35e
JR
15088+
15089+
4bf69007
AM
15090+#include <linux/file.h>
15091+#include <linux/fdtable.h>
d337f35e 15092+
4bf69007
AM
15093+static int vx_openfd_task(struct task_struct *tsk)
15094+{
15095+ struct files_struct *files = tsk->files;
15096+ struct fdtable *fdt;
15097+ const unsigned long *bptr;
15098+ int count, total;
d337f35e 15099+
4bf69007
AM
15100+ /* no rcu_read_lock() because of spin_lock() */
15101+ spin_lock(&files->file_lock);
15102+ fdt = files_fdtable(files);
15103+ bptr = fdt->open_fds;
15104+ count = fdt->max_fds / (sizeof(unsigned long) * 8);
15105+ for (total = 0; count > 0; count--) {
15106+ if (*bptr)
15107+ total += hweight_long(*bptr);
15108+ bptr++;
15109+ }
15110+ spin_unlock(&files->file_lock);
15111+ return total;
d337f35e
JR
15112+}
15113+
d337f35e 15114+
4bf69007
AM
15115+/* for *space compatibility */
15116+
15117+asmlinkage long sys_unshare(unsigned long);
15118+
15119+/*
15120+ * migrate task to new context
15121+ * gets vxi, puts old_vxi on change
15122+ * optionally unshares namespaces (hack)
2380c486 15123+ */
4bf69007
AM
15124+
15125+int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare)
2380c486 15126+{
4bf69007
AM
15127+ struct vx_info *old_vxi;
15128+ int ret = 0;
d337f35e 15129+
4bf69007
AM
15130+ if (!p || !vxi)
15131+ BUG();
d337f35e 15132+
4bf69007
AM
15133+ vxdprintk(VXD_CBIT(xid, 5),
15134+ "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
15135+ vxi->vx_id, atomic_read(&vxi->vx_usecnt));
d337f35e 15136+
4bf69007
AM
15137+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) &&
15138+ !vx_info_flags(vxi, VXF_STATE_SETUP, 0))
15139+ return -EACCES;
2380c486 15140+
4bf69007
AM
15141+ if (vx_info_state(vxi, VXS_SHUTDOWN))
15142+ return -EFAULT;
d337f35e 15143+
4bf69007
AM
15144+ old_vxi = task_get_vx_info(p);
15145+ if (old_vxi == vxi)
15146+ goto out;
d337f35e 15147+
4bf69007
AM
15148+// if (!(ret = vx_migrate_user(p, vxi))) {
15149+ {
15150+ int openfd;
d337f35e 15151+
4bf69007
AM
15152+ task_lock(p);
15153+ openfd = vx_openfd_task(p);
15154+
15155+ if (old_vxi) {
15156+ atomic_dec(&old_vxi->cvirt.nr_threads);
15157+ atomic_dec(&old_vxi->cvirt.nr_running);
15158+ __rlim_dec(&old_vxi->limit, RLIMIT_NPROC);
15159+ /* FIXME: what about the struct files here? */
15160+ __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd);
15161+ /* account for the executable */
15162+ __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY);
2380c486 15163+ }
4bf69007
AM
15164+ atomic_inc(&vxi->cvirt.nr_threads);
15165+ atomic_inc(&vxi->cvirt.nr_running);
15166+ __rlim_inc(&vxi->limit, RLIMIT_NPROC);
15167+ /* FIXME: what about the struct files here? */
15168+ __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd);
15169+ /* account for the executable */
15170+ __rlim_inc(&vxi->limit, VLIMIT_DENTRY);
2380c486 15171+
4bf69007
AM
15172+ if (old_vxi) {
15173+ release_vx_info(old_vxi, p);
15174+ clr_vx_info(&p->vx_info);
15175+ }
15176+ claim_vx_info(vxi, p);
15177+ set_vx_info(&p->vx_info, vxi);
15178+ p->xid = vxi->vx_id;
d337f35e 15179+
4bf69007
AM
15180+ vxdprintk(VXD_CBIT(xid, 5),
15181+ "moved task %p into vxi:%p[#%d]",
15182+ p, vxi, vxi->vx_id);
d337f35e 15183+
4bf69007
AM
15184+ // vx_mask_cap_bset(vxi, p);
15185+ task_unlock(p);
d337f35e 15186+
4bf69007
AM
15187+ /* hack for *spaces to provide compatibility */
15188+ if (unshare) {
15189+ struct nsproxy *old_nsp, *new_nsp;
d337f35e 15190+
4bf69007
AM
15191+ ret = unshare_nsproxy_namespaces(
15192+ CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER,
b00e13aa 15193+ &new_nsp, NULL, NULL);
4bf69007
AM
15194+ if (ret)
15195+ goto out;
d337f35e 15196+
4bf69007
AM
15197+ old_nsp = xchg(&p->nsproxy, new_nsp);
15198+ vx_set_space(vxi,
15199+ CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER, 0);
15200+ put_nsproxy(old_nsp);
15201+ }
15202+ }
15203+out:
15204+ put_vx_info(old_vxi);
2380c486
JR
15205+ return ret;
15206+}
d337f35e 15207+
4bf69007 15208+int vx_set_reaper(struct vx_info *vxi, struct task_struct *p)
d337f35e 15209+{
4bf69007
AM
15210+ struct task_struct *old_reaper;
15211+ struct vx_info *reaper_vxi;
d337f35e 15212+
4bf69007
AM
15213+ if (!vxi)
15214+ return -EINVAL;
d337f35e 15215+
4bf69007
AM
15216+ vxdprintk(VXD_CBIT(xid, 6),
15217+ "vx_set_reaper(%p[#%d],%p[#%d,%d])",
15218+ vxi, vxi->vx_id, p, p->xid, p->pid);
d337f35e 15219+
4bf69007
AM
15220+ old_reaper = vxi->vx_reaper;
15221+ if (old_reaper == p)
15222+ return 0;
d337f35e 15223+
4bf69007
AM
15224+ reaper_vxi = task_get_vx_info(p);
15225+ if (reaper_vxi && reaper_vxi != vxi) {
15226+ vxwprintk(1,
15227+ "Unsuitable reaper [" VS_Q("%s") ",%u:#%u] "
15228+ "for [xid #%u]",
15229+ p->comm, p->pid, p->xid, vx_current_xid());
2380c486
JR
15230+ goto out;
15231+ }
4bf69007
AM
15232+
15233+ /* set new child reaper */
15234+ get_task_struct(p);
15235+ vxi->vx_reaper = p;
15236+ put_task_struct(old_reaper);
2380c486 15237+out:
4bf69007
AM
15238+ put_vx_info(reaper_vxi);
15239+ return 0;
2380c486 15240+}
d337f35e 15241+
4bf69007 15242+int vx_set_init(struct vx_info *vxi, struct task_struct *p)
d337f35e 15243+{
4bf69007
AM
15244+ if (!vxi)
15245+ return -EINVAL;
d337f35e 15246+
4bf69007
AM
15247+ vxdprintk(VXD_CBIT(xid, 6),
15248+ "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
15249+ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
d337f35e 15250+
4bf69007
AM
15251+ vxi->vx_flags &= ~VXF_STATE_INIT;
15252+ // vxi->vx_initpid = p->tgid;
15253+ vxi->vx_initpid = p->pid;
2380c486 15254+ return 0;
d337f35e
JR
15255+}
15256+
4bf69007 15257+void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code)
d337f35e 15258+{
4bf69007
AM
15259+ vxdprintk(VXD_CBIT(xid, 6),
15260+ "vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
15261+ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
2380c486 15262+
4bf69007
AM
15263+ vxi->exit_code = code;
15264+ vxi->vx_initpid = 0;
d337f35e
JR
15265+}
15266+
2380c486 15267+
4bf69007 15268+void vx_set_persistent(struct vx_info *vxi)
d337f35e 15269+{
4bf69007
AM
15270+ vxdprintk(VXD_CBIT(xid, 6),
15271+ "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id);
2380c486 15272+
4bf69007
AM
15273+ get_vx_info(vxi);
15274+ claim_vx_info(vxi, NULL);
d337f35e
JR
15275+}
15276+
4bf69007 15277+void vx_clear_persistent(struct vx_info *vxi)
2380c486 15278+{
4bf69007
AM
15279+ vxdprintk(VXD_CBIT(xid, 6),
15280+ "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id);
d337f35e 15281+
4bf69007
AM
15282+ release_vx_info(vxi, NULL);
15283+ put_vx_info(vxi);
2380c486 15284+}
d337f35e 15285+
4bf69007 15286+void vx_update_persistent(struct vx_info *vxi)
d337f35e 15287+{
4bf69007
AM
15288+ if (vx_info_flags(vxi, VXF_PERSISTENT, 0))
15289+ vx_set_persistent(vxi);
2380c486 15290+ else
4bf69007 15291+ vx_clear_persistent(vxi);
2380c486 15292+}
d337f35e 15293+
d337f35e 15294+
4bf69007
AM
15295+/* task must be current or locked */
15296+
15297+void exit_vx_info(struct task_struct *p, int code)
2380c486 15298+{
4bf69007 15299+ struct vx_info *vxi = p->vx_info;
d337f35e 15300+
4bf69007
AM
15301+ if (vxi) {
15302+ atomic_dec(&vxi->cvirt.nr_threads);
15303+ vx_nproc_dec(p);
d337f35e 15304+
4bf69007
AM
15305+ vxi->exit_code = code;
15306+ release_vx_info(vxi, p);
15307+ }
2380c486 15308+}
d337f35e 15309+
4bf69007 15310+void exit_vx_info_early(struct task_struct *p, int code)
2380c486 15311+{
4bf69007 15312+ struct vx_info *vxi = p->vx_info;
d337f35e 15313+
4bf69007
AM
15314+ if (vxi) {
15315+ if (vxi->vx_initpid == p->pid)
15316+ vx_exit_init(vxi, p, code);
15317+ if (vxi->vx_reaper == p)
15318+ vx_set_reaper(vxi, init_pid_ns.child_reaper);
15319+ }
d337f35e
JR
15320+}
15321+
15322+
4bf69007 15323+/* vserver syscall commands below here */
d337f35e 15324+
4bf69007 15325+/* task xid and vx_info functions */
d337f35e 15326+
4bf69007 15327+#include <asm/uaccess.h>
d337f35e 15328+
d337f35e 15329+
4bf69007 15330+int vc_task_xid(uint32_t id)
d337f35e 15331+{
61333608 15332+ vxid_t xid;
d337f35e 15333+
4bf69007
AM
15334+ if (id) {
15335+ struct task_struct *tsk;
d337f35e 15336+
4bf69007
AM
15337+ rcu_read_lock();
15338+ tsk = find_task_by_real_pid(id);
15339+ xid = (tsk) ? tsk->xid : -ESRCH;
15340+ rcu_read_unlock();
15341+ } else
15342+ xid = vx_current_xid();
15343+ return xid;
d337f35e
JR
15344+}
15345+
d337f35e 15346+
4bf69007
AM
15347+int vc_vx_info(struct vx_info *vxi, void __user *data)
15348+{
15349+ struct vcmd_vx_info_v0 vc_data;
d337f35e 15350+
4bf69007
AM
15351+ vc_data.xid = vxi->vx_id;
15352+ vc_data.initpid = vxi->vx_initpid;
d337f35e 15353+
4bf69007
AM
15354+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15355+ return -EFAULT;
15356+ return 0;
15357+}
d337f35e 15358+
d337f35e 15359+
4bf69007 15360+int vc_ctx_stat(struct vx_info *vxi, void __user *data)
d337f35e 15361+{
4bf69007 15362+ struct vcmd_ctx_stat_v0 vc_data;
d337f35e 15363+
4bf69007
AM
15364+ vc_data.usecnt = atomic_read(&vxi->vx_usecnt);
15365+ vc_data.tasks = atomic_read(&vxi->vx_tasks);
d337f35e 15366+
4bf69007
AM
15367+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15368+ return -EFAULT;
15369+ return 0;
d337f35e
JR
15370+}
15371+
d337f35e 15372+
4bf69007 15373+/* context functions */
d337f35e 15374+
4bf69007 15375+int vc_ctx_create(uint32_t xid, void __user *data)
d337f35e 15376+{
4bf69007
AM
15377+ struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET };
15378+ struct vx_info *new_vxi;
15379+ int ret;
d337f35e 15380+
4bf69007
AM
15381+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
15382+ return -EFAULT;
d337f35e 15383+
4bf69007
AM
15384+ if ((xid > MAX_S_CONTEXT) || (xid < 2))
15385+ return -EINVAL;
d337f35e 15386+
4bf69007
AM
15387+ new_vxi = __create_vx_info(xid);
15388+ if (IS_ERR(new_vxi))
15389+ return PTR_ERR(new_vxi);
d337f35e 15390+
4bf69007
AM
15391+ /* initial flags */
15392+ new_vxi->vx_flags = vc_data.flagword;
d337f35e 15393+
4bf69007
AM
15394+ ret = -ENOEXEC;
15395+ if (vs_state_change(new_vxi, VSC_STARTUP))
15396+ goto out;
d337f35e 15397+
4bf69007
AM
15398+ ret = vx_migrate_task(current, new_vxi, (!data));
15399+ if (ret)
15400+ goto out;
d337f35e 15401+
4bf69007
AM
15402+ /* return context id on success */
15403+ ret = new_vxi->vx_id;
d337f35e 15404+
4bf69007
AM
15405+ /* get a reference for persistent contexts */
15406+ if ((vc_data.flagword & VXF_PERSISTENT))
15407+ vx_set_persistent(new_vxi);
15408+out:
15409+ release_vx_info(new_vxi, NULL);
15410+ put_vx_info(new_vxi);
15411+ return ret;
15412+}
d337f35e
JR
15413+
15414+
4bf69007 15415+int vc_ctx_migrate(struct vx_info *vxi, void __user *data)
d337f35e 15416+{
4bf69007
AM
15417+ struct vcmd_ctx_migrate vc_data = { .flagword = 0 };
15418+ int ret;
d337f35e 15419+
4bf69007
AM
15420+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
15421+ return -EFAULT;
d337f35e 15422+
4bf69007
AM
15423+ ret = vx_migrate_task(current, vxi, 0);
15424+ if (ret)
15425+ return ret;
15426+ if (vc_data.flagword & VXM_SET_INIT)
15427+ ret = vx_set_init(vxi, current);
15428+ if (ret)
15429+ return ret;
15430+ if (vc_data.flagword & VXM_SET_REAPER)
15431+ ret = vx_set_reaper(vxi, current);
15432+ return ret;
15433+}
d337f35e 15434+
d337f35e 15435+
4bf69007 15436+int vc_get_cflags(struct vx_info *vxi, void __user *data)
d337f35e 15437+{
4bf69007 15438+ struct vcmd_ctx_flags_v0 vc_data;
d337f35e 15439+
4bf69007 15440+ vc_data.flagword = vxi->vx_flags;
d337f35e 15441+
4bf69007
AM
15442+ /* special STATE flag handling */
15443+ vc_data.mask = vs_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME);
d337f35e 15444+
4bf69007
AM
15445+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15446+ return -EFAULT;
15447+ return 0;
d337f35e
JR
15448+}
15449+
4bf69007
AM
15450+int vc_set_cflags(struct vx_info *vxi, void __user *data)
15451+{
15452+ struct vcmd_ctx_flags_v0 vc_data;
15453+ uint64_t mask, trigger;
d337f35e 15454+
4bf69007
AM
15455+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15456+ return -EFAULT;
d337f35e 15457+
4bf69007
AM
15458+ /* special STATE flag handling */
15459+ mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
15460+ trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
d337f35e 15461+
4bf69007
AM
15462+ if (vxi == current_vx_info()) {
15463+ /* if (trigger & VXF_STATE_SETUP)
15464+ vx_mask_cap_bset(vxi, current); */
15465+ if (trigger & VXF_STATE_INIT) {
15466+ int ret;
d337f35e 15467+
4bf69007
AM
15468+ ret = vx_set_init(vxi, current);
15469+ if (ret)
15470+ return ret;
15471+ ret = vx_set_reaper(vxi, current);
15472+ if (ret)
15473+ return ret;
d337f35e
JR
15474+ }
15475+ }
4bf69007
AM
15476+
15477+ vxi->vx_flags = vs_mask_flags(vxi->vx_flags,
15478+ vc_data.flagword, mask);
15479+ if (trigger & VXF_PERSISTENT)
15480+ vx_update_persistent(vxi);
15481+
15482+ return 0;
d337f35e
JR
15483+}
15484+
15485+
4bf69007 15486+static inline uint64_t caps_from_cap_t(kernel_cap_t c)
d337f35e 15487+{
4bf69007 15488+ uint64_t v = c.cap[0] | ((uint64_t)c.cap[1] << 32);
d337f35e 15489+
4bf69007
AM
15490+ // printk("caps_from_cap_t(%08x:%08x) = %016llx\n", c.cap[1], c.cap[0], v);
15491+ return v;
d337f35e
JR
15492+}
15493+
4bf69007 15494+static inline kernel_cap_t cap_t_from_caps(uint64_t v)
d337f35e 15495+{
4bf69007 15496+ kernel_cap_t c = __cap_empty_set;
d337f35e 15497+
4bf69007
AM
15498+ c.cap[0] = v & 0xFFFFFFFF;
15499+ c.cap[1] = (v >> 32) & 0xFFFFFFFF;
d337f35e 15500+
4bf69007
AM
15501+ // printk("cap_t_from_caps(%016llx) = %08x:%08x\n", v, c.cap[1], c.cap[0]);
15502+ return c;
d337f35e
JR
15503+}
15504+
15505+
4bf69007 15506+static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps)
d337f35e 15507+{
4bf69007
AM
15508+ if (bcaps)
15509+ *bcaps = caps_from_cap_t(vxi->vx_bcaps);
15510+ if (ccaps)
15511+ *ccaps = vxi->vx_ccaps;
d337f35e 15512+
4bf69007
AM
15513+ return 0;
15514+}
d337f35e 15515+
4bf69007
AM
15516+int vc_get_ccaps(struct vx_info *vxi, void __user *data)
15517+{
15518+ struct vcmd_ctx_caps_v1 vc_data;
15519+ int ret;
d337f35e 15520+
4bf69007
AM
15521+ ret = do_get_caps(vxi, NULL, &vc_data.ccaps);
15522+ if (ret)
15523+ return ret;
15524+ vc_data.cmask = ~0ULL;
d337f35e 15525+
4bf69007
AM
15526+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15527+ return -EFAULT;
15528+ return 0;
d337f35e
JR
15529+}
15530+
4bf69007
AM
15531+static int do_set_caps(struct vx_info *vxi,
15532+ uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask)
d337f35e 15533+{
4bf69007 15534+ uint64_t bcold = caps_from_cap_t(vxi->vx_bcaps);
d337f35e 15535+
4bf69007
AM
15536+#if 0
15537+ printk("do_set_caps(%16llx, %16llx, %16llx, %16llx)\n",
15538+ bcaps, bmask, ccaps, cmask);
15539+#endif
15540+ vxi->vx_bcaps = cap_t_from_caps(
15541+ vs_mask_flags(bcold, bcaps, bmask));
15542+ vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask);
d337f35e 15543+
4bf69007 15544+ return 0;
d337f35e
JR
15545+}
15546+
4bf69007 15547+int vc_set_ccaps(struct vx_info *vxi, void __user *data)
d337f35e 15548+{
4bf69007 15549+ struct vcmd_ctx_caps_v1 vc_data;
d337f35e 15550+
2380c486 15551+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
15552+ return -EFAULT;
15553+
4bf69007 15554+ return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask);
d337f35e
JR
15555+}
15556+
4bf69007 15557+int vc_get_bcaps(struct vx_info *vxi, void __user *data)
d337f35e 15558+{
4bf69007
AM
15559+ struct vcmd_bcaps vc_data;
15560+ int ret;
d337f35e 15561+
4bf69007
AM
15562+ ret = do_get_caps(vxi, &vc_data.bcaps, NULL);
15563+ if (ret)
15564+ return ret;
15565+ vc_data.bmask = ~0ULL;
d337f35e 15566+
4bf69007
AM
15567+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15568+ return -EFAULT;
15569+ return 0;
d337f35e
JR
15570+}
15571+
4bf69007 15572+int vc_set_bcaps(struct vx_info *vxi, void __user *data)
d337f35e 15573+{
4bf69007 15574+ struct vcmd_bcaps vc_data;
d337f35e 15575+
2380c486 15576+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
15577+ return -EFAULT;
15578+
4bf69007 15579+ return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0);
d337f35e
JR
15580+}
15581+
d337f35e 15582+
4bf69007 15583+int vc_get_umask(struct vx_info *vxi, void __user *data)
d337f35e 15584+{
4bf69007 15585+ struct vcmd_umask vc_data;
7e46296a 15586+
4bf69007
AM
15587+ vc_data.umask = vxi->vx_umask;
15588+ vc_data.mask = ~0ULL;
d337f35e 15589+
4bf69007
AM
15590+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15591+ return -EFAULT;
15592+ return 0;
15593+}
d337f35e 15594+
4bf69007
AM
15595+int vc_set_umask(struct vx_info *vxi, void __user *data)
15596+{
15597+ struct vcmd_umask vc_data;
d337f35e 15598+
4bf69007
AM
15599+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15600+ return -EFAULT;
7e46296a 15601+
4bf69007
AM
15602+ vxi->vx_umask = vs_mask_flags(vxi->vx_umask,
15603+ vc_data.umask, vc_data.mask);
15604+ return 0;
15605+}
7e46296a 15606+
d337f35e 15607+
4bf69007
AM
15608+int vc_get_wmask(struct vx_info *vxi, void __user *data)
15609+{
15610+ struct vcmd_wmask vc_data;
d337f35e 15611+
4bf69007
AM
15612+ vc_data.wmask = vxi->vx_wmask;
15613+ vc_data.mask = ~0ULL;
d337f35e 15614+
4bf69007
AM
15615+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15616+ return -EFAULT;
15617+ return 0;
d337f35e
JR
15618+}
15619+
4bf69007 15620+int vc_set_wmask(struct vx_info *vxi, void __user *data)
d337f35e 15621+{
4bf69007 15622+ struct vcmd_wmask vc_data;
d337f35e 15623+
2380c486 15624+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
15625+ return -EFAULT;
15626+
4bf69007
AM
15627+ vxi->vx_wmask = vs_mask_flags(vxi->vx_wmask,
15628+ vc_data.wmask, vc_data.mask);
15629+ return 0;
d337f35e
JR
15630+}
15631+
d337f35e 15632+
4bf69007 15633+int vc_get_badness(struct vx_info *vxi, void __user *data)
d337f35e 15634+{
4bf69007
AM
15635+ struct vcmd_badness_v0 vc_data;
15636+
15637+ vc_data.bias = vxi->vx_badness_bias;
15638+
15639+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15640+ return -EFAULT;
15641+ return 0;
15642+}
15643+
15644+int vc_set_badness(struct vx_info *vxi, void __user *data)
15645+{
15646+ struct vcmd_badness_v0 vc_data;
d337f35e 15647+
2380c486 15648+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
15649+ return -EFAULT;
15650+
4bf69007
AM
15651+ vxi->vx_badness_bias = vc_data.bias;
15652+ return 0;
d337f35e
JR
15653+}
15654+
4bf69007 15655+#include <linux/module.h>
d337f35e 15656+
4bf69007 15657+EXPORT_SYMBOL_GPL(free_vx_info);
d337f35e 15658+
c2806d43
AM
15659diff -urNp -x '*.orig' linux-4.4/kernel/vserver/cvirt.c linux-4.4/kernel/vserver/cvirt.c
15660--- linux-4.4/kernel/vserver/cvirt.c 1970-01-01 01:00:00.000000000 +0100
15661+++ linux-4.4/kernel/vserver/cvirt.c 2021-02-24 16:56:24.609490163 +0100
4bf69007
AM
15662@@ -0,0 +1,313 @@
15663+/*
15664+ * linux/kernel/vserver/cvirt.c
15665+ *
15666+ * Virtual Server: Context Virtualization
15667+ *
c2806d43 15668+ * Copyright (C) 2004-2007 Herbert P?tzl
4bf69007
AM
15669+ *
15670+ * V0.01 broken out from limit.c
15671+ * V0.02 added utsname stuff
15672+ * V0.03 changed vcmds to vxi arg
15673+ *
15674+ */
d337f35e 15675+
4bf69007
AM
15676+#include <linux/types.h>
15677+#include <linux/utsname.h>
15678+#include <linux/vs_cvirt.h>
15679+#include <linux/vserver/switch.h>
15680+#include <linux/vserver/cvirt_cmd.h>
d337f35e 15681+
4bf69007 15682+#include <asm/uaccess.h>
d337f35e 15683+
d337f35e 15684+
4bf69007
AM
15685+void vx_vsi_boottime(struct timespec *boottime)
15686+{
15687+ struct vx_info *vxi = current_vx_info();
d337f35e 15688+
4bf69007
AM
15689+ set_normalized_timespec(boottime,
15690+ boottime->tv_sec + vxi->cvirt.bias_uptime.tv_sec,
15691+ boottime->tv_nsec + vxi->cvirt.bias_uptime.tv_nsec);
15692+ return;
d337f35e
JR
15693+}
15694+
4bf69007 15695+void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
d337f35e 15696+{
4bf69007 15697+ struct vx_info *vxi = current_vx_info();
d337f35e 15698+
4bf69007
AM
15699+ set_normalized_timespec(uptime,
15700+ uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec,
15701+ uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec);
15702+ if (!idle)
15703+ return;
15704+ set_normalized_timespec(idle,
15705+ idle->tv_sec - vxi->cvirt.bias_idle.tv_sec,
15706+ idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec);
15707+ return;
d337f35e
JR
15708+}
15709+
4bf69007 15710+uint64_t vx_idle_jiffies(void)
d337f35e 15711+{
4bf69007 15712+ return init_task.utime + init_task.stime;
d337f35e
JR
15713+}
15714+
d337f35e
JR
15715+
15716+
4bf69007
AM
15717+static inline uint32_t __update_loadavg(uint32_t load,
15718+ int wsize, int delta, int n)
d337f35e 15719+{
4bf69007 15720+ unsigned long long calc, prev;
d337f35e 15721+
4bf69007
AM
15722+ /* just set it to n */
15723+ if (unlikely(delta >= wsize))
15724+ return (n << FSHIFT);
d337f35e 15725+
4bf69007
AM
15726+ calc = delta * n;
15727+ calc <<= FSHIFT;
15728+ prev = (wsize - delta);
15729+ prev *= load;
15730+ calc += prev;
15731+ do_div(calc, wsize);
15732+ return calc;
15733+}
d337f35e 15734+
d337f35e 15735+
4bf69007
AM
15736+void vx_update_load(struct vx_info *vxi)
15737+{
15738+ uint32_t now, last, delta;
15739+ unsigned int nr_running, nr_uninterruptible;
15740+ unsigned int total;
15741+ unsigned long flags;
d337f35e 15742+
4bf69007 15743+ spin_lock_irqsave(&vxi->cvirt.load_lock, flags);
d337f35e 15744+
4bf69007
AM
15745+ now = jiffies;
15746+ last = vxi->cvirt.load_last;
15747+ delta = now - last;
d337f35e 15748+
4bf69007
AM
15749+ if (delta < 5*HZ)
15750+ goto out;
d337f35e 15751+
4bf69007
AM
15752+ nr_running = atomic_read(&vxi->cvirt.nr_running);
15753+ nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible);
15754+ total = nr_running + nr_uninterruptible;
d337f35e 15755+
4bf69007
AM
15756+ vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0],
15757+ 60*HZ, delta, total);
15758+ vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1],
15759+ 5*60*HZ, delta, total);
15760+ vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2],
15761+ 15*60*HZ, delta, total);
d337f35e 15762+
4bf69007
AM
15763+ vxi->cvirt.load_last = now;
15764+out:
15765+ atomic_inc(&vxi->cvirt.load_updates);
15766+ spin_unlock_irqrestore(&vxi->cvirt.load_lock, flags);
d337f35e
JR
15767+}
15768+
d337f35e 15769+
d337f35e 15770+/*
4bf69007 15771+ * Commands to do_syslog:
d337f35e 15772+ *
4bf69007
AM
15773+ * 0 -- Close the log. Currently a NOP.
15774+ * 1 -- Open the log. Currently a NOP.
15775+ * 2 -- Read from the log.
15776+ * 3 -- Read all messages remaining in the ring buffer.
15777+ * 4 -- Read and clear all messages remaining in the ring buffer
15778+ * 5 -- Clear ring buffer.
15779+ * 6 -- Disable printk's to console
15780+ * 7 -- Enable printk's to console
15781+ * 8 -- Set level of messages printed to console
15782+ * 9 -- Return number of unread characters in the log buffer
15783+ * 10 -- Return size of the log buffer
d337f35e 15784+ */
4bf69007
AM
15785+int vx_do_syslog(int type, char __user *buf, int len)
15786+{
15787+ int error = 0;
15788+ int do_clear = 0;
15789+ struct vx_info *vxi = current_vx_info();
15790+ struct _vx_syslog *log;
d337f35e 15791+
4bf69007
AM
15792+ if (!vxi)
15793+ return -EINVAL;
15794+ log = &vxi->cvirt.syslog;
15795+
15796+ switch (type) {
15797+ case 0: /* Close log */
15798+ case 1: /* Open log */
15799+ break;
15800+ case 2: /* Read from log */
15801+ error = wait_event_interruptible(log->log_wait,
15802+ (log->log_start - log->log_end));
15803+ if (error)
15804+ break;
15805+ spin_lock_irq(&log->logbuf_lock);
15806+ spin_unlock_irq(&log->logbuf_lock);
15807+ break;
15808+ case 4: /* Read/clear last kernel messages */
15809+ do_clear = 1;
15810+ /* fall through */
15811+ case 3: /* Read last kernel messages */
15812+ return 0;
d337f35e 15813+
4bf69007
AM
15814+ case 5: /* Clear ring buffer */
15815+ return 0;
d337f35e 15816+
4bf69007
AM
15817+ case 6: /* Disable logging to console */
15818+ case 7: /* Enable logging to console */
15819+ case 8: /* Set level of messages printed to console */
15820+ break;
d337f35e 15821+
4bf69007
AM
15822+ case 9: /* Number of chars in the log buffer */
15823+ return 0;
15824+ case 10: /* Size of the log buffer */
15825+ return 0;
15826+ default:
15827+ error = -EINVAL;
15828+ break;
15829+ }
15830+ return error;
1e8b8f9b 15831+}
d337f35e 15832+
4bf69007
AM
15833+
15834+/* virtual host info names */
15835+
15836+static char *vx_vhi_name(struct vx_info *vxi, int id)
d337f35e 15837+{
4bf69007
AM
15838+ struct nsproxy *nsproxy;
15839+ struct uts_namespace *uts;
d337f35e 15840+
4bf69007
AM
15841+ if (id == VHIN_CONTEXT)
15842+ return vxi->vx_name;
15843+
15844+ nsproxy = vxi->space[0].vx_nsproxy;
15845+ if (!nsproxy)
15846+ return NULL;
15847+
15848+ uts = nsproxy->uts_ns;
15849+ if (!uts)
15850+ return NULL;
15851+
15852+ switch (id) {
15853+ case VHIN_SYSNAME:
15854+ return uts->name.sysname;
15855+ case VHIN_NODENAME:
15856+ return uts->name.nodename;
15857+ case VHIN_RELEASE:
15858+ return uts->name.release;
15859+ case VHIN_VERSION:
15860+ return uts->name.version;
15861+ case VHIN_MACHINE:
15862+ return uts->name.machine;
15863+ case VHIN_DOMAINNAME:
15864+ return uts->name.domainname;
15865+ default:
15866+ return NULL;
d337f35e 15867+ }
4bf69007 15868+ return NULL;
d337f35e
JR
15869+}
15870+
4bf69007 15871+int vc_set_vhi_name(struct vx_info *vxi, void __user *data)
d337f35e 15872+{
4bf69007
AM
15873+ struct vcmd_vhi_name_v0 vc_data;
15874+ char *name;
d337f35e 15875+
4bf69007
AM
15876+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15877+ return -EFAULT;
d337f35e 15878+
4bf69007
AM
15879+ name = vx_vhi_name(vxi, vc_data.field);
15880+ if (!name)
15881+ return -EINVAL;
d337f35e 15882+
4bf69007
AM
15883+ memcpy(name, vc_data.name, 65);
15884+ return 0;
15885+}
d337f35e 15886+
4bf69007
AM
15887+int vc_get_vhi_name(struct vx_info *vxi, void __user *data)
15888+{
15889+ struct vcmd_vhi_name_v0 vc_data;
15890+ char *name;
d337f35e 15891+
4bf69007
AM
15892+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15893+ return -EFAULT;
d337f35e 15894+
4bf69007
AM
15895+ name = vx_vhi_name(vxi, vc_data.field);
15896+ if (!name)
15897+ return -EINVAL;
d337f35e 15898+
4bf69007
AM
15899+ memcpy(vc_data.name, name, 65);
15900+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15901+ return -EFAULT;
15902+ return 0;
15903+}
d337f35e 15904+
d337f35e 15905+
4bf69007
AM
15906+int vc_virt_stat(struct vx_info *vxi, void __user *data)
15907+{
15908+ struct vcmd_virt_stat_v0 vc_data;
15909+ struct _vx_cvirt *cvirt = &vxi->cvirt;
15910+ struct timespec uptime;
99a884b4 15911+
927ca606 15912+ ktime_get_ts(&uptime);
4bf69007
AM
15913+ set_normalized_timespec(&uptime,
15914+ uptime.tv_sec - cvirt->bias_uptime.tv_sec,
15915+ uptime.tv_nsec - cvirt->bias_uptime.tv_nsec);
d337f35e 15916+
4bf69007
AM
15917+ vc_data.offset = timespec_to_ns(&cvirt->bias_ts);
15918+ vc_data.uptime = timespec_to_ns(&uptime);
15919+ vc_data.nr_threads = atomic_read(&cvirt->nr_threads);
15920+ vc_data.nr_running = atomic_read(&cvirt->nr_running);
15921+ vc_data.nr_uninterruptible = atomic_read(&cvirt->nr_uninterruptible);
15922+ vc_data.nr_onhold = atomic_read(&cvirt->nr_onhold);
15923+ vc_data.nr_forks = atomic_read(&cvirt->total_forks);
15924+ vc_data.load[0] = cvirt->load[0];
15925+ vc_data.load[1] = cvirt->load[1];
15926+ vc_data.load[2] = cvirt->load[2];
15927+
15928+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15929+ return -EFAULT;
15930+ return 0;
d337f35e
JR
15931+}
15932+
15933+
4bf69007
AM
15934+#ifdef CONFIG_VSERVER_VTIME
15935+
15936+/* virtualized time base */
15937+
15938+void vx_adjust_timespec(struct timespec *ts)
d337f35e 15939+{
4bf69007 15940+ struct vx_info *vxi;
d337f35e 15941+
4bf69007
AM
15942+ if (!vx_flags(VXF_VIRT_TIME, 0))
15943+ return;
d337f35e 15944+
4bf69007
AM
15945+ vxi = current_vx_info();
15946+ ts->tv_sec += vxi->cvirt.bias_ts.tv_sec;
15947+ ts->tv_nsec += vxi->cvirt.bias_ts.tv_nsec;
d337f35e 15948+
4bf69007
AM
15949+ if (ts->tv_nsec >= NSEC_PER_SEC) {
15950+ ts->tv_sec++;
15951+ ts->tv_nsec -= NSEC_PER_SEC;
15952+ } else if (ts->tv_nsec < 0) {
15953+ ts->tv_sec--;
15954+ ts->tv_nsec += NSEC_PER_SEC;
d337f35e 15955+ }
d337f35e
JR
15956+}
15957+
4bf69007 15958+int vx_settimeofday(const struct timespec *ts)
99a884b4 15959+{
4bf69007
AM
15960+ struct timespec ats, delta;
15961+ struct vx_info *vxi;
99a884b4 15962+
4bf69007
AM
15963+ if (!vx_flags(VXF_VIRT_TIME, 0))
15964+ return do_settimeofday(ts);
99a884b4 15965+
4bf69007
AM
15966+ getnstimeofday(&ats);
15967+ delta = timespec_sub(*ts, ats);
99a884b4 15968+
4bf69007
AM
15969+ vxi = current_vx_info();
15970+ vxi->cvirt.bias_ts = timespec_add(vxi->cvirt.bias_ts, delta);
99a884b4
AM
15971+ return 0;
15972+}
d337f35e 15973+
4bf69007 15974+#endif
d337f35e 15975+
c2806d43
AM
15976diff -urNp -x '*.orig' linux-4.4/kernel/vserver/cvirt_init.h linux-4.4/kernel/vserver/cvirt_init.h
15977--- linux-4.4/kernel/vserver/cvirt_init.h 1970-01-01 01:00:00.000000000 +0100
15978+++ linux-4.4/kernel/vserver/cvirt_init.h 2021-02-24 16:56:24.609490163 +0100
4bf69007 15979@@ -0,0 +1,70 @@
d337f35e 15980+
d337f35e 15981+
4bf69007 15982+extern uint64_t vx_idle_jiffies(void);
d337f35e 15983+
4bf69007
AM
15984+static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
15985+{
15986+ uint64_t idle_jiffies = vx_idle_jiffies();
15987+ uint64_t nsuptime;
d337f35e 15988+
927ca606 15989+ ktime_get_ts(&cvirt->bias_uptime);
4bf69007
AM
15990+ nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec
15991+ * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;
15992+ cvirt->bias_clock = nsec_to_clock_t(nsuptime);
15993+ cvirt->bias_ts.tv_sec = 0;
15994+ cvirt->bias_ts.tv_nsec = 0;
d337f35e 15995+
4bf69007
AM
15996+ jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
15997+ atomic_set(&cvirt->nr_threads, 0);
15998+ atomic_set(&cvirt->nr_running, 0);
15999+ atomic_set(&cvirt->nr_uninterruptible, 0);
16000+ atomic_set(&cvirt->nr_onhold, 0);
d337f35e 16001+
4bf69007
AM
16002+ spin_lock_init(&cvirt->load_lock);
16003+ cvirt->load_last = jiffies;
16004+ atomic_set(&cvirt->load_updates, 0);
16005+ cvirt->load[0] = 0;
16006+ cvirt->load[1] = 0;
16007+ cvirt->load[2] = 0;
16008+ atomic_set(&cvirt->total_forks, 0);
d337f35e 16009+
4bf69007
AM
16010+ spin_lock_init(&cvirt->syslog.logbuf_lock);
16011+ init_waitqueue_head(&cvirt->syslog.log_wait);
16012+ cvirt->syslog.log_start = 0;
16013+ cvirt->syslog.log_end = 0;
16014+ cvirt->syslog.con_start = 0;
16015+ cvirt->syslog.logged_chars = 0;
16016+}
16017+
16018+static inline
16019+void vx_info_init_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
d337f35e 16020+{
4bf69007
AM
16021+ // cvirt_pc->cpustat = { 0 };
16022+}
d337f35e 16023+
4bf69007
AM
16024+static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
16025+{
16026+#ifdef CONFIG_VSERVER_WARN
16027+ int value;
16028+#endif
16029+ vxwprintk_xid((value = atomic_read(&cvirt->nr_threads)),
16030+ "!!! cvirt: %p[nr_threads] = %d on exit.",
16031+ cvirt, value);
16032+ vxwprintk_xid((value = atomic_read(&cvirt->nr_running)),
16033+ "!!! cvirt: %p[nr_running] = %d on exit.",
16034+ cvirt, value);
16035+ vxwprintk_xid((value = atomic_read(&cvirt->nr_uninterruptible)),
16036+ "!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
16037+ cvirt, value);
16038+ vxwprintk_xid((value = atomic_read(&cvirt->nr_onhold)),
16039+ "!!! cvirt: %p[nr_onhold] = %d on exit.",
16040+ cvirt, value);
16041+ return;
16042+}
d337f35e 16043+
4bf69007
AM
16044+static inline
16045+void vx_info_exit_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
16046+{
16047+ return;
16048+}
d337f35e 16049+
c2806d43
AM
16050diff -urNp -x '*.orig' linux-4.4/kernel/vserver/cvirt_proc.h linux-4.4/kernel/vserver/cvirt_proc.h
16051--- linux-4.4/kernel/vserver/cvirt_proc.h 1970-01-01 01:00:00.000000000 +0100
16052+++ linux-4.4/kernel/vserver/cvirt_proc.h 2021-02-24 16:56:24.609490163 +0100
4bf69007
AM
16053@@ -0,0 +1,123 @@
16054+#ifndef _VX_CVIRT_PROC_H
16055+#define _VX_CVIRT_PROC_H
d337f35e 16056+
4bf69007
AM
16057+#include <linux/nsproxy.h>
16058+#include <linux/mnt_namespace.h>
16059+#include <linux/ipc_namespace.h>
16060+#include <linux/utsname.h>
16061+#include <linux/ipc.h>
d337f35e 16062+
4bf69007 16063+extern int vx_info_mnt_namespace(struct mnt_namespace *, char *);
d337f35e 16064+
4bf69007
AM
16065+static inline
16066+int vx_info_proc_nsproxy(struct nsproxy *nsproxy, char *buffer)
16067+{
16068+ struct mnt_namespace *ns;
16069+ struct uts_namespace *uts;
16070+ struct ipc_namespace *ipc;
16071+ int length = 0;
d337f35e 16072+
4bf69007
AM
16073+ if (!nsproxy)
16074+ goto out;
d337f35e 16075+
4bf69007
AM
16076+ length += sprintf(buffer + length,
16077+ "NSProxy:\t%p [%p,%p,%p]\n",
16078+ nsproxy, nsproxy->mnt_ns,
16079+ nsproxy->uts_ns, nsproxy->ipc_ns);
d337f35e 16080+
4bf69007
AM
16081+ ns = nsproxy->mnt_ns;
16082+ if (!ns)
16083+ goto skip_ns;
d337f35e 16084+
4bf69007 16085+ length += vx_info_mnt_namespace(ns, buffer + length);
d337f35e 16086+
4bf69007 16087+skip_ns:
d337f35e 16088+
4bf69007
AM
16089+ uts = nsproxy->uts_ns;
16090+ if (!uts)
16091+ goto skip_uts;
d337f35e 16092+
4bf69007
AM
16093+ length += sprintf(buffer + length,
16094+ "SysName:\t%.*s\n"
16095+ "NodeName:\t%.*s\n"
16096+ "Release:\t%.*s\n"
16097+ "Version:\t%.*s\n"
16098+ "Machine:\t%.*s\n"
16099+ "DomainName:\t%.*s\n",
16100+ __NEW_UTS_LEN, uts->name.sysname,
16101+ __NEW_UTS_LEN, uts->name.nodename,
16102+ __NEW_UTS_LEN, uts->name.release,
16103+ __NEW_UTS_LEN, uts->name.version,
16104+ __NEW_UTS_LEN, uts->name.machine,
16105+ __NEW_UTS_LEN, uts->name.domainname);
16106+skip_uts:
d337f35e 16107+
4bf69007
AM
16108+ ipc = nsproxy->ipc_ns;
16109+ if (!ipc)
16110+ goto skip_ipc;
d337f35e 16111+
4bf69007
AM
16112+ length += sprintf(buffer + length,
16113+ "SEMS:\t\t%d %d %d %d %d\n"
16114+ "MSG:\t\t%d %d %d\n"
b00e13aa 16115+ "SHM:\t\t%lu %lu %d %ld\n",
4bf69007
AM
16116+ ipc->sem_ctls[0], ipc->sem_ctls[1],
16117+ ipc->sem_ctls[2], ipc->sem_ctls[3],
16118+ ipc->used_sems,
16119+ ipc->msg_ctlmax, ipc->msg_ctlmnb, ipc->msg_ctlmni,
16120+ (unsigned long)ipc->shm_ctlmax,
16121+ (unsigned long)ipc->shm_ctlall,
16122+ ipc->shm_ctlmni, ipc->shm_tot);
16123+skip_ipc:
16124+out:
16125+ return length;
16126+}
d337f35e
JR
16127+
16128+
4bf69007 16129+#include <linux/sched.h>
d337f35e 16130+
4bf69007
AM
16131+#define LOAD_INT(x) ((x) >> FSHIFT)
16132+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)
d337f35e 16133+
4bf69007
AM
16134+static inline
16135+int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
d337f35e 16136+{
4bf69007
AM
16137+ int length = 0;
16138+ int a, b, c;
d337f35e 16139+
4bf69007
AM
16140+ length += sprintf(buffer + length,
16141+ "BiasUptime:\t%lu.%02lu\n",
16142+ (unsigned long)cvirt->bias_uptime.tv_sec,
16143+ (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
d337f35e 16144+
4bf69007
AM
16145+ a = cvirt->load[0] + (FIXED_1 / 200);
16146+ b = cvirt->load[1] + (FIXED_1 / 200);
16147+ c = cvirt->load[2] + (FIXED_1 / 200);
16148+ length += sprintf(buffer + length,
16149+ "nr_threads:\t%d\n"
16150+ "nr_running:\t%d\n"
16151+ "nr_unintr:\t%d\n"
16152+ "nr_onhold:\t%d\n"
16153+ "load_updates:\t%d\n"
16154+ "loadavg:\t%d.%02d %d.%02d %d.%02d\n"
16155+ "total_forks:\t%d\n",
16156+ atomic_read(&cvirt->nr_threads),
16157+ atomic_read(&cvirt->nr_running),
16158+ atomic_read(&cvirt->nr_uninterruptible),
16159+ atomic_read(&cvirt->nr_onhold),
16160+ atomic_read(&cvirt->load_updates),
16161+ LOAD_INT(a), LOAD_FRAC(a),
16162+ LOAD_INT(b), LOAD_FRAC(b),
16163+ LOAD_INT(c), LOAD_FRAC(c),
16164+ atomic_read(&cvirt->total_forks));
16165+ return length;
d337f35e
JR
16166+}
16167+
4bf69007
AM
16168+static inline
16169+int vx_info_proc_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc,
16170+ char *buffer, int cpu)
16171+{
16172+ int length = 0;
16173+ return length;
16174+}
d337f35e 16175+
4bf69007 16176+#endif /* _VX_CVIRT_PROC_H */
c2806d43
AM
16177diff -urNp -x '*.orig' linux-4.4/kernel/vserver/debug.c linux-4.4/kernel/vserver/debug.c
16178--- linux-4.4/kernel/vserver/debug.c 1970-01-01 01:00:00.000000000 +0100
16179+++ linux-4.4/kernel/vserver/debug.c 2021-02-24 16:56:24.609490163 +0100
4bf69007
AM
16180@@ -0,0 +1,32 @@
16181+/*
16182+ * kernel/vserver/debug.c
16183+ *
c2806d43 16184+ * Copyright (C) 2005-2007 Herbert P?tzl
4bf69007
AM
16185+ *
16186+ * V0.01 vx_info dump support
16187+ *
16188+ */
d337f35e 16189+
4bf69007 16190+#include <linux/module.h>
d337f35e 16191+
4bf69007 16192+#include <linux/vserver/context.h>
d337f35e 16193+
d337f35e 16194+
4bf69007 16195+void dump_vx_info(struct vx_info *vxi, int level)
d337f35e 16196+{
4bf69007
AM
16197+ printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id,
16198+ atomic_read(&vxi->vx_usecnt),
16199+ atomic_read(&vxi->vx_tasks),
16200+ vxi->vx_state);
16201+ if (level > 0) {
16202+ __dump_vx_limit(&vxi->limit);
16203+ __dump_vx_sched(&vxi->sched);
16204+ __dump_vx_cvirt(&vxi->cvirt);
16205+ __dump_vx_cacct(&vxi->cacct);
16206+ }
16207+ printk("---\n");
16208+}
d337f35e 16209+
d337f35e 16210+
4bf69007 16211+EXPORT_SYMBOL_GPL(dump_vx_info);
d337f35e 16212+
c2806d43
AM
16213diff -urNp -x '*.orig' linux-4.4/kernel/vserver/device.c linux-4.4/kernel/vserver/device.c
16214--- linux-4.4/kernel/vserver/device.c 1970-01-01 01:00:00.000000000 +0100
16215+++ linux-4.4/kernel/vserver/device.c 2021-02-24 16:56:24.609490163 +0100
4bf69007
AM
16216@@ -0,0 +1,443 @@
16217+/*
16218+ * linux/kernel/vserver/device.c
16219+ *
16220+ * Linux-VServer: Device Support
16221+ *
c2806d43 16222+ * Copyright (C) 2006 Herbert P?tzl
4bf69007
AM
16223+ * Copyright (C) 2007 Daniel Hokka Zakrisson
16224+ *
16225+ * V0.01 device mapping basics
16226+ * V0.02 added defaults
16227+ *
16228+ */
d337f35e 16229+
4bf69007
AM
16230+#include <linux/slab.h>
16231+#include <linux/rcupdate.h>
16232+#include <linux/fs.h>
16233+#include <linux/namei.h>
16234+#include <linux/hash.h>
d337f35e 16235+
4bf69007
AM
16236+#include <asm/errno.h>
16237+#include <asm/uaccess.h>
16238+#include <linux/vserver/base.h>
16239+#include <linux/vserver/debug.h>
16240+#include <linux/vserver/context.h>
16241+#include <linux/vserver/device.h>
16242+#include <linux/vserver/device_cmd.h>
d337f35e 16243+
d337f35e 16244+
4bf69007 16245+#define DMAP_HASH_BITS 4
d337f35e 16246+
d337f35e 16247+
4bf69007
AM
16248+struct vs_mapping {
16249+ union {
16250+ struct hlist_node hlist;
16251+ struct list_head list;
16252+ } u;
16253+#define dm_hlist u.hlist
16254+#define dm_list u.list
61333608 16255+ vxid_t xid;
4bf69007
AM
16256+ dev_t device;
16257+ struct vx_dmap_target target;
16258+};
d337f35e 16259+
d337f35e 16260+
4bf69007 16261+static struct hlist_head dmap_main_hash[1 << DMAP_HASH_BITS];
d337f35e 16262+
4bf69007 16263+static DEFINE_SPINLOCK(dmap_main_hash_lock);
d337f35e 16264+
4bf69007
AM
16265+static struct vx_dmap_target dmap_defaults[2] = {
16266+ { .flags = DATTR_OPEN },
16267+ { .flags = DATTR_OPEN },
16268+};
d337f35e
JR
16269+
16270+
4bf69007 16271+struct kmem_cache *dmap_cachep __read_mostly;
d337f35e 16272+
4bf69007
AM
16273+int __init dmap_cache_init(void)
16274+{
16275+ dmap_cachep = kmem_cache_create("dmap_cache",
16276+ sizeof(struct vs_mapping), 0,
16277+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
d337f35e
JR
16278+ return 0;
16279+}
16280+
4bf69007 16281+__initcall(dmap_cache_init);
d337f35e 16282+
4bf69007
AM
16283+
16284+static inline unsigned int __hashval(dev_t dev, int bits)
d337f35e 16285+{
4bf69007
AM
16286+ return hash_long((unsigned long)dev, bits);
16287+}
d337f35e 16288+
d337f35e 16289+
4bf69007
AM
16290+/* __hash_mapping()
16291+ * add the mapping to the hash table
16292+ */
16293+static inline void __hash_mapping(struct vx_info *vxi, struct vs_mapping *vdm)
16294+{
16295+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16296+ struct hlist_head *head, *hash = dmap_main_hash;
16297+ int device = vdm->device;
d337f35e 16298+
4bf69007
AM
16299+ spin_lock(hash_lock);
16300+ vxdprintk(VXD_CBIT(misc, 8), "__hash_mapping: %p[#%d] %08x:%08x",
16301+ vxi, vxi ? vxi->vx_id : 0, device, vdm->target.target);
d337f35e 16302+
4bf69007
AM
16303+ head = &hash[__hashval(device, DMAP_HASH_BITS)];
16304+ hlist_add_head(&vdm->dm_hlist, head);
16305+ spin_unlock(hash_lock);
16306+}
16307+
16308+
16309+static inline int __mode_to_default(umode_t mode)
16310+{
16311+ switch (mode) {
16312+ case S_IFBLK:
16313+ return 0;
16314+ case S_IFCHR:
16315+ return 1;
16316+ default:
16317+ BUG();
d337f35e 16318+ }
d337f35e
JR
16319+}
16320+
4bf69007
AM
16321+
16322+/* __set_default()
16323+ * set a default
16324+ */
16325+static inline void __set_default(struct vx_info *vxi, umode_t mode,
16326+ struct vx_dmap_target *vdmt)
d337f35e 16327+{
4bf69007
AM
16328+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16329+ spin_lock(hash_lock);
d337f35e 16330+
4bf69007
AM
16331+ if (vxi)
16332+ vxi->dmap.targets[__mode_to_default(mode)] = *vdmt;
16333+ else
16334+ dmap_defaults[__mode_to_default(mode)] = *vdmt;
d337f35e 16335+
d337f35e 16336+
4bf69007 16337+ spin_unlock(hash_lock);
d337f35e 16338+
4bf69007
AM
16339+ vxdprintk(VXD_CBIT(misc, 8), "__set_default: %p[#%u] %08x %04x",
16340+ vxi, vxi ? vxi->vx_id : 0, vdmt->target, vdmt->flags);
d337f35e
JR
16341+}
16342+
d337f35e 16343+
4bf69007
AM
16344+/* __remove_default()
16345+ * remove a default
16346+ */
16347+static inline int __remove_default(struct vx_info *vxi, umode_t mode)
d337f35e 16348+{
4bf69007
AM
16349+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16350+ spin_lock(hash_lock);
d337f35e 16351+
4bf69007
AM
16352+ if (vxi)
16353+ vxi->dmap.targets[__mode_to_default(mode)].flags = 0;
16354+ else /* remove == reset */
16355+ dmap_defaults[__mode_to_default(mode)].flags = DATTR_OPEN | mode;
d337f35e 16356+
4bf69007
AM
16357+ spin_unlock(hash_lock);
16358+ return 0;
d337f35e
JR
16359+}
16360+
d337f35e 16361+
4bf69007
AM
16362+/* __find_mapping()
16363+ * find a mapping in the hash table
16364+ *
16365+ * caller must hold hash_lock
16366+ */
61333608 16367+static inline int __find_mapping(vxid_t xid, dev_t device, umode_t mode,
4bf69007
AM
16368+ struct vs_mapping **local, struct vs_mapping **global)
16369+{
16370+ struct hlist_head *hash = dmap_main_hash;
16371+ struct hlist_head *head = &hash[__hashval(device, DMAP_HASH_BITS)];
16372+ struct hlist_node *pos;
16373+ struct vs_mapping *vdm;
d337f35e 16374+
4bf69007
AM
16375+ *local = NULL;
16376+ if (global)
16377+ *global = NULL;
d337f35e 16378+
4bf69007
AM
16379+ hlist_for_each(pos, head) {
16380+ vdm = hlist_entry(pos, struct vs_mapping, dm_hlist);
d337f35e 16381+
4bf69007
AM
16382+ if ((vdm->device == device) &&
16383+ !((vdm->target.flags ^ mode) & S_IFMT)) {
16384+ if (vdm->xid == xid) {
16385+ *local = vdm;
16386+ return 1;
16387+ } else if (global && vdm->xid == 0)
16388+ *global = vdm;
2380c486
JR
16389+ }
16390+ }
16391+
4bf69007
AM
16392+ if (global && *global)
16393+ return 0;
16394+ else
16395+ return -ENOENT;
2380c486
JR
16396+}
16397+
16398+
4bf69007
AM
16399+/* __lookup_mapping()
16400+ * find a mapping and store the result in target and flags
16401+ */
16402+static inline int __lookup_mapping(struct vx_info *vxi,
16403+ dev_t device, dev_t *target, int *flags, umode_t mode)
2380c486 16404+{
4bf69007
AM
16405+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16406+ struct vs_mapping *vdm, *global;
16407+ struct vx_dmap_target *vdmt;
2380c486 16408+ int ret = 0;
61333608 16409+ vxid_t xid = vxi->vx_id;
4bf69007 16410+ int index;
2380c486 16411+
4bf69007
AM
16412+ spin_lock(hash_lock);
16413+ if (__find_mapping(xid, device, mode, &vdm, &global) > 0) {
2380c486 16414+ ret = 1;
4bf69007
AM
16415+ vdmt = &vdm->target;
16416+ goto found;
16417+ }
2380c486 16418+
4bf69007
AM
16419+ index = __mode_to_default(mode);
16420+ if (vxi && vxi->dmap.targets[index].flags) {
16421+ ret = 2;
16422+ vdmt = &vxi->dmap.targets[index];
16423+ } else if (global) {
16424+ ret = 3;
16425+ vdmt = &global->target;
16426+ goto found;
16427+ } else {
16428+ ret = 4;
16429+ vdmt = &dmap_defaults[index];
d337f35e 16430+ }
2380c486 16431+
4bf69007
AM
16432+found:
16433+ if (target && (vdmt->flags & DATTR_REMAP))
16434+ *target = vdmt->target;
16435+ else if (target)
16436+ *target = device;
16437+ if (flags)
16438+ *flags = vdmt->flags;
16439+
16440+ spin_unlock(hash_lock);
2380c486
JR
16441+
16442+ return ret;
d337f35e
JR
16443+}
16444+
16445+
4bf69007
AM
16446+/* __remove_mapping()
16447+ * remove a mapping from the hash table
16448+ */
16449+static inline int __remove_mapping(struct vx_info *vxi, dev_t device,
16450+ umode_t mode)
d337f35e 16451+{
4bf69007
AM
16452+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16453+ struct vs_mapping *vdm = NULL;
d337f35e
JR
16454+ int ret = 0;
16455+
4bf69007
AM
16456+ spin_lock(hash_lock);
16457+
16458+ ret = __find_mapping((vxi ? vxi->vx_id : 0), device, mode, &vdm,
16459+ NULL);
16460+ vxdprintk(VXD_CBIT(misc, 8), "__remove_mapping: %p[#%d] %08x %04x",
16461+ vxi, vxi ? vxi->vx_id : 0, device, mode);
16462+ if (ret < 0)
2380c486 16463+ goto out;
4bf69007 16464+ hlist_del(&vdm->dm_hlist);
2380c486 16465+
2380c486 16466+out:
4bf69007
AM
16467+ spin_unlock(hash_lock);
16468+ if (vdm)
16469+ kmem_cache_free(dmap_cachep, vdm);
2380c486
JR
16470+ return ret;
16471+}
16472+
16473+
2380c486 16474+
4bf69007
AM
16475+int vs_map_device(struct vx_info *vxi,
16476+ dev_t device, dev_t *target, umode_t mode)
2380c486 16477+{
4bf69007 16478+ int ret, flags = DATTR_MASK;
2380c486 16479+
4bf69007
AM
16480+ if (!vxi) {
16481+ if (target)
16482+ *target = device;
2380c486 16483+ goto out;
2380c486 16484+ }
4bf69007
AM
16485+ ret = __lookup_mapping(vxi, device, target, &flags, mode);
16486+ vxdprintk(VXD_CBIT(misc, 8), "vs_map_device: %08x target: %08x flags: %04x mode: %04x mapped=%d",
16487+ device, target ? *target : 0, flags, mode, ret);
2380c486 16488+out:
4bf69007 16489+ return (flags & DATTR_MASK);
2380c486
JR
16490+}
16491+
2380c486 16492+
4bf69007
AM
16493+
16494+static int do_set_mapping(struct vx_info *vxi,
16495+ dev_t device, dev_t target, int flags, umode_t mode)
2380c486 16496+{
4bf69007
AM
16497+ if (device) {
16498+ struct vs_mapping *new;
2380c486 16499+
4bf69007
AM
16500+ new = kmem_cache_alloc(dmap_cachep, GFP_KERNEL);
16501+ if (!new)
16502+ return -ENOMEM;
16503+
16504+ INIT_HLIST_NODE(&new->dm_hlist);
16505+ new->device = device;
16506+ new->target.target = target;
16507+ new->target.flags = flags | mode;
16508+ new->xid = (vxi ? vxi->vx_id : 0);
16509+
16510+ vxdprintk(VXD_CBIT(misc, 8), "do_set_mapping: %08x target: %08x flags: %04x", device, target, flags);
16511+ __hash_mapping(vxi, new);
16512+ } else {
16513+ struct vx_dmap_target new = {
16514+ .target = target,
16515+ .flags = flags | mode,
16516+ };
16517+ __set_default(vxi, mode, &new);
16518+ }
16519+ return 0;
2380c486
JR
16520+}
16521+
4bf69007
AM
16522+
16523+static int do_unset_mapping(struct vx_info *vxi,
16524+ dev_t device, dev_t target, int flags, umode_t mode)
2380c486 16525+{
4bf69007 16526+ int ret = -EINVAL;
763640ca 16527+
4bf69007
AM
16528+ if (device) {
16529+ ret = __remove_mapping(vxi, device, mode);
16530+ if (ret < 0)
16531+ goto out;
16532+ } else {
16533+ ret = __remove_default(vxi, mode);
16534+ if (ret < 0)
16535+ goto out;
16536+ }
2380c486 16537+
4bf69007
AM
16538+out:
16539+ return ret;
16540+}
2380c486 16541+
2380c486 16542+
4bf69007
AM
16543+static inline int __user_device(const char __user *name, dev_t *dev,
16544+ umode_t *mode)
16545+{
927ca606 16546+ struct path path;
4bf69007 16547+ int ret;
2380c486 16548+
4bf69007
AM
16549+ if (!name) {
16550+ *dev = 0;
16551+ return 0;
16552+ }
927ca606 16553+ ret = user_lpath(name, &path);
4bf69007
AM
16554+ if (ret)
16555+ return ret;
927ca606
AM
16556+ if (path.dentry->d_inode) {
16557+ *dev = path.dentry->d_inode->i_rdev;
16558+ *mode = path.dentry->d_inode->i_mode;
4bf69007 16559+ }
927ca606 16560+ path_put(&path);
4bf69007
AM
16561+ return 0;
16562+}
2380c486 16563+
4bf69007
AM
16564+static inline int __mapping_mode(dev_t device, dev_t target,
16565+ umode_t device_mode, umode_t target_mode, umode_t *mode)
16566+{
16567+ if (device)
16568+ *mode = device_mode & S_IFMT;
16569+ else if (target)
16570+ *mode = target_mode & S_IFMT;
16571+ else
16572+ return -EINVAL;
2380c486 16573+
4bf69007
AM
16574+ /* if both given, device and target mode have to match */
16575+ if (device && target &&
16576+ ((device_mode ^ target_mode) & S_IFMT))
16577+ return -EINVAL;
16578+ return 0;
16579+}
d337f35e 16580+
d337f35e 16581+
4bf69007
AM
16582+static inline int do_mapping(struct vx_info *vxi, const char __user *device_path,
16583+ const char __user *target_path, int flags, int set)
16584+{
16585+ dev_t device = ~0, target = ~0;
16586+ umode_t device_mode = 0, target_mode = 0, mode;
16587+ int ret;
2380c486 16588+
4bf69007
AM
16589+ ret = __user_device(device_path, &device, &device_mode);
16590+ if (ret)
16591+ return ret;
16592+ ret = __user_device(target_path, &target, &target_mode);
16593+ if (ret)
16594+ return ret;
2380c486 16595+
4bf69007
AM
16596+ ret = __mapping_mode(device, target,
16597+ device_mode, target_mode, &mode);
16598+ if (ret)
16599+ return ret;
2380c486 16600+
4bf69007
AM
16601+ if (set)
16602+ return do_set_mapping(vxi, device, target,
16603+ flags, mode);
16604+ else
16605+ return do_unset_mapping(vxi, device, target,
16606+ flags, mode);
d337f35e
JR
16607+}
16608+
d337f35e 16609+
4bf69007
AM
16610+int vc_set_mapping(struct vx_info *vxi, void __user *data)
16611+{
16612+ struct vcmd_set_mapping_v0 vc_data;
d337f35e 16613+
4bf69007
AM
16614+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16615+ return -EFAULT;
d337f35e 16616+
4bf69007
AM
16617+ return do_mapping(vxi, vc_data.device, vc_data.target,
16618+ vc_data.flags, 1);
16619+}
d337f35e 16620+
4bf69007 16621+int vc_unset_mapping(struct vx_info *vxi, void __user *data)
d337f35e 16622+{
4bf69007 16623+ struct vcmd_set_mapping_v0 vc_data;
d337f35e 16624+
4bf69007
AM
16625+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16626+ return -EFAULT;
16627+
16628+ return do_mapping(vxi, vc_data.device, vc_data.target,
16629+ vc_data.flags, 0);
d337f35e
JR
16630+}
16631+
16632+
4bf69007
AM
16633+#ifdef CONFIG_COMPAT
16634+
16635+int vc_set_mapping_x32(struct vx_info *vxi, void __user *data)
d337f35e 16636+{
4bf69007 16637+ struct vcmd_set_mapping_v0_x32 vc_data;
d337f35e 16638+
4bf69007
AM
16639+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16640+ return -EFAULT;
16641+
16642+ return do_mapping(vxi, compat_ptr(vc_data.device_ptr),
16643+ compat_ptr(vc_data.target_ptr), vc_data.flags, 1);
d337f35e
JR
16644+}
16645+
4bf69007
AM
16646+int vc_unset_mapping_x32(struct vx_info *vxi, void __user *data)
16647+{
16648+ struct vcmd_set_mapping_v0_x32 vc_data;
16649+
16650+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16651+ return -EFAULT;
d337f35e 16652+
4bf69007
AM
16653+ return do_mapping(vxi, compat_ptr(vc_data.device_ptr),
16654+ compat_ptr(vc_data.target_ptr), vc_data.flags, 0);
16655+}
d337f35e 16656+
4bf69007 16657+#endif /* CONFIG_COMPAT */
d337f35e 16658+
4bf69007 16659+
c2806d43
AM
16660diff -urNp -x '*.orig' linux-4.4/kernel/vserver/dlimit.c linux-4.4/kernel/vserver/dlimit.c
16661--- linux-4.4/kernel/vserver/dlimit.c 1970-01-01 01:00:00.000000000 +0100
16662+++ linux-4.4/kernel/vserver/dlimit.c 2021-02-24 16:56:24.609490163 +0100
b00e13aa 16663@@ -0,0 +1,528 @@
d337f35e 16664+/*
4bf69007 16665+ * linux/kernel/vserver/dlimit.c
d337f35e 16666+ *
4bf69007 16667+ * Virtual Server: Context Disk Limits
d337f35e 16668+ *
c2806d43 16669+ * Copyright (C) 2004-2009 Herbert Pötzl
d337f35e 16670+ *
4bf69007
AM
16671+ * V0.01 initial version
16672+ * V0.02 compat32 splitup
16673+ * V0.03 extended interface
d337f35e
JR
16674+ *
16675+ */
16676+
4bf69007
AM
16677+#include <linux/statfs.h>
16678+#include <linux/sched.h>
2380c486 16679+#include <linux/namei.h>
d337f35e 16680+#include <linux/vs_tag.h>
4bf69007
AM
16681+#include <linux/vs_dlimit.h>
16682+#include <linux/vserver/dlimit_cmd.h>
16683+#include <linux/slab.h>
16684+// #include <linux/gfp.h>
d337f35e 16685+
d337f35e
JR
16686+#include <asm/uaccess.h>
16687+
4bf69007 16688+/* __alloc_dl_info()
d337f35e 16689+
4bf69007
AM
16690+ * allocate an initialized dl_info struct
16691+ * doesn't make it visible (hash) */
d337f35e 16692+
61333608 16693+static struct dl_info *__alloc_dl_info(struct super_block *sb, vtag_t tag)
4bf69007
AM
16694+{
16695+ struct dl_info *new = NULL;
d337f35e 16696+
4bf69007
AM
16697+ vxdprintk(VXD_CBIT(dlim, 5),
16698+ "alloc_dl_info(%p,%d)*", sb, tag);
d337f35e 16699+
4bf69007
AM
16700+ /* would this benefit from a slab cache? */
16701+ new = kmalloc(sizeof(struct dl_info), GFP_KERNEL);
16702+ if (!new)
16703+ return 0;
d337f35e 16704+
4bf69007
AM
16705+ memset(new, 0, sizeof(struct dl_info));
16706+ new->dl_tag = tag;
16707+ new->dl_sb = sb;
16708+ // INIT_RCU_HEAD(&new->dl_rcu);
16709+ INIT_HLIST_NODE(&new->dl_hlist);
16710+ spin_lock_init(&new->dl_lock);
16711+ atomic_set(&new->dl_refcnt, 0);
16712+ atomic_set(&new->dl_usecnt, 0);
d337f35e 16713+
4bf69007 16714+ /* rest of init goes here */
d337f35e 16715+
4bf69007
AM
16716+ vxdprintk(VXD_CBIT(dlim, 4),
16717+ "alloc_dl_info(%p,%d) = %p", sb, tag, new);
16718+ return new;
16719+}
d4263eb0 16720+
4bf69007 16721+/* __dealloc_dl_info()
d337f35e 16722+
4bf69007 16723+ * final disposal of dl_info */
d337f35e 16724+
4bf69007 16725+static void __dealloc_dl_info(struct dl_info *dli)
adc1caaa 16726+{
4bf69007
AM
16727+ vxdprintk(VXD_CBIT(dlim, 4),
16728+ "dealloc_dl_info(%p)", dli);
2380c486 16729+
4bf69007
AM
16730+ dli->dl_hlist.next = LIST_POISON1;
16731+ dli->dl_tag = -1;
16732+ dli->dl_sb = 0;
2380c486 16733+
4bf69007
AM
16734+ BUG_ON(atomic_read(&dli->dl_usecnt));
16735+ BUG_ON(atomic_read(&dli->dl_refcnt));
2380c486 16736+
4bf69007 16737+ kfree(dli);
adc1caaa 16738+}
2380c486 16739+
2380c486 16740+
4bf69007 16741+/* hash table for dl_info hash */
2380c486 16742+
4bf69007 16743+#define DL_HASH_SIZE 13
2380c486 16744+
4bf69007 16745+struct hlist_head dl_info_hash[DL_HASH_SIZE];
2380c486 16746+
4bf69007 16747+static DEFINE_SPINLOCK(dl_info_hash_lock);
2380c486 16748+
d33d7b00 16749+
61333608 16750+static inline unsigned int __hashval(struct super_block *sb, vtag_t tag)
adc1caaa 16751+{
4bf69007
AM
16752+ return ((tag ^ (unsigned long)sb) % DL_HASH_SIZE);
16753+}
2380c486 16754+
2380c486 16755+
2380c486 16756+
4bf69007 16757+/* __hash_dl_info()
2380c486 16758+
4bf69007
AM
16759+ * add the dli to the global hash table
16760+ * requires the hash_lock to be held */
2380c486 16761+
4bf69007
AM
16762+static inline void __hash_dl_info(struct dl_info *dli)
16763+{
16764+ struct hlist_head *head;
d337f35e 16765+
4bf69007
AM
16766+ vxdprintk(VXD_CBIT(dlim, 6),
16767+ "__hash_dl_info: %p[#%d]", dli, dli->dl_tag);
16768+ get_dl_info(dli);
16769+ head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_tag)];
16770+ hlist_add_head_rcu(&dli->dl_hlist, head);
16771+}
d337f35e 16772+
4bf69007 16773+/* __unhash_dl_info()
3bac966d 16774+
4bf69007
AM
16775+ * remove the dli from the global hash table
16776+ * requires the hash_lock to be held */
3bac966d 16777+
4bf69007
AM
16778+static inline void __unhash_dl_info(struct dl_info *dli)
16779+{
16780+ vxdprintk(VXD_CBIT(dlim, 6),
16781+ "__unhash_dl_info: %p[#%d]", dli, dli->dl_tag);
16782+ hlist_del_rcu(&dli->dl_hlist);
16783+ put_dl_info(dli);
16784+}
3bac966d 16785+
3bac966d 16786+
4bf69007 16787+/* __lookup_dl_info()
3bac966d 16788+
4bf69007
AM
16789+ * requires the rcu_read_lock()
16790+ * doesn't increment the dl_refcnt */
3bac966d 16791+
61333608 16792+static inline struct dl_info *__lookup_dl_info(struct super_block *sb, vtag_t tag)
4bf69007
AM
16793+{
16794+ struct hlist_head *head = &dl_info_hash[__hashval(sb, tag)];
4bf69007 16795+ struct dl_info *dli;
3bac966d 16796+
b00e13aa
AM
16797+ hlist_for_each_entry_rcu(dli, head, dl_hlist) {
16798+ if (dli->dl_tag == tag && dli->dl_sb == sb)
4bf69007 16799+ return dli;
d33d7b00 16800+ }
4bf69007
AM
16801+ return NULL;
16802+}
3bac966d 16803+
3bac966d 16804+
61333608 16805+struct dl_info *locate_dl_info(struct super_block *sb, vtag_t tag)
4bf69007
AM
16806+{
16807+ struct dl_info *dli;
16808+
16809+ rcu_read_lock();
16810+ dli = get_dl_info(__lookup_dl_info(sb, tag));
16811+ vxdprintk(VXD_CBIT(dlim, 7),
16812+ "locate_dl_info(%p,#%d) = %p", sb, tag, dli);
16813+ rcu_read_unlock();
16814+ return dli;
d33d7b00 16815+}
3bac966d 16816+
4bf69007 16817+void rcu_free_dl_info(struct rcu_head *head)
d33d7b00 16818+{
4bf69007
AM
16819+ struct dl_info *dli = container_of(head, struct dl_info, dl_rcu);
16820+ int usecnt, refcnt;
3bac966d 16821+
4bf69007 16822+ BUG_ON(!dli || !head);
3bac966d 16823+
4bf69007
AM
16824+ usecnt = atomic_read(&dli->dl_usecnt);
16825+ BUG_ON(usecnt < 0);
3bac966d 16826+
4bf69007
AM
16827+ refcnt = atomic_read(&dli->dl_refcnt);
16828+ BUG_ON(refcnt < 0);
16829+
16830+ vxdprintk(VXD_CBIT(dlim, 3),
16831+ "rcu_free_dl_info(%p)", dli);
16832+ if (!usecnt)
16833+ __dealloc_dl_info(dli);
16834+ else
16835+ printk("!!! rcu didn't free\n");
d33d7b00 16836+}
3bac966d 16837+
3bac966d 16838+
4bf69007
AM
16839+
16840+
16841+static int do_addrem_dlimit(uint32_t id, const char __user *name,
16842+ uint32_t flags, int add)
d33d7b00
AM
16843+{
16844+ struct path path;
d33d7b00 16845+ int ret;
3bac966d 16846+
4bf69007 16847+ ret = user_lpath(name, &path);
d33d7b00 16848+ if (!ret) {
4bf69007
AM
16849+ struct super_block *sb;
16850+ struct dl_info *dli;
16851+
16852+ ret = -EINVAL;
16853+ if (!path.dentry->d_inode)
16854+ goto out_release;
16855+ if (!(sb = path.dentry->d_inode->i_sb))
16856+ goto out_release;
16857+
16858+ if (add) {
16859+ dli = __alloc_dl_info(sb, id);
16860+ spin_lock(&dl_info_hash_lock);
16861+
16862+ ret = -EEXIST;
16863+ if (__lookup_dl_info(sb, id))
16864+ goto out_unlock;
16865+ __hash_dl_info(dli);
16866+ dli = NULL;
16867+ } else {
16868+ spin_lock(&dl_info_hash_lock);
16869+ dli = __lookup_dl_info(sb, id);
16870+
16871+ ret = -ESRCH;
16872+ if (!dli)
16873+ goto out_unlock;
16874+ __unhash_dl_info(dli);
16875+ }
16876+ ret = 0;
16877+ out_unlock:
16878+ spin_unlock(&dl_info_hash_lock);
16879+ if (add && dli)
16880+ __dealloc_dl_info(dli);
16881+ out_release:
d33d7b00
AM
16882+ path_put(&path);
16883+ }
d33d7b00
AM
16884+ return ret;
16885+}
3bac966d 16886+
4bf69007 16887+int vc_add_dlimit(uint32_t id, void __user *data)
d33d7b00 16888+{
4bf69007 16889+ struct vcmd_ctx_dlimit_base_v0 vc_data;
3bac966d 16890+
d33d7b00
AM
16891+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16892+ return -EFAULT;
3bac966d 16893+
4bf69007
AM
16894+ return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1);
16895+}
3bac966d 16896+
4bf69007
AM
16897+int vc_rem_dlimit(uint32_t id, void __user *data)
16898+{
16899+ struct vcmd_ctx_dlimit_base_v0 vc_data;
3bac966d 16900+
4bf69007 16901+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d33d7b00 16902+ return -EFAULT;
4bf69007
AM
16903+
16904+ return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0);
d33d7b00 16905+}
3bac966d 16906+
4bf69007 16907+#ifdef CONFIG_COMPAT
3bac966d 16908+
4bf69007
AM
16909+int vc_add_dlimit_x32(uint32_t id, void __user *data)
16910+{
16911+ struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
3bac966d 16912+
4bf69007
AM
16913+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16914+ return -EFAULT;
d337f35e 16915+
4bf69007
AM
16916+ return do_addrem_dlimit(id,
16917+ compat_ptr(vc_data.name_ptr), vc_data.flags, 1);
16918+}
d337f35e 16919+
4bf69007 16920+int vc_rem_dlimit_x32(uint32_t id, void __user *data)
d33d7b00 16921+{
4bf69007 16922+ struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
d337f35e 16923+
4bf69007
AM
16924+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16925+ return -EFAULT;
16926+
16927+ return do_addrem_dlimit(id,
16928+ compat_ptr(vc_data.name_ptr), vc_data.flags, 0);
d33d7b00 16929+}
d337f35e 16930+
4bf69007
AM
16931+#endif /* CONFIG_COMPAT */
16932+
16933+
16934+static inline
16935+int do_set_dlimit(uint32_t id, const char __user *name,
16936+ uint32_t space_used, uint32_t space_total,
16937+ uint32_t inodes_used, uint32_t inodes_total,
16938+ uint32_t reserved, uint32_t flags)
d33d7b00 16939+{
4bf69007
AM
16940+ struct path path;
16941+ int ret;
ba86f833 16942+
4bf69007
AM
16943+ ret = user_lpath(name, &path);
16944+ if (!ret) {
16945+ struct super_block *sb;
16946+ struct dl_info *dli;
d337f35e 16947+
4bf69007
AM
16948+ ret = -EINVAL;
16949+ if (!path.dentry->d_inode)
16950+ goto out_release;
16951+ if (!(sb = path.dentry->d_inode->i_sb))
16952+ goto out_release;
d337f35e 16953+
4bf69007
AM
16954+ /* sanity checks */
16955+ if ((reserved != CDLIM_KEEP &&
16956+ reserved > 100) ||
16957+ (inodes_used != CDLIM_KEEP &&
16958+ inodes_used > inodes_total) ||
16959+ (space_used != CDLIM_KEEP &&
16960+ space_used > space_total))
16961+ goto out_release;
d337f35e 16962+
4bf69007
AM
16963+ ret = -ESRCH;
16964+ dli = locate_dl_info(sb, id);
16965+ if (!dli)
16966+ goto out_release;
ba86f833 16967+
4bf69007 16968+ spin_lock(&dli->dl_lock);
d337f35e 16969+
4bf69007
AM
16970+ if (inodes_used != CDLIM_KEEP)
16971+ dli->dl_inodes_used = inodes_used;
16972+ if (inodes_total != CDLIM_KEEP)
16973+ dli->dl_inodes_total = inodes_total;
16974+ if (space_used != CDLIM_KEEP)
16975+ dli->dl_space_used = dlimit_space_32to64(
16976+ space_used, flags, DLIMS_USED);
d337f35e 16977+
4bf69007
AM
16978+ if (space_total == CDLIM_INFINITY)
16979+ dli->dl_space_total = DLIM_INFINITY;
16980+ else if (space_total != CDLIM_KEEP)
16981+ dli->dl_space_total = dlimit_space_32to64(
16982+ space_total, flags, DLIMS_TOTAL);
78865d5b 16983+
4bf69007
AM
16984+ if (reserved != CDLIM_KEEP)
16985+ dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100;
78865d5b 16986+
4bf69007 16987+ spin_unlock(&dli->dl_lock);
d337f35e 16988+
4bf69007
AM
16989+ put_dl_info(dli);
16990+ ret = 0;
d337f35e 16991+
4bf69007
AM
16992+ out_release:
16993+ path_put(&path);
16994+ }
16995+ return ret;
16996+}
d337f35e 16997+
4bf69007
AM
16998+int vc_set_dlimit(uint32_t id, void __user *data)
16999+{
17000+ struct vcmd_ctx_dlimit_v0 vc_data;
d337f35e 17001+
4bf69007
AM
17002+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17003+ return -EFAULT;
d337f35e 17004+
4bf69007
AM
17005+ return do_set_dlimit(id, vc_data.name,
17006+ vc_data.space_used, vc_data.space_total,
17007+ vc_data.inodes_used, vc_data.inodes_total,
17008+ vc_data.reserved, vc_data.flags);
17009+}
d337f35e 17010+
4bf69007 17011+#ifdef CONFIG_COMPAT
d337f35e 17012+
4bf69007
AM
17013+int vc_set_dlimit_x32(uint32_t id, void __user *data)
17014+{
17015+ struct vcmd_ctx_dlimit_v0_x32 vc_data;
d337f35e 17016+
4bf69007
AM
17017+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17018+ return -EFAULT;
d337f35e 17019+
4bf69007
AM
17020+ return do_set_dlimit(id, compat_ptr(vc_data.name_ptr),
17021+ vc_data.space_used, vc_data.space_total,
17022+ vc_data.inodes_used, vc_data.inodes_total,
17023+ vc_data.reserved, vc_data.flags);
17024+}
d337f35e 17025+
4bf69007 17026+#endif /* CONFIG_COMPAT */
d337f35e 17027+
d337f35e 17028+
4bf69007
AM
17029+static inline
17030+int do_get_dlimit(uint32_t id, const char __user *name,
17031+ uint32_t *space_used, uint32_t *space_total,
17032+ uint32_t *inodes_used, uint32_t *inodes_total,
17033+ uint32_t *reserved, uint32_t *flags)
17034+{
17035+ struct path path;
17036+ int ret;
d337f35e 17037+
4bf69007
AM
17038+ ret = user_lpath(name, &path);
17039+ if (!ret) {
17040+ struct super_block *sb;
17041+ struct dl_info *dli;
d337f35e 17042+
4bf69007
AM
17043+ ret = -EINVAL;
17044+ if (!path.dentry->d_inode)
17045+ goto out_release;
17046+ if (!(sb = path.dentry->d_inode->i_sb))
17047+ goto out_release;
d337f35e 17048+
4bf69007
AM
17049+ ret = -ESRCH;
17050+ dli = locate_dl_info(sb, id);
17051+ if (!dli)
17052+ goto out_release;
d337f35e 17053+
4bf69007
AM
17054+ spin_lock(&dli->dl_lock);
17055+ *inodes_used = dli->dl_inodes_used;
17056+ *inodes_total = dli->dl_inodes_total;
d337f35e 17057+
4bf69007
AM
17058+ *space_used = dlimit_space_64to32(
17059+ dli->dl_space_used, flags, DLIMS_USED);
d337f35e 17060+
4bf69007
AM
17061+ if (dli->dl_space_total == DLIM_INFINITY)
17062+ *space_total = CDLIM_INFINITY;
17063+ else
17064+ *space_total = dlimit_space_64to32(
17065+ dli->dl_space_total, flags, DLIMS_TOTAL);
d337f35e 17066+
4bf69007
AM
17067+ *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);
17068+ spin_unlock(&dli->dl_lock);
d337f35e 17069+
4bf69007
AM
17070+ put_dl_info(dli);
17071+ ret = -EFAULT;
d337f35e 17072+
4bf69007
AM
17073+ ret = 0;
17074+ out_release:
17075+ path_put(&path);
17076+ }
17077+ return ret;
d337f35e
JR
17078+}
17079+
4bf69007
AM
17080+
17081+int vc_get_dlimit(uint32_t id, void __user *data)
d337f35e 17082+{
4bf69007 17083+ struct vcmd_ctx_dlimit_v0 vc_data;
d337f35e
JR
17084+ int ret;
17085+
2380c486 17086+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
17087+ return -EFAULT;
17088+
4bf69007
AM
17089+ ret = do_get_dlimit(id, vc_data.name,
17090+ &vc_data.space_used, &vc_data.space_total,
17091+ &vc_data.inodes_used, &vc_data.inodes_total,
17092+ &vc_data.reserved, &vc_data.flags);
d337f35e
JR
17093+ if (ret)
17094+ return ret;
17095+
2380c486 17096+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
d337f35e
JR
17097+ return -EFAULT;
17098+ return 0;
17099+}
17100+
4bf69007 17101+#ifdef CONFIG_COMPAT
d337f35e 17102+
4bf69007 17103+int vc_get_dlimit_x32(uint32_t id, void __user *data)
d337f35e 17104+{
4bf69007 17105+ struct vcmd_ctx_dlimit_v0_x32 vc_data;
d337f35e
JR
17106+ int ret;
17107+
2380c486 17108+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
17109+ return -EFAULT;
17110+
4bf69007
AM
17111+ ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr),
17112+ &vc_data.space_used, &vc_data.space_total,
17113+ &vc_data.inodes_used, &vc_data.inodes_total,
17114+ &vc_data.reserved, &vc_data.flags);
d337f35e
JR
17115+ if (ret)
17116+ return ret;
17117+
2380c486 17118+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
d337f35e
JR
17119+ return -EFAULT;
17120+ return 0;
17121+}
17122+
4bf69007 17123+#endif /* CONFIG_COMPAT */
ec22aa5c
AM
17124+
17125+
4bf69007 17126+void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
ec22aa5c 17127+{
4bf69007
AM
17128+ struct dl_info *dli;
17129+ __u64 blimit, bfree, bavail;
17130+ __u32 ifree;
ec22aa5c 17131+
4bf69007
AM
17132+ dli = locate_dl_info(sb, dx_current_tag());
17133+ if (!dli)
17134+ return;
ec22aa5c 17135+
4bf69007
AM
17136+ spin_lock(&dli->dl_lock);
17137+ if (dli->dl_inodes_total == (unsigned long)DLIM_INFINITY)
17138+ goto no_ilim;
ec22aa5c 17139+
4bf69007
AM
17140+ /* reduce max inodes available to limit */
17141+ if (buf->f_files > dli->dl_inodes_total)
17142+ buf->f_files = dli->dl_inodes_total;
ec22aa5c 17143+
4bf69007
AM
17144+ ifree = dli->dl_inodes_total - dli->dl_inodes_used;
17145+ /* reduce free inodes to min */
17146+ if (ifree < buf->f_ffree)
17147+ buf->f_ffree = ifree;
b2252bc2 17148+
4bf69007
AM
17149+no_ilim:
17150+ if (dli->dl_space_total == DLIM_INFINITY)
17151+ goto no_blim;
d337f35e 17152+
4bf69007 17153+ blimit = dli->dl_space_total >> sb->s_blocksize_bits;
d337f35e 17154+
4bf69007
AM
17155+ if (dli->dl_space_total < dli->dl_space_used)
17156+ bfree = 0;
17157+ else
17158+ bfree = (dli->dl_space_total - dli->dl_space_used)
17159+ >> sb->s_blocksize_bits;
d337f35e 17160+
4bf69007
AM
17161+ bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);
17162+ if (bavail < dli->dl_space_used)
17163+ bavail = 0;
17164+ else
17165+ bavail = (bavail - dli->dl_space_used)
17166+ >> sb->s_blocksize_bits;
d337f35e 17167+
4bf69007
AM
17168+ /* reduce max space available to limit */
17169+ if (buf->f_blocks > blimit)
17170+ buf->f_blocks = blimit;
d337f35e 17171+
4bf69007
AM
17172+ /* reduce free space to min */
17173+ if (bfree < buf->f_bfree)
17174+ buf->f_bfree = bfree;
d337f35e 17175+
4bf69007
AM
17176+ /* reduce avail space to min */
17177+ if (bavail < buf->f_bavail)
17178+ buf->f_bavail = bavail;
d337f35e 17179+
4bf69007
AM
17180+no_blim:
17181+ spin_unlock(&dli->dl_lock);
17182+ put_dl_info(dli);
d337f35e 17183+
4bf69007 17184+ return;
d337f35e
JR
17185+}
17186+
4bf69007 17187+#include <linux/module.h>
d337f35e 17188+
4bf69007
AM
17189+EXPORT_SYMBOL_GPL(locate_dl_info);
17190+EXPORT_SYMBOL_GPL(rcu_free_dl_info);
e3afe727 17191+
c2806d43
AM
17192diff -urNp -x '*.orig' linux-4.4/kernel/vserver/helper.c linux-4.4/kernel/vserver/helper.c
17193--- linux-4.4/kernel/vserver/helper.c 1970-01-01 01:00:00.000000000 +0100
17194+++ linux-4.4/kernel/vserver/helper.c 2021-02-24 16:56:24.609490163 +0100
09be7631 17195@@ -0,0 +1,242 @@
4bf69007
AM
17196+/*
17197+ * linux/kernel/vserver/helper.c
17198+ *
17199+ * Virtual Context Support
17200+ *
c2806d43 17201+ * Copyright (C) 2004-2007 Herbert Pötzl
4bf69007
AM
17202+ *
17203+ * V0.01 basic helper
17204+ *
17205+ */
e3afe727 17206+
4bf69007
AM
17207+#include <linux/kmod.h>
17208+#include <linux/reboot.h>
17209+#include <linux/vs_context.h>
17210+#include <linux/vs_network.h>
17211+#include <linux/vserver/signal.h>
e3afe727 17212+
4bf69007
AM
17213+
17214+char vshelper_path[255] = "/sbin/vshelper";
17215+
17216+static int vshelper_init(struct subprocess_info *info, struct cred *new_cred)
17217+{
09be7631 17218+ current->flags &= ~PF_NO_SETAFFINITY;
4bf69007 17219+ return 0;
d337f35e
JR
17220+}
17221+
09be7631
JR
17222+static int vs_call_usermodehelper(char *path, char **argv, char **envp, int wait)
17223+{
17224+ struct subprocess_info *info;
17225+ gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
17226+
17227+ info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
17228+ vshelper_init, NULL, NULL);
17229+ if (info == NULL)
17230+ return -ENOMEM;
17231+
17232+ return call_usermodehelper_exec(info, wait);
17233+}
17234+
4bf69007 17235+static int do_vshelper(char *name, char *argv[], char *envp[], int sync)
d337f35e 17236+{
4bf69007 17237+ int ret;
e3afe727 17238+
09be7631
JR
17239+ if ((ret = vs_call_usermodehelper(name, argv, envp,
17240+ sync ? UMH_WAIT_PROC : UMH_WAIT_EXEC))) {
4bf69007
AM
17241+ printk(KERN_WARNING "%s: (%s %s) returned %s with %d\n",
17242+ name, argv[1], argv[2],
17243+ sync ? "sync" : "async", ret);
17244+ }
17245+ vxdprintk(VXD_CBIT(switch, 4),
17246+ "%s: (%s %s) returned %s with %d",
17247+ name, argv[1], argv[2], sync ? "sync" : "async", ret);
17248+ return ret;
17249+}
e3afe727 17250+
4bf69007
AM
17251+/*
17252+ * vshelper path is set via /proc/sys
17253+ * invoked by vserver sys_reboot(), with
17254+ * the following arguments
17255+ *
17256+ * argv [0] = vshelper_path;
17257+ * argv [1] = action: "restart", "halt", "poweroff", ...
17258+ * argv [2] = context identifier
17259+ *
17260+ * envp [*] = type-specific parameters
17261+ */
e3afe727 17262+
4bf69007
AM
17263+long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg)
17264+{
17265+ char id_buf[8], cmd_buf[16];
17266+ char uid_buf[16], pid_buf[16];
17267+ int ret;
e3afe727 17268+
4bf69007
AM
17269+ char *argv[] = {vshelper_path, NULL, id_buf, 0};
17270+ char *envp[] = {"HOME=/", "TERM=linux",
17271+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
17272+ uid_buf, pid_buf, cmd_buf, 0};
e3afe727 17273+
4bf69007
AM
17274+ if (vx_info_state(vxi, VXS_HELPER))
17275+ return -EAGAIN;
17276+ vxi->vx_state |= VXS_HELPER;
7b17263b 17277+
4bf69007 17278+ snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
d337f35e 17279+
4bf69007 17280+ snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
8ce283e1
AM
17281+ snprintf(uid_buf, sizeof(uid_buf), "VS_UID=%d",
17282+ from_kuid(&init_user_ns, current_uid()));
4bf69007 17283+ snprintf(pid_buf, sizeof(pid_buf), "VS_PID=%d", current->pid);
e3afe727 17284+
4bf69007
AM
17285+ switch (cmd) {
17286+ case LINUX_REBOOT_CMD_RESTART:
17287+ argv[1] = "restart";
17288+ break;
07a627a5 17289+
4bf69007
AM
17290+ case LINUX_REBOOT_CMD_HALT:
17291+ argv[1] = "halt";
17292+ break;
e3afe727 17293+
4bf69007
AM
17294+ case LINUX_REBOOT_CMD_POWER_OFF:
17295+ argv[1] = "poweroff";
17296+ break;
d337f35e 17297+
4bf69007
AM
17298+ case LINUX_REBOOT_CMD_SW_SUSPEND:
17299+ argv[1] = "swsusp";
17300+ break;
d337f35e 17301+
4bf69007
AM
17302+ case LINUX_REBOOT_CMD_OOM:
17303+ argv[1] = "oom";
17304+ break;
d337f35e 17305+
4bf69007
AM
17306+ default:
17307+ vxi->vx_state &= ~VXS_HELPER;
17308+ return 0;
d337f35e 17309+ }
4bf69007
AM
17310+
17311+ ret = do_vshelper(vshelper_path, argv, envp, 0);
17312+ vxi->vx_state &= ~VXS_HELPER;
17313+ __wakeup_vx_info(vxi);
17314+ return (ret) ? -EPERM : 0;
d337f35e
JR
17315+}
17316+
4bf69007
AM
17317+
17318+long vs_reboot(unsigned int cmd, void __user *arg)
d337f35e 17319+{
4bf69007
AM
17320+ struct vx_info *vxi = current_vx_info();
17321+ long ret = 0;
d337f35e 17322+
4bf69007
AM
17323+ vxdprintk(VXD_CBIT(misc, 5),
17324+ "vs_reboot(%p[#%d],%u)",
17325+ vxi, vxi ? vxi->vx_id : 0, cmd);
17326+
17327+ ret = vs_reboot_helper(vxi, cmd, arg);
17328+ if (ret)
17329+ return ret;
17330+
17331+ vxi->reboot_cmd = cmd;
17332+ if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
17333+ switch (cmd) {
17334+ case LINUX_REBOOT_CMD_RESTART:
17335+ case LINUX_REBOOT_CMD_HALT:
17336+ case LINUX_REBOOT_CMD_POWER_OFF:
17337+ vx_info_kill(vxi, 0, SIGKILL);
17338+ vx_info_kill(vxi, 1, SIGKILL);
17339+ default:
17340+ break;
17341+ }
d337f35e 17342+ }
4bf69007 17343+ return 0;
d337f35e
JR
17344+}
17345+
4bf69007
AM
17346+long vs_oom_action(unsigned int cmd)
17347+{
17348+ struct vx_info *vxi = current_vx_info();
17349+ long ret = 0;
d337f35e 17350+
4bf69007
AM
17351+ vxdprintk(VXD_CBIT(misc, 5),
17352+ "vs_oom_action(%p[#%d],%u)",
17353+ vxi, vxi ? vxi->vx_id : 0, cmd);
d337f35e 17354+
4bf69007
AM
17355+ ret = vs_reboot_helper(vxi, cmd, NULL);
17356+ if (ret)
17357+ return ret;
d337f35e 17358+
4bf69007
AM
17359+ vxi->reboot_cmd = cmd;
17360+ if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
17361+ vx_info_kill(vxi, 0, SIGKILL);
17362+ vx_info_kill(vxi, 1, SIGKILL);
17363+ }
17364+ return 0;
17365+}
d337f35e 17366+
4bf69007
AM
17367+/*
17368+ * argv [0] = vshelper_path;
17369+ * argv [1] = action: "startup", "shutdown"
17370+ * argv [2] = context identifier
17371+ *
17372+ * envp [*] = type-specific parameters
17373+ */
d337f35e 17374+
4bf69007 17375+long vs_state_change(struct vx_info *vxi, unsigned int cmd)
d337f35e 17376+{
4bf69007
AM
17377+ char id_buf[8], cmd_buf[16];
17378+ char *argv[] = {vshelper_path, NULL, id_buf, 0};
17379+ char *envp[] = {"HOME=/", "TERM=linux",
17380+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
17381+
17382+ if (!vx_info_flags(vxi, VXF_SC_HELPER, 0))
17383+ return 0;
17384+
17385+ snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
17386+ snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
17387+
17388+ switch (cmd) {
17389+ case VSC_STARTUP:
17390+ argv[1] = "startup";
17391+ break;
17392+ case VSC_SHUTDOWN:
17393+ argv[1] = "shutdown";
17394+ break;
17395+ default:
17396+ return 0;
17397+ }
17398+
17399+ return do_vshelper(vshelper_path, argv, envp, 1);
d337f35e
JR
17400+}
17401+
d337f35e 17402+
4bf69007
AM
17403+/*
17404+ * argv [0] = vshelper_path;
17405+ * argv [1] = action: "netup", "netdown"
17406+ * argv [2] = context identifier
17407+ *
17408+ * envp [*] = type-specific parameters
17409+ */
17410+
17411+long vs_net_change(struct nx_info *nxi, unsigned int cmd)
17412+{
17413+ char id_buf[8], cmd_buf[16];
17414+ char *argv[] = {vshelper_path, NULL, id_buf, 0};
17415+ char *envp[] = {"HOME=/", "TERM=linux",
17416+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
17417+
17418+ if (!nx_info_flags(nxi, NXF_SC_HELPER, 0))
17419+ return 0;
17420+
17421+ snprintf(id_buf, sizeof(id_buf), "%d", nxi->nx_id);
17422+ snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
17423+
17424+ switch (cmd) {
17425+ case VSC_NETUP:
17426+ argv[1] = "netup";
17427+ break;
17428+ case VSC_NETDOWN:
17429+ argv[1] = "netdown";
17430+ break;
17431+ default:
17432+ return 0;
17433+ }
17434+
17435+ return do_vshelper(vshelper_path, argv, envp, 1);
17436+}
d337f35e 17437+
c2806d43
AM
17438diff -urNp -x '*.orig' linux-4.4/kernel/vserver/history.c linux-4.4/kernel/vserver/history.c
17439--- linux-4.4/kernel/vserver/history.c 1970-01-01 01:00:00.000000000 +0100
17440+++ linux-4.4/kernel/vserver/history.c 2021-02-24 16:56:24.609490163 +0100
4bf69007 17441@@ -0,0 +1,258 @@
d337f35e 17442+/*
4bf69007 17443+ * kernel/vserver/history.c
d337f35e 17444+ *
4bf69007 17445+ * Virtual Context History Backtrace
d337f35e 17446+ *
c2806d43 17447+ * Copyright (C) 2004-2007 Herbert Pötzl
d337f35e 17448+ *
4bf69007
AM
17449+ * V0.01 basic structure
17450+ * V0.02 hash/unhash and trace
17451+ * V0.03 preemption fixes
d337f35e
JR
17452+ *
17453+ */
17454+
4bf69007
AM
17455+#include <linux/module.h>
17456+#include <asm/uaccess.h>
d337f35e 17457+
4bf69007
AM
17458+#include <linux/vserver/context.h>
17459+#include <linux/vserver/debug.h>
17460+#include <linux/vserver/debug_cmd.h>
17461+#include <linux/vserver/history.h>
d337f35e
JR
17462+
17463+
4bf69007
AM
17464+#ifdef CONFIG_VSERVER_HISTORY
17465+#define VXH_SIZE CONFIG_VSERVER_HISTORY_SIZE
17466+#else
17467+#define VXH_SIZE 64
17468+#endif
d337f35e 17469+
4bf69007
AM
17470+struct _vx_history {
17471+ unsigned int counter;
2380c486 17472+
4bf69007
AM
17473+ struct _vx_hist_entry entry[VXH_SIZE + 1];
17474+};
2380c486 17475+
2380c486 17476+
4bf69007 17477+DEFINE_PER_CPU(struct _vx_history, vx_history_buffer);
2380c486 17478+
4bf69007 17479+unsigned volatile int vxh_active = 1;
2380c486 17480+
4bf69007 17481+static atomic_t sequence = ATOMIC_INIT(0);
2380c486 17482+
2380c486 17483+
4bf69007 17484+/* vxh_advance()
2380c486 17485+
4bf69007
AM
17486+ * requires disabled preemption */
17487+
17488+struct _vx_hist_entry *vxh_advance(void *loc)
2380c486 17489+{
4bf69007
AM
17490+ unsigned int cpu = smp_processor_id();
17491+ struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
17492+ struct _vx_hist_entry *entry;
17493+ unsigned int index;
17494+
17495+ index = vxh_active ? (hist->counter++ % VXH_SIZE) : VXH_SIZE;
17496+ entry = &hist->entry[index];
17497+
17498+ entry->seq = atomic_inc_return(&sequence);
17499+ entry->loc = loc;
17500+ return entry;
2380c486
JR
17501+}
17502+
4bf69007 17503+EXPORT_SYMBOL_GPL(vxh_advance);
2380c486 17504+
2380c486 17505+
/*
 * printk fragments used by vxh_dump_entry().
 *
 * VXH_LOC_ARGS() expands to a use of a variable named 'cpu', which must
 * be in scope at the point of use.
 */
#define VXH_LOC_FMTS	"(#%04x,*%d):%p"
#define VXH_LOC_ARGS(e)	(e)->seq, cpu, (e)->loc

/* context pointer plus xid/usecnt/tasks; the numbers print as 0 for NULL */
#define VXH_VXI_FMTS	"%p[#%d,%d.%d]"
#define VXH_VXI_ARGS(e)	(e)->vxi.ptr,				\
			(e)->vxi.ptr ? (e)->vxi.xid : 0,	\
			(e)->vxi.ptr ? (e)->vxi.usecnt : 0,	\
			(e)->vxi.ptr ? (e)->vxi.tasks : 0
17517+
17518+void vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu)
2380c486 17519+{
4bf69007
AM
17520+ switch (e->type) {
17521+ case VXH_THROW_OOPS:
17522+ printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e));
17523+ break;
2380c486 17524+
4bf69007
AM
17525+ case VXH_GET_VX_INFO:
17526+ case VXH_PUT_VX_INFO:
17527+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
17528+ VXH_LOC_ARGS(e),
17529+ (e->type == VXH_GET_VX_INFO) ? "get" : "put",
17530+ VXH_VXI_ARGS(e));
17531+ break;
2380c486 17532+
4bf69007
AM
17533+ case VXH_INIT_VX_INFO:
17534+ case VXH_SET_VX_INFO:
17535+ case VXH_CLR_VX_INFO:
17536+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
17537+ VXH_LOC_ARGS(e),
17538+ (e->type == VXH_INIT_VX_INFO) ? "init" :
17539+ ((e->type == VXH_SET_VX_INFO) ? "set" : "clr"),
17540+ VXH_VXI_ARGS(e), e->sc.data);
17541+ break;
2380c486 17542+
4bf69007
AM
17543+ case VXH_CLAIM_VX_INFO:
17544+ case VXH_RELEASE_VX_INFO:
17545+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
17546+ VXH_LOC_ARGS(e),
17547+ (e->type == VXH_CLAIM_VX_INFO) ? "claim" : "release",
17548+ VXH_VXI_ARGS(e), e->sc.data);
17549+ break;
2380c486 17550+
4bf69007
AM
17551+ case VXH_ALLOC_VX_INFO:
17552+ case VXH_DEALLOC_VX_INFO:
17553+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
17554+ VXH_LOC_ARGS(e),
17555+ (e->type == VXH_ALLOC_VX_INFO) ? "alloc" : "dealloc",
17556+ VXH_VXI_ARGS(e));
17557+ break;
2380c486 17558+
4bf69007
AM
17559+ case VXH_HASH_VX_INFO:
17560+ case VXH_UNHASH_VX_INFO:
17561+ printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n",
17562+ VXH_LOC_ARGS(e),
17563+ (e->type == VXH_HASH_VX_INFO) ? "hash" : "unhash",
17564+ VXH_VXI_ARGS(e));
17565+ break;
2380c486 17566+
4bf69007
AM
17567+ case VXH_LOC_VX_INFO:
17568+ case VXH_LOOKUP_VX_INFO:
17569+ case VXH_CREATE_VX_INFO:
17570+ printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n",
17571+ VXH_LOC_ARGS(e),
17572+ (e->type == VXH_CREATE_VX_INFO) ? "create" :
17573+ ((e->type == VXH_LOC_VX_INFO) ? "loc" : "lookup"),
17574+ e->ll.arg, VXH_VXI_ARGS(e));
17575+ break;
2380c486
JR
17576+ }
17577+}
17578+
4bf69007
AM
17579+static void __vxh_dump_history(void)
17580+{
17581+ unsigned int i, cpu;
d337f35e 17582+
4bf69007
AM
17583+ printk("History:\tSEQ: %8x\tNR_CPUS: %d\n",
17584+ atomic_read(&sequence), NR_CPUS);
d337f35e 17585+
4bf69007
AM
17586+ for (i = 0; i < VXH_SIZE; i++) {
17587+ for_each_online_cpu(cpu) {
17588+ struct _vx_history *hist =
17589+ &per_cpu(vx_history_buffer, cpu);
17590+ unsigned int index = (hist->counter - i) % VXH_SIZE;
17591+ struct _vx_hist_entry *entry = &hist->entry[index];
d337f35e 17592+
4bf69007
AM
17593+ vxh_dump_entry(entry, cpu);
17594+ }
17595+ }
17596+}
d337f35e 17597+
4bf69007
AM
17598+void vxh_dump_history(void)
17599+{
17600+ vxh_active = 0;
17601+#ifdef CONFIG_SMP
17602+ local_irq_enable();
17603+ smp_send_stop();
17604+ local_irq_disable();
17605+#endif
17606+ __vxh_dump_history();
17607+}
d337f35e 17608+
d337f35e 17609+
4bf69007 17610+/* vserver syscall commands below here */
d337f35e 17611+
d337f35e 17612+
4bf69007
AM
17613+int vc_dump_history(uint32_t id)
17614+{
17615+ vxh_active = 0;
17616+ __vxh_dump_history();
17617+ vxh_active = 1;
2380c486 17618+
4bf69007 17619+ return 0;
d337f35e
JR
17620+}
17621+
d337f35e 17622+
4bf69007
AM
17623+int do_read_history(struct __user _vx_hist_entry *data,
17624+ int cpu, uint32_t *index, uint32_t *count)
d337f35e 17625+{
4bf69007
AM
17626+ int pos, ret = 0;
17627+ struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
17628+ int end = hist->counter;
17629+ int start = end - VXH_SIZE + 2;
17630+ int idx = *index;
d337f35e 17631+
4bf69007
AM
17632+ /* special case: get current pos */
17633+ if (!*count) {
17634+ *index = end;
17635+ return 0;
17636+ }
d337f35e 17637+
4bf69007
AM
17638+ /* have we lost some data? */
17639+ if (idx < start)
17640+ idx = start;
d337f35e 17641+
4bf69007
AM
17642+ for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) {
17643+ struct _vx_hist_entry *entry =
17644+ &hist->entry[idx % VXH_SIZE];
2380c486 17645+
4bf69007
AM
17646+ /* send entry to userspace */
17647+ ret = copy_to_user(&data[pos], entry, sizeof(*entry));
17648+ if (ret)
17649+ break;
17650+ }
17651+ /* save new index and count */
17652+ *index = idx;
17653+ *count = pos;
17654+ return ret ? ret : (*index < end);
d337f35e
JR
17655+}
17656+
4bf69007 17657+int vc_read_history(uint32_t id, void __user *data)
d337f35e 17658+{
4bf69007
AM
17659+ struct vcmd_read_history_v0 vc_data;
17660+ int ret;
d337f35e 17661+
4bf69007
AM
17662+ if (id >= NR_CPUS)
17663+ return -EINVAL;
d337f35e 17664+
4bf69007
AM
17665+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17666+ return -EFAULT;
d337f35e 17667+
4bf69007
AM
17668+ ret = do_read_history((struct __user _vx_hist_entry *)vc_data.data,
17669+ id, &vc_data.index, &vc_data.count);
d337f35e 17670+
4bf69007
AM
17671+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17672+ return -EFAULT;
17673+ return ret;
d337f35e
JR
17674+}
17675+
#ifdef	CONFIG_COMPAT

/*
 * vc_read_history_x32 - compat variant of vc_read_history()
 * @id:   cpu number
 * @data: user pointer to a struct vcmd_read_history_v0_x32
 *
 * Identical to vc_read_history() except that the entry buffer address is
 * taken from the data_ptr field and converted with compat_ptr().
 */
int vc_read_history_x32(uint32_t id, void __user *data)
{
	struct vcmd_read_history_v0_x32 req;
	int res;

	if (id >= NR_CPUS)
		return -EINVAL;

	if (copy_from_user(&req, data, sizeof(req)))
		return -EFAULT;

	res = do_read_history((struct __user _vx_hist_entry *)
		compat_ptr(req.data_ptr),
		id, &req.index, &req.count);

	/* hand the updated index and count back to the caller */
	if (copy_to_user(data, &req, sizeof(req)))
		return -EFAULT;

	return res;
}

#endif	/* CONFIG_COMPAT */
d337f35e 17699+
c2806d43
AM
17700diff -urNp -x '*.orig' linux-4.4/kernel/vserver/inet.c linux-4.4/kernel/vserver/inet.c
17701--- linux-4.4/kernel/vserver/inet.c 1970-01-01 01:00:00.000000000 +0100
17702+++ linux-4.4/kernel/vserver/inet.c 2021-02-24 16:56:24.609490163 +0100
7a9e40b8 17703@@ -0,0 +1,236 @@
d337f35e 17704+
4bf69007
AM
17705+#include <linux/in.h>
17706+#include <linux/inetdevice.h>
17707+#include <linux/export.h>
17708+#include <linux/vs_inet.h>
17709+#include <linux/vs_inet6.h>
17710+#include <linux/vserver/debug.h>
17711+#include <net/route.h>
17712+#include <net/addrconf.h>
d337f35e
JR
17713+
17714+
4bf69007 17715+int nx_v4_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
d337f35e 17716+{
4bf69007
AM
17717+ int ret = 0;
17718+
17719+ if (!nxi1 || !nxi2 || nxi1 == nxi2)
17720+ ret = 1;
17721+ else {
17722+ struct nx_addr_v4 *ptr;
7a9e40b8 17723+ unsigned long irqflags;
d337f35e 17724+
7a9e40b8 17725+ spin_lock_irqsave(&nxi1->addr_lock, irqflags);
4bf69007
AM
17726+ for (ptr = &nxi1->v4; ptr; ptr = ptr->next) {
17727+ if (v4_nx_addr_in_nx_info(nxi2, ptr, -1)) {
17728+ ret = 1;
17729+ break;
17730+ }
17731+ }
7a9e40b8 17732+ spin_unlock_irqrestore(&nxi1->addr_lock, irqflags);
4bf69007 17733+ }
d337f35e 17734+
4bf69007
AM
17735+ vxdprintk(VXD_CBIT(net, 2),
17736+ "nx_v4_addr_conflict(%p,%p): %d",
17737+ nxi1, nxi2, ret);
d337f35e 17738+
4bf69007
AM
17739+ return ret;
17740+}
d337f35e 17741+
d337f35e 17742+
4bf69007
AM
#ifdef	CONFIG_IPV6

/*
 * nx_v6_addr_conflict - check two network contexts for shared IPv6 addresses
 *
 * Returns 1 when either context is NULL, when both are the same context,
 * or when v6_nx_addr_in_nx_info() matches any address entry of @nxi1
 * against @nxi2; returns 0 otherwise.  The address list of @nxi1 is
 * walked under its addr_lock with interrupts saved/disabled.
 */
int nx_v6_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
{
	int conflict = 1;

	if (nxi1 && nxi2 && nxi1 != nxi2) {
		struct nx_addr_v6 *cur;
		unsigned long irqflags;

		conflict = 0;
		spin_lock_irqsave(&nxi1->addr_lock, irqflags);
		for (cur = &nxi1->v6; cur; cur = cur->next) {
			if (!v6_nx_addr_in_nx_info(nxi2, cur, -1))
				continue;
			conflict = 1;
			break;
		}
		spin_unlock_irqrestore(&nxi1->addr_lock, irqflags);
	}

	vxdprintk(VXD_CBIT(net, 2),
		"nx_v6_addr_conflict(%p,%p): %d",
		nxi1, nxi2, conflict);

	return conflict;
}

#endif
d337f35e 17773+
4bf69007 17774+int v4_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
d337f35e 17775+{
4bf69007
AM
17776+ struct in_device *in_dev;
17777+ struct in_ifaddr **ifap;
17778+ struct in_ifaddr *ifa;
17779+ int ret = 0;
d337f35e 17780+
4bf69007
AM
17781+ if (!dev)
17782+ goto out;
17783+ in_dev = in_dev_get(dev);
17784+ if (!in_dev)
17785+ goto out;
d337f35e 17786+
4bf69007
AM
17787+ for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
17788+ ifap = &ifa->ifa_next) {
17789+ if (v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW)) {
17790+ ret = 1;
17791+ break;
17792+ }
17793+ }
17794+ in_dev_put(in_dev);
17795+out:
17796+ return ret;
d337f35e
JR
17797+}
17798+
17799+
#ifdef	CONFIG_IPV6

/*
 * v6_dev_in_nx_info - does @dev carry an IPv6 address belonging to @nxi?
 *
 * Returns 1 when v6_addr_in_nx_info() accepts any address on the
 * inet6 device's addr_list (checked with mask -1), 0 otherwise,
 * including when @dev is NULL or has no inet6_dev.
 */
int v6_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
{
	struct inet6_dev *in_dev;
	struct inet6_ifaddr *ifa;
	int found = 0;

	if (!dev)
		return 0;

	in_dev = in6_dev_get(dev);
	if (!in_dev)
		return 0;

	list_for_each_entry(ifa, &in_dev->addr_list, if_list) {
		if (v6_addr_in_nx_info(nxi, &ifa->addr, -1)) {
			found = 1;
			break;
		}
	}

	/* drop the reference taken by in6_dev_get() */
	in6_dev_put(in_dev);
	return found;
}

#endif
d337f35e 17827+
4bf69007
AM
17828+int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
17829+{
17830+ int ret = 1;
d337f35e 17831+
4bf69007
AM
17832+ if (!nxi)
17833+ goto out;
17834+ if (nxi->v4.type && v4_dev_in_nx_info(dev, nxi))
17835+ goto out;
17836+#ifdef CONFIG_IPV6
17837+ ret = 2;
17838+ if (nxi->v6.type && v6_dev_in_nx_info(dev, nxi))
17839+ goto out;
17840+#endif
17841+ ret = 0;
17842+out:
17843+ vxdprintk(VXD_CBIT(net, 3),
17844+ "dev_in_nx_info(%p,%p[#%d]) = %d",
17845+ dev, nxi, nxi ? nxi->nx_id : 0, ret);
17846+ return ret;
17847+}
d337f35e 17848+
4bf69007
AM
17849+struct rtable *ip_v4_find_src(struct net *net, struct nx_info *nxi,
17850+ struct flowi4 *fl4)
d337f35e 17851+{
4bf69007 17852+ struct rtable *rt;
d337f35e 17853+
4bf69007
AM
17854+ if (!nxi)
17855+ return NULL;
d337f35e 17856+
4bf69007
AM
17857+ /* FIXME: handle lback only case */
17858+ if (!NX_IPV4(nxi))
17859+ return ERR_PTR(-EPERM);
d337f35e 17860+
4bf69007
AM
17861+ vxdprintk(VXD_CBIT(net, 4),
17862+ "ip_v4_find_src(%p[#%u]) " NIPQUAD_FMT " -> " NIPQUAD_FMT,
17863+ nxi, nxi ? nxi->nx_id : 0,
17864+ NIPQUAD(fl4->saddr), NIPQUAD(fl4->daddr));
d337f35e 17865+
4bf69007
AM
17866+ /* single IP is unconditional */
17867+ if (nx_info_flags(nxi, NXF_SINGLE_IP, 0) &&
17868+ (fl4->saddr == INADDR_ANY))
17869+ fl4->saddr = nxi->v4.ip[0].s_addr;
d337f35e 17870+
4bf69007
AM
17871+ if (fl4->saddr == INADDR_ANY) {
17872+ struct nx_addr_v4 *ptr;
17873+ __be32 found = 0;
17874+
17875+ rt = __ip_route_output_key(net, fl4);
17876+ if (!IS_ERR(rt)) {
17877+ found = fl4->saddr;
17878+ ip_rt_put(rt);
17879+ vxdprintk(VXD_CBIT(net, 4),
17880+ "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
17881+ nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(found));
17882+ if (v4_addr_in_nx_info(nxi, found, NXA_MASK_BIND))
17883+ goto found;
17884+ }
d337f35e 17885+
8d50a2ea 17886+ WARN_ON_ONCE(in_irq());
b00e13aa 17887+ spin_lock_bh(&nxi->addr_lock);
4bf69007
AM
17888+ for (ptr = &nxi->v4; ptr; ptr = ptr->next) {
17889+ __be32 primary = ptr->ip[0].s_addr;
17890+ __be32 mask = ptr->mask.s_addr;
17891+ __be32 neta = primary & mask;
d337f35e 17892+
4bf69007
AM
17893+ vxdprintk(VXD_CBIT(net, 4), "ip_v4_find_src(%p[#%u]) chk: "
17894+ NIPQUAD_FMT "/" NIPQUAD_FMT "/" NIPQUAD_FMT,
17895+ nxi, nxi ? nxi->nx_id : 0, NIPQUAD(primary),
17896+ NIPQUAD(mask), NIPQUAD(neta));
17897+ if ((found & mask) != neta)
17898+ continue;
d337f35e 17899+
4bf69007
AM
17900+ fl4->saddr = primary;
17901+ rt = __ip_route_output_key(net, fl4);
17902+ vxdprintk(VXD_CBIT(net, 4),
17903+ "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
17904+ nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(primary));
17905+ if (!IS_ERR(rt)) {
17906+ found = fl4->saddr;
17907+ ip_rt_put(rt);
17908+ if (found == primary)
5cb1760b 17909+ goto found_unlock;
4bf69007
AM
17910+ }
17911+ }
17912+ /* still no source ip? */
17913+ found = ipv4_is_loopback(fl4->daddr)
17914+ ? IPI_LOOPBACK : nxi->v4.ip[0].s_addr;
5cb1760b 17915+ found_unlock:
b00e13aa 17916+ spin_unlock_bh(&nxi->addr_lock);
4bf69007
AM
17917+ found:
17918+ /* assign src ip to flow */
17919+ fl4->saddr = found;
17920+
17921+ } else {
17922+ if (!v4_addr_in_nx_info(nxi, fl4->saddr, NXA_MASK_BIND))
17923+ return ERR_PTR(-EPERM);
17924+ }
d337f35e 17925+
4bf69007
AM
17926+ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) {
17927+ if (ipv4_is_loopback(fl4->daddr))
17928+ fl4->daddr = nxi->v4_lback.s_addr;
17929+ if (ipv4_is_loopback(fl4->saddr))
17930+ fl4->saddr = nxi->v4_lback.s_addr;
17931+ } else if (ipv4_is_loopback(fl4->daddr) &&
17932+ !nx_info_flags(nxi, NXF_LBACK_ALLOW, 0))
17933+ return ERR_PTR(-EPERM);
d337f35e 17934+
4bf69007 17935+ return NULL;
d337f35e
JR
17936+}
17937+
4bf69007 17938+EXPORT_SYMBOL_GPL(ip_v4_find_src);
d337f35e 17939+
c2806d43
AM
17940diff -urNp -x '*.orig' linux-4.4/kernel/vserver/init.c linux-4.4/kernel/vserver/init.c
17941--- linux-4.4/kernel/vserver/init.c 1970-01-01 01:00:00.000000000 +0100
17942+++ linux-4.4/kernel/vserver/init.c 2021-02-24 16:56:24.609490163 +0100
927ca606 17943@@ -0,0 +1,46 @@
4bf69007
AM
17944+/*
17945+ * linux/kernel/vserver/init.c
17946+ *
17947+ * Virtual Server Init
17948+ *
c2806d43 17949+ * Copyright (C) 2004-2007 Herbert Pötzl
4bf69007
AM
17950+ *
17951+ * V0.01 basic structure
17952+ *
17953+ */
d337f35e 17954+
4bf69007 17955+#include <linux/init.h>
927ca606 17956+#include <linux/module.h>
4bf69007
AM
17957+
17958+int vserver_register_sysctl(void);
17959+void vserver_unregister_sysctl(void);
17960+
17961+
17962+static int __init init_vserver(void)
d337f35e 17963+{
4bf69007 17964+ int ret = 0;
d337f35e 17965+
4bf69007
AM
17966+#ifdef CONFIG_VSERVER_DEBUG
17967+ vserver_register_sysctl();
17968+#endif
17969+ return ret;
d337f35e
JR
17970+}
17971+
d337f35e 17972+
4bf69007 17973+static void __exit exit_vserver(void)
d337f35e 17974+{
d337f35e 17975+
4bf69007
AM
17976+#ifdef CONFIG_VSERVER_DEBUG
17977+ vserver_unregister_sysctl();
17978+#endif
17979+ return;
d337f35e
JR
17980+}
17981+
4bf69007
AM
17982+/* FIXME: GFP_ZONETYPES gone
17983+long vx_slab[GFP_ZONETYPES]; */
17984+long vx_area;
d337f35e 17985+
d337f35e 17986+
4bf69007
AM
17987+module_init(init_vserver);
17988+module_exit(exit_vserver);
d337f35e 17989+
c2806d43
AM
17990diff -urNp -x '*.orig' linux-4.4/kernel/vserver/inode.c linux-4.4/kernel/vserver/inode.c
17991--- linux-4.4/kernel/vserver/inode.c 1970-01-01 01:00:00.000000000 +0100
17992+++ linux-4.4/kernel/vserver/inode.c 2021-02-24 16:56:24.612823601 +0100
09be7631 17993@@ -0,0 +1,440 @@
4bf69007
AM
17994+/*
17995+ * linux/kernel/vserver/inode.c
17996+ *
17997+ * Virtual Server: File System Support
17998+ *
c2806d43 17999+ * Copyright (C) 2004-2007 Herbert Pötzl
4bf69007
AM
18000+ *
18001+ * V0.01 separated from vcontext V0.05
18002+ * V0.02 moved to tag (instead of xid)
18003+ *
18004+ */
d337f35e 18005+
4bf69007
AM
18006+#include <linux/tty.h>
18007+#include <linux/proc_fs.h>
18008+#include <linux/devpts_fs.h>
18009+#include <linux/fs.h>
18010+#include <linux/file.h>
18011+#include <linux/mount.h>
18012+#include <linux/parser.h>
18013+#include <linux/namei.h>
09be7631
JR
18014+#include <linux/magic.h>
18015+#include <linux/slab.h>
4bf69007
AM
18016+#include <linux/vserver/inode.h>
18017+#include <linux/vserver/inode_cmd.h>
18018+#include <linux/vs_base.h>
18019+#include <linux/vs_tag.h>
d337f35e 18020+
4bf69007 18021+#include <asm/uaccess.h>
09be7631 18022+#include <../../fs/proc/internal.h>
d337f35e 18023+
d337f35e 18024+
4bf69007 18025+static int __vc_get_iattr(struct inode *in, uint32_t *tag, uint32_t *flags, uint32_t *mask)
d337f35e 18026+{
4bf69007 18027+ struct proc_dir_entry *entry;
d337f35e 18028+
4bf69007
AM
18029+ if (!in || !in->i_sb)
18030+ return -ESRCH;
d337f35e 18031+
4bf69007
AM
18032+ *flags = IATTR_TAG
18033+ | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0)
18034+ | (IS_IXUNLINK(in) ? IATTR_IXUNLINK : 0)
18035+ | (IS_BARRIER(in) ? IATTR_BARRIER : 0)
18036+ | (IS_COW(in) ? IATTR_COW : 0);
18037+ *mask = IATTR_IXUNLINK | IATTR_IMMUTABLE | IATTR_COW;
d337f35e 18038+
4bf69007
AM
18039+ if (S_ISDIR(in->i_mode))
18040+ *mask |= IATTR_BARRIER;
d337f35e 18041+
4bf69007
AM
18042+ if (IS_TAGGED(in)) {
18043+ *tag = i_tag_read(in);
18044+ *mask |= IATTR_TAG;
18045+ }
2380c486 18046+
4bf69007
AM
18047+ switch (in->i_sb->s_magic) {
18048+ case PROC_SUPER_MAGIC:
18049+ entry = PROC_I(in)->pde;
d337f35e 18050+
4bf69007
AM
18051+ /* check for specific inodes? */
18052+ if (entry)
18053+ *mask |= IATTR_FLAGS;
18054+ if (entry)
18055+ *flags |= (entry->vx_flags & IATTR_FLAGS);
18056+ else
18057+ *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
18058+ break;
d337f35e 18059+
4bf69007
AM
18060+ case DEVPTS_SUPER_MAGIC:
18061+ *tag = i_tag_read(in);
18062+ *mask |= IATTR_TAG;
18063+ break;
d337f35e 18064+
4bf69007
AM
18065+ default:
18066+ break;
18067+ }
18068+ return 0;
d337f35e
JR
18069+}
18070+
4bf69007 18071+int vc_get_iattr(void __user *data)
d337f35e 18072+{
4bf69007
AM
18073+ struct path path;
18074+ struct vcmd_ctx_iattr_v1 vc_data = { .tag = -1 };
18075+ int ret;
d337f35e 18076+
4bf69007
AM
18077+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18078+ return -EFAULT;
d337f35e 18079+
4bf69007
AM
18080+ ret = user_lpath(vc_data.name, &path);
18081+ if (!ret) {
18082+ ret = __vc_get_iattr(path.dentry->d_inode,
18083+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
18084+ path_put(&path);
18085+ }
18086+ if (ret)
18087+ return ret;
d337f35e 18088+
4bf69007
AM
18089+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18090+ ret = -EFAULT;
18091+ return ret;
d337f35e
JR
18092+}
18093+
#ifdef	CONFIG_COMPAT

/*
 * vc_get_iattr_x32 - compat variant of vc_get_iattr()
 * @data: user pointer to a struct vcmd_ctx_iattr_v1_x32
 *
 * Same as vc_get_iattr(), with the path name taken from name_ptr via
 * compat_ptr().
 */
int vc_get_iattr_x32(void __user *data)
{
	struct vcmd_ctx_iattr_v1_x32 vc_data = { .tag = -1 };
	struct path path;
	int ret;

	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
	if (ret)
		return ret;

	ret = __vc_get_iattr(path.dentry->d_inode,
		&vc_data.tag, &vc_data.flags, &vc_data.mask);
	path_put(&path);
	if (ret)
		return ret;

	return copy_to_user(data, &vc_data, sizeof(vc_data)) ? -EFAULT : 0;
}

#endif	/* CONFIG_COMPAT */
d337f35e 18120+
d337f35e 18121+
4bf69007 18122+int vc_fget_iattr(uint32_t fd, void __user *data)
d337f35e 18123+{
4bf69007
AM
18124+ struct file *filp;
18125+ struct vcmd_ctx_fiattr_v0 vc_data = { .tag = -1 };
d337f35e
JR
18126+ int ret;
18127+
4bf69007 18128+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
18129+ return -EFAULT;
18130+
4bf69007 18131+ filp = fget(fd);
927ca606 18132+ if (!filp || !filp->f_path.dentry || !filp->f_path.dentry->d_inode)
4bf69007 18133+ return -EBADF;
2380c486 18134+
927ca606 18135+ ret = __vc_get_iattr(filp->f_path.dentry->d_inode,
4bf69007 18136+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
2380c486 18137+
4bf69007 18138+ fput(filp);
2380c486 18139+
4bf69007
AM
18140+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18141+ ret = -EFAULT;
d337f35e
JR
18142+ return ret;
18143+}
18144+
18145+
4bf69007 18146+static int __vc_set_iattr(struct dentry *de, uint32_t *tag, uint32_t *flags, uint32_t *mask)
2380c486 18147+{
4bf69007
AM
18148+ struct inode *in = de->d_inode;
18149+ int error = 0, is_proc = 0, has_tag = 0;
18150+ struct iattr attr = { 0 };
2380c486 18151+
4bf69007
AM
18152+ if (!in || !in->i_sb)
18153+ return -ESRCH;
2380c486 18154+
4bf69007
AM
18155+ is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
18156+ if ((*mask & IATTR_FLAGS) && !is_proc)
18157+ return -EINVAL;
2380c486 18158+
4bf69007
AM
18159+ has_tag = IS_TAGGED(in) ||
18160+ (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC);
18161+ if ((*mask & IATTR_TAG) && !has_tag)
18162+ return -EINVAL;
2380c486 18163+
4bf69007
AM
18164+ mutex_lock(&in->i_mutex);
18165+ if (*mask & IATTR_TAG) {
8ce283e1 18166+ attr.ia_tag = make_ktag(&init_user_ns, *tag);
4bf69007 18167+ attr.ia_valid |= ATTR_TAG;
2380c486
JR
18168+ }
18169+
4bf69007
AM
18170+ if (*mask & IATTR_FLAGS) {
18171+ struct proc_dir_entry *entry = PROC_I(in)->pde;
18172+ unsigned int iflags = PROC_I(in)->vx_flags;
2380c486 18173+
4bf69007
AM
18174+ iflags = (iflags & ~(*mask & IATTR_FLAGS))
18175+ | (*flags & IATTR_FLAGS);
18176+ PROC_I(in)->vx_flags = iflags;
18177+ if (entry)
18178+ entry->vx_flags = iflags;
18179+ }
9f7054f1 18180+
4bf69007
AM
18181+ if (*mask & (IATTR_IMMUTABLE | IATTR_IXUNLINK |
18182+ IATTR_BARRIER | IATTR_COW)) {
18183+ int iflags = in->i_flags;
18184+ int vflags = in->i_vflags;
9f7054f1 18185+
4bf69007
AM
18186+ if (*mask & IATTR_IMMUTABLE) {
18187+ if (*flags & IATTR_IMMUTABLE)
18188+ iflags |= S_IMMUTABLE;
18189+ else
18190+ iflags &= ~S_IMMUTABLE;
18191+ }
18192+ if (*mask & IATTR_IXUNLINK) {
18193+ if (*flags & IATTR_IXUNLINK)
18194+ iflags |= S_IXUNLINK;
18195+ else
18196+ iflags &= ~S_IXUNLINK;
18197+ }
18198+ if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) {
18199+ if (*flags & IATTR_BARRIER)
18200+ vflags |= V_BARRIER;
18201+ else
18202+ vflags &= ~V_BARRIER;
18203+ }
18204+ if (S_ISREG(in->i_mode) && (*mask & IATTR_COW)) {
18205+ if (*flags & IATTR_COW)
18206+ vflags |= V_COW;
18207+ else
18208+ vflags &= ~V_COW;
18209+ }
18210+ if (in->i_op && in->i_op->sync_flags) {
18211+ error = in->i_op->sync_flags(in, iflags, vflags);
18212+ if (error)
18213+ goto out;
18214+ }
18215+ }
9f7054f1 18216+
4bf69007
AM
18217+ if (attr.ia_valid) {
18218+ if (in->i_op && in->i_op->setattr)
18219+ error = in->i_op->setattr(de, &attr);
18220+ else {
18221+ error = inode_change_ok(in, &attr);
18222+ if (!error) {
18223+ setattr_copy(in, &attr);
18224+ mark_inode_dirty(in);
18225+ }
18226+ }
9f7054f1 18227+ }
9f7054f1 18228+
4bf69007
AM
18229+out:
18230+ mutex_unlock(&in->i_mutex);
18231+ return error;
18232+}
2380c486 18233+
4bf69007 18234+int vc_set_iattr(void __user *data)
d337f35e 18235+{
4bf69007
AM
18236+ struct path path;
18237+ struct vcmd_ctx_iattr_v1 vc_data;
18238+ int ret;
d337f35e 18239+
4bf69007
AM
18240+ if (!capable(CAP_LINUX_IMMUTABLE))
18241+ return -EPERM;
18242+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
18243+ return -EFAULT;
18244+
4bf69007
AM
18245+ ret = user_lpath(vc_data.name, &path);
18246+ if (!ret) {
18247+ ret = __vc_set_iattr(path.dentry,
18248+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
18249+ path_put(&path);
d337f35e 18250+ }
4bf69007
AM
18251+
18252+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18253+ ret = -EFAULT;
d337f35e
JR
18254+ return ret;
18255+}
18256+
4bf69007
AM
#ifdef	CONFIG_COMPAT

/*
 * vc_set_iattr_x32 - compat variant of vc_set_iattr()
 * @data: user pointer to a struct vcmd_ctx_iattr_v1_x32
 *
 * Same as vc_set_iattr(), with the path name taken from name_ptr via
 * compat_ptr().
 */
int vc_set_iattr_x32(void __user *data)
{
	struct vcmd_ctx_iattr_v1_x32 vc_data;
	struct path path;
	int ret;

	if (!capable(CAP_LINUX_IMMUTABLE))
		return -EPERM;
	if (copy_from_user(&vc_data, data, sizeof(vc_data)))
		return -EFAULT;

	ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
	if (ret == 0) {
		ret = __vc_set_iattr(path.dentry,
			&vc_data.tag, &vc_data.flags, &vc_data.mask);
		path_put(&path);
	}

	if (copy_to_user(data, &vc_data, sizeof(vc_data)))
		return -EFAULT;
	return ret;
}

#endif	/* CONFIG_COMPAT */
2380c486 18283+
4bf69007 18284+int vc_fset_iattr(uint32_t fd, void __user *data)
2380c486 18285+{
4bf69007
AM
18286+ struct file *filp;
18287+ struct vcmd_ctx_fiattr_v0 vc_data;
18288+ int ret;
2380c486 18289+
4bf69007
AM
18290+ if (!capable(CAP_LINUX_IMMUTABLE))
18291+ return -EPERM;
18292+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
2380c486
JR
18293+ return -EFAULT;
18294+
4bf69007 18295+ filp = fget(fd);
927ca606 18296+ if (!filp || !filp->f_path.dentry || !filp->f_path.dentry->d_inode)
4bf69007 18297+ return -EBADF;
2380c486 18298+
927ca606 18299+ ret = __vc_set_iattr(filp->f_path.dentry, &vc_data.tag,
4bf69007 18300+ &vc_data.flags, &vc_data.mask);
2380c486 18301+
4bf69007 18302+ fput(filp);
2380c486 18303+
4bf69007
AM
18304+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18305+ return -EFAULT;
18306+ return ret;
2380c486
JR
18307+}
18308+
2380c486 18309+
4bf69007 18310+enum { Opt_notagcheck, Opt_tag, Opt_notag, Opt_tagid, Opt_err };
2380c486 18311+
4bf69007
AM
18312+static match_table_t tokens = {
18313+ {Opt_notagcheck, "notagcheck"},
18314+#ifdef CONFIG_PROPAGATE
18315+ {Opt_notag, "notag"},
18316+ {Opt_tag, "tag"},
18317+ {Opt_tagid, "tagid=%u"},
18318+#endif
18319+ {Opt_err, NULL}
18320+};
2380c486 18321+
9f7054f1 18322+
4bf69007
AM
/*
 * __dx_parse_remove - cut an option out of a comma separated option string
 * @string: option string, edited in place
 * @opt:    text to look for
 *
 * Finds the first occurrence of @opt and removes everything from there up
 * to (not including) the next comma or the end of the string.  The rest
 * of the string is moved down and the bytes freed at the end are zeroed.
 * Nothing happens when @opt does not occur.
 */
static void __dx_parse_remove(char *string, char *opt)
{
	char *hit = strstr(string, opt);
	char *tail;
	size_t keep;

	if (!hit)
		return;

	/* tail starts at the comma ending the option, or at the final NUL */
	tail = hit + strcspn(hit, ",");
	keep = strlen(tail);

	/* close the gap, then clear what used to be the end of the string */
	memmove(hit, tail, keep);
	memset(hit + keep, 0, tail - hit);
}
18337+
61333608 18338+int dx_parse_tag(char *string, vtag_t *tag, int remove, int *mnt_flags,
4bf69007 18339+ unsigned long *flags)
9f7054f1 18340+{
4bf69007
AM
18341+ int set = 0;
18342+ substring_t args[MAX_OPT_ARGS];
18343+ int token;
18344+ char *s, *p, *opts;
18345+#if defined(CONFIG_PROPAGATE) || defined(CONFIG_VSERVER_DEBUG)
18346+ int option = 0;
18347+#endif
9f7054f1 18348+
4bf69007
AM
18349+ if (!string)
18350+ return 0;
18351+ s = kstrdup(string, GFP_KERNEL | GFP_ATOMIC);
18352+ if (!s)
18353+ return 0;
9f7054f1 18354+
4bf69007
AM
18355+ opts = s;
18356+ while ((p = strsep(&opts, ",")) != NULL) {
18357+ token = match_token(p, tokens, args);
9f7054f1 18358+
4bf69007
AM
18359+ switch (token) {
18360+#ifdef CONFIG_PROPAGATE
18361+ case Opt_tag:
18362+ if (tag)
18363+ *tag = 0;
18364+ if (remove)
18365+ __dx_parse_remove(s, "tag");
18366+ *mnt_flags |= MNT_TAGID;
18367+ set |= MNT_TAGID;
18368+ break;
18369+ case Opt_notag:
18370+ if (remove)
18371+ __dx_parse_remove(s, "notag");
18372+ *mnt_flags |= MNT_NOTAG;
18373+ set |= MNT_NOTAG;
18374+ break;
18375+ case Opt_tagid:
18376+ if (tag && !match_int(args, &option))
18377+ *tag = option;
18378+ if (remove)
18379+ __dx_parse_remove(s, "tagid");
18380+ *mnt_flags |= MNT_TAGID;
18381+ set |= MNT_TAGID;
18382+ break;
18383+#endif /* CONFIG_PROPAGATE */
18384+ case Opt_notagcheck:
18385+ if (remove)
18386+ __dx_parse_remove(s, "notagcheck");
18387+ *flags |= MS_NOTAGCHECK;
18388+ set |= MS_NOTAGCHECK;
18389+ break;
18390+ }
18391+ vxdprintk(VXD_CBIT(tag, 7),
18392+ "dx_parse_tag(" VS_Q("%s") "): %d:#%d",
18393+ p, token, option);
18394+ }
18395+ if (set)
18396+ strcpy(string, s);
18397+ kfree(s);
18398+ return set;
9f7054f1 18399+}
2380c486 18400+
#ifdef	CONFIG_PROPAGATE

/*
 * __dx_propagate_tag - give an inode the tag of the mount it is reached by
 * @nd:    lookup data providing the mount; nothing happens when it or its
 *         mount is NULL
 * @inode: inode to tag
 *
 * The inode's tag is only written when the mount has MNT_TAGID set; the
 * value written is the mount's mnt_tag.
 */
void __dx_propagate_tag(struct nameidata *nd, struct inode *inode)
{
	struct vfsmount *mnt;
	vtag_t new_tag;
	int propagate;

	if (!nd)
		return;
	mnt = nd->path.mnt;
	if (!mnt)
		return;

	propagate = (mnt->mnt_flags & MNT_TAGID);
	new_tag = propagate ? mnt->mnt_tag : 0;

	vxdprintk(VXD_CBIT(tag, 7),
		"dx_propagate_tag(%p[#%lu.%d]): %d,%d",
		inode, inode->i_ino, inode->i_tag,
		new_tag, (propagate) ? 1 : 0);

	if (propagate)
		i_tag_write(inode, new_tag);
}

#include <linux/module.h>

EXPORT_SYMBOL_GPL(__dx_propagate_tag);

#endif	/* CONFIG_PROPAGATE */
2380c486 18433+
c2806d43
AM
18434diff -urNp -x '*.orig' linux-4.4/kernel/vserver/limit.c linux-4.4/kernel/vserver/limit.c
18435--- linux-4.4/kernel/vserver/limit.c 1970-01-01 01:00:00.000000000 +0100
18436+++ linux-4.4/kernel/vserver/limit.c 2021-02-24 16:56:24.612823601 +0100
1d9ad342 18437@@ -0,0 +1,386 @@
4bf69007
AM
18438+/*
18439+ * linux/kernel/vserver/limit.c
18440+ *
18441+ * Virtual Server: Context Limits
18442+ *
c2806d43 18443+ * Copyright (C) 2004-2010 Herbert Pötzl
4bf69007
AM
18444+ *
18445+ * V0.01 broken out from vcontext V0.05
18446+ * V0.02 changed vcmds to vxi arg
18447+ * V0.03 added memory cgroup support
18448+ *
18449+ */
2380c486 18450+
4bf69007
AM
18451+#include <linux/sched.h>
18452+#include <linux/module.h>
18453+#include <linux/memcontrol.h>
927ca606 18454+#include <linux/page_counter.h>
4bf69007
AM
18455+#include <linux/vs_limit.h>
18456+#include <linux/vserver/limit.h>
18457+#include <linux/vserver/limit_cmd.h>
2380c486 18458+
4bf69007 18459+#include <asm/uaccess.h>
d337f35e 18460+
d337f35e 18461+
4bf69007
AM
18462+const char *vlimit_name[NUM_LIMITS] = {
18463+ [RLIMIT_CPU] = "CPU",
18464+ [RLIMIT_NPROC] = "NPROC",
18465+ [RLIMIT_NOFILE] = "NOFILE",
18466+ [RLIMIT_LOCKS] = "LOCKS",
18467+ [RLIMIT_SIGPENDING] = "SIGP",
18468+ [RLIMIT_MSGQUEUE] = "MSGQ",
d337f35e 18469+
4bf69007
AM
18470+ [VLIMIT_NSOCK] = "NSOCK",
18471+ [VLIMIT_OPENFD] = "OPENFD",
18472+ [VLIMIT_SHMEM] = "SHMEM",
18473+ [VLIMIT_DENTRY] = "DENTRY",
18474+};
2380c486 18475+
4bf69007 18476+EXPORT_SYMBOL_GPL(vlimit_name);
2380c486 18477+
4bf69007 18478+#define MASK_ENTRY(x) (1 << (x))
d337f35e 18479+
4bf69007
AM
18480+const struct vcmd_ctx_rlimit_mask_v0 vlimit_mask = {
18481+ /* minimum */
18482+ 0
18483+ , /* softlimit */
18484+ 0
18485+ , /* maximum */
18486+ MASK_ENTRY( RLIMIT_NPROC ) |
18487+ MASK_ENTRY( RLIMIT_NOFILE ) |
18488+ MASK_ENTRY( RLIMIT_LOCKS ) |
18489+ MASK_ENTRY( RLIMIT_MSGQUEUE ) |
d337f35e 18490+
4bf69007
AM
18491+ MASK_ENTRY( VLIMIT_NSOCK ) |
18492+ MASK_ENTRY( VLIMIT_OPENFD ) |
18493+ MASK_ENTRY( VLIMIT_SHMEM ) |
18494+ MASK_ENTRY( VLIMIT_DENTRY ) |
18495+ 0
18496+};
18497+ /* accounting only */
18498+uint32_t account_mask =
18499+ MASK_ENTRY( VLIMIT_SEMARY ) |
18500+ MASK_ENTRY( VLIMIT_NSEMS ) |
18501+ MASK_ENTRY( VLIMIT_MAPPED ) |
18502+ 0;
d337f35e 18503+
4bf69007
AM
18504+
18505+static int is_valid_vlimit(int id)
18506+{
18507+ uint32_t mask = vlimit_mask.minimum |
18508+ vlimit_mask.softlimit | vlimit_mask.maximum;
18509+ return mask & (1 << id);
d337f35e
JR
18510+}
18511+
4bf69007 18512+static int is_accounted_vlimit(int id)
d337f35e 18513+{
4bf69007
AM
18514+ if (is_valid_vlimit(id))
18515+ return 1;
18516+ return account_mask & (1 << id);
18517+}
d337f35e 18518+
d337f35e 18519+
4bf69007
AM
18520+static inline uint64_t vc_get_soft(struct vx_info *vxi, int id)
18521+{
18522+ rlim_t limit = __rlim_soft(&vxi->limit, id);
18523+ return VX_VLIM(limit);
18524+}
d337f35e 18525+
4bf69007
AM
18526+static inline uint64_t vc_get_hard(struct vx_info *vxi, int id)
18527+{
18528+ rlim_t limit = __rlim_hard(&vxi->limit, id);
18529+ return VX_VLIM(limit);
18530+}
d337f35e 18531+
4bf69007
AM
18532+static int do_get_rlimit(struct vx_info *vxi, uint32_t id,
18533+ uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum)
18534+{
18535+ if (!is_valid_vlimit(id))
18536+ return -EINVAL;
18537+
18538+ if (minimum)
18539+ *minimum = CRLIM_UNSET;
18540+ if (softlimit)
18541+ *softlimit = vc_get_soft(vxi, id);
18542+ if (maximum)
18543+ *maximum = vc_get_hard(vxi, id);
d337f35e
JR
18544+ return 0;
18545+}
18546+
4bf69007 18547+int vc_get_rlimit(struct vx_info *vxi, void __user *data)
d337f35e 18548+{
4bf69007
AM
18549+ struct vcmd_ctx_rlimit_v0 vc_data;
18550+ int ret;
d337f35e 18551+
4bf69007
AM
18552+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18553+ return -EFAULT;
18554+
18555+ ret = do_get_rlimit(vxi, vc_data.id,
18556+ &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
18557+ if (ret)
18558+ return ret;
d337f35e 18559+
2380c486 18560+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
d337f35e
JR
18561+ return -EFAULT;
18562+ return 0;
18563+}
18564+
4bf69007
AM
18565+static int do_set_rlimit(struct vx_info *vxi, uint32_t id,
18566+ uint64_t minimum, uint64_t softlimit, uint64_t maximum)
d337f35e 18567+{
4bf69007
AM
18568+ if (!is_valid_vlimit(id))
18569+ return -EINVAL;
d337f35e 18570+
4bf69007
AM
18571+ if (maximum != CRLIM_KEEP)
18572+ __rlim_hard(&vxi->limit, id) = VX_RLIM(maximum);
18573+ if (softlimit != CRLIM_KEEP)
18574+ __rlim_soft(&vxi->limit, id) = VX_RLIM(softlimit);
18575+
18576+ /* clamp soft limit */
18577+ if (__rlim_soft(&vxi->limit, id) > __rlim_hard(&vxi->limit, id))
18578+ __rlim_soft(&vxi->limit, id) = __rlim_hard(&vxi->limit, id);
d337f35e 18579+
d337f35e
JR
18580+ return 0;
18581+}
18582+
4bf69007
AM
18583+int vc_set_rlimit(struct vx_info *vxi, void __user *data)
18584+{
18585+ struct vcmd_ctx_rlimit_v0 vc_data;
d337f35e 18586+
4bf69007
AM
18587+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18588+ return -EFAULT;
d337f35e 18589+
4bf69007
AM
18590+ return do_set_rlimit(vxi, vc_data.id,
18591+ vc_data.minimum, vc_data.softlimit, vc_data.maximum);
18592+}
d337f35e 18593+
4bf69007 18594+#ifdef CONFIG_IA32_EMULATION
2380c486 18595+
4bf69007
AM
18596+int vc_set_rlimit_x32(struct vx_info *vxi, void __user *data)
18597+{
18598+ struct vcmd_ctx_rlimit_v0_x32 vc_data;
d337f35e 18599+
4bf69007
AM
18600+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18601+ return -EFAULT;
d337f35e 18602+
4bf69007
AM
18603+ return do_set_rlimit(vxi, vc_data.id,
18604+ vc_data.minimum, vc_data.softlimit, vc_data.maximum);
18605+}
d337f35e 18606+
4bf69007
AM
18607+int vc_get_rlimit_x32(struct vx_info *vxi, void __user *data)
18608+{
18609+ struct vcmd_ctx_rlimit_v0_x32 vc_data;
18610+ int ret;
d337f35e 18611+
4bf69007
AM
18612+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18613+ return -EFAULT;
d337f35e 18614+
4bf69007
AM
18615+ ret = do_get_rlimit(vxi, vc_data.id,
18616+ &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
18617+ if (ret)
18618+ return ret;
2380c486 18619+
4bf69007
AM
18620+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18621+ return -EFAULT;
18622+ return 0;
2380c486 18623+}
d337f35e 18624+
4bf69007 18625+#endif /* CONFIG_IA32_EMULATION */
d337f35e
JR
18626+
18627+
4bf69007
AM
18628+int vc_get_rlimit_mask(uint32_t id, void __user *data)
18629+{
18630+ if (copy_to_user(data, &vlimit_mask, sizeof(vlimit_mask)))
18631+ return -EFAULT;
18632+ return 0;
18633+}
d337f35e
JR
18634+
18635+
4bf69007
AM
18636+static inline void vx_reset_hits(struct _vx_limit *limit)
18637+{
18638+ int lim;
d337f35e 18639+
4bf69007
AM
18640+ for (lim = 0; lim < NUM_LIMITS; lim++) {
18641+ atomic_set(&__rlim_lhit(limit, lim), 0);
18642+ }
18643+}
d337f35e 18644+
4bf69007 18645+int vc_reset_hits(struct vx_info *vxi, void __user *data)
d337f35e 18646+{
4bf69007
AM
18647+ vx_reset_hits(&vxi->limit);
18648+ return 0;
d337f35e
JR
18649+}
18650+
4bf69007 18651+static inline void vx_reset_minmax(struct _vx_limit *limit)
d337f35e 18652+{
4bf69007
AM
18653+ rlim_t value;
18654+ int lim;
18655+
18656+ for (lim = 0; lim < NUM_LIMITS; lim++) {
18657+ value = __rlim_get(limit, lim);
18658+ __rlim_rmax(limit, lim) = value;
18659+ __rlim_rmin(limit, lim) = value;
18660+ }
d337f35e
JR
18661+}
18662+
4bf69007 18663+int vc_reset_minmax(struct vx_info *vxi, void __user *data)
d337f35e 18664+{
4bf69007
AM
18665+ vx_reset_minmax(&vxi->limit);
18666+ return 0;
d337f35e
JR
18667+}
18668+
18669+
4bf69007 18670+int vc_rlimit_stat(struct vx_info *vxi, void __user *data)
d337f35e 18671+{
4bf69007
AM
18672+ struct vcmd_rlimit_stat_v0 vc_data;
18673+ struct _vx_limit *limit = &vxi->limit;
18674+ int id;
d337f35e 18675+
4bf69007
AM
18676+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18677+ return -EFAULT;
d337f35e 18678+
4bf69007
AM
18679+ id = vc_data.id;
18680+ if (!is_accounted_vlimit(id))
18681+ return -EINVAL;
2380c486 18682+
4bf69007
AM
18683+ vx_limit_fixup(limit, id);
18684+ vc_data.hits = atomic_read(&__rlim_lhit(limit, id));
18685+ vc_data.value = __rlim_get(limit, id);
18686+ vc_data.minimum = __rlim_rmin(limit, id);
18687+ vc_data.maximum = __rlim_rmax(limit, id);
2380c486 18688+
4bf69007
AM
18689+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18690+ return -EFAULT;
18691+ return 0;
d337f35e
JR
18692+}
18693+
d337f35e 18694+
927ca606 18695+#ifdef CONFIG_MEMCG
1d9ad342
AM
18696+
18697+void dump_sysinfo(struct sysinfo *si)
18698+{
18699+ printk(KERN_INFO "sysinfo: memunit=%u\n"
18700+ "\ttotalram:\t%lu\n"
18701+ "\tfreeram:\t%lu\n"
18702+ "\tsharedram:\t%lu\n"
18703+ "\tbufferram:\t%lu\n"
18704+ "\ttotalswap:\t%lu\n"
18705+ "\tfreeswap:\t%lu\n"
18706+ "\ttotalhigh:\t%lu\n"
18707+ "\tfreehigh:\t%lu\n",
18708+ si->mem_unit,
18709+ si->totalram,
18710+ si->freeram,
18711+ si->sharedram,
18712+ si->bufferram,
18713+ si->totalswap,
18714+ si->freeswap,
18715+ si->totalhigh,
18716+ si->freehigh);
18717+}
18718+
4bf69007 18719+void vx_vsi_meminfo(struct sysinfo *val)
d337f35e 18720+{
4bf69007 18721+ struct mem_cgroup *mcg;
1d9ad342
AM
18722+ unsigned long res_limit, res_usage;
18723+ unsigned shift;
18724+
18725+ if (VXD_CBIT(cvirt, 4))
18726+ dump_sysinfo(val);
d337f35e 18727+
4bf69007
AM
18728+ rcu_read_lock();
18729+ mcg = mem_cgroup_from_task(current);
1d9ad342
AM
18730+ if (VXD_CBIT(cvirt, 5))
18731+ dump_mem_cgroup(mcg);
4bf69007
AM
18732+ rcu_read_unlock();
18733+ if (!mcg)
18734+ goto out;
d337f35e 18735+
927ca606
AM
18736+ res_limit = mem_cgroup_mem_limit_pages(mcg);
18737+ res_usage = mem_cgroup_mem_usage_pages(mcg);
1d9ad342 18738+ shift = val->mem_unit == 1 ? PAGE_SHIFT : 0;
2380c486 18739+
927ca606 18740+ if (res_limit != PAGE_COUNTER_MAX)
1d9ad342
AM
18741+ val->totalram = res_limit << shift;
18742+ val->freeram = val->totalram - (res_usage << shift);
4bf69007
AM
18743+ val->bufferram = 0;
18744+ val->totalhigh = 0;
18745+ val->freehigh = 0;
18746+out:
4bf69007 18747+ return;
d337f35e
JR
18748+}
18749+
4bf69007 18750+void vx_vsi_swapinfo(struct sysinfo *val)
d337f35e 18751+{
4bf69007
AM
18752+#ifdef CONFIG_MEMCG_SWAP
18753+ struct mem_cgroup *mcg;
1d9ad342
AM
18754+ unsigned long res_limit, res_usage, memsw_limit, memsw_usage;
18755+ signed long swap_limit, swap_usage;
18756+ unsigned shift;
18757+
18758+ if (VXD_CBIT(cvirt, 6))
18759+ dump_sysinfo(val);
d337f35e 18760+
4bf69007
AM
18761+ rcu_read_lock();
18762+ mcg = mem_cgroup_from_task(current);
1d9ad342
AM
18763+ if (VXD_CBIT(cvirt, 7))
18764+ dump_mem_cgroup(mcg);
4bf69007
AM
18765+ rcu_read_unlock();
18766+ if (!mcg)
18767+ goto out;
d337f35e 18768+
927ca606 18769+ res_limit = mem_cgroup_mem_limit_pages(mcg);
d337f35e 18770+
4bf69007 18771+ /* memory unlimited */
927ca606 18772+ if (res_limit == PAGE_COUNTER_MAX)
4bf69007 18773+ goto out;
d337f35e 18774+
1d9ad342
AM
18775+ res_usage = mem_cgroup_mem_usage_pages(mcg);
18776+ memsw_limit = mem_cgroup_memsw_limit_pages(mcg);
18777+ memsw_usage = mem_cgroup_memsw_usage_pages(mcg);
18778+ shift = val->mem_unit == 1 ? PAGE_SHIFT : 0;
18779+
4bf69007
AM
18780+ swap_limit = memsw_limit - res_limit;
18781+ /* we have a swap limit? */
927ca606 18782+ if (memsw_limit != PAGE_COUNTER_MAX)
1d9ad342 18783+ val->totalswap = swap_limit << shift;
d337f35e 18784+
4bf69007
AM
18785+ /* calculate swap part */
18786+ swap_usage = (memsw_usage > res_usage) ?
18787+ memsw_usage - res_usage : 0;
18788+
18789+ /* total shown minus usage gives free swap */
18790+ val->freeswap = (swap_usage < swap_limit) ?
1d9ad342 18791+ val->totalswap - (swap_usage << shift) : 0;
4bf69007
AM
18792+out:
18793+#else /* !CONFIG_MEMCG_SWAP */
18794+ val->totalswap = 0;
18795+ val->freeswap = 0;
18796+#endif /* !CONFIG_MEMCG_SWAP */
4bf69007 18797+ return;
d337f35e
JR
18798+}
18799+
4bf69007 18800+long vx_vsi_cached(struct sysinfo *val)
d337f35e 18801+{
4bf69007 18802+ long cache = 0;
927ca606 18803+#ifdef CONFIG_MEMCG_BROKEN
4bf69007 18804+ struct mem_cgroup *mcg;
d337f35e 18805+
1d9ad342
AM
18806+ if (VXD_CBIT(cvirt, 8))
18807+ dump_sysinfo(val);
18808+
4bf69007
AM
18809+ rcu_read_lock();
18810+ mcg = mem_cgroup_from_task(current);
1d9ad342
AM
18811+ if (VXD_CBIT(cvirt, 9))
18812+ dump_mem_cgroup(mcg);
4bf69007
AM
18813+ rcu_read_unlock();
18814+ if (!mcg)
18815+ goto out;
2380c486 18816+
927ca606 18817+ // cache = mem_cgroup_stat_read_cache(mcg);
4bf69007 18818+out:
2380c486 18819+#endif
4bf69007 18820+ return cache;
d337f35e 18821+}
927ca606 18822+#endif /* !CONFIG_MEMCG */
d337f35e 18823+
c2806d43
AM
18824diff -urNp -x '*.orig' linux-4.4/kernel/vserver/limit_init.h linux-4.4/kernel/vserver/limit_init.h
18825--- linux-4.4/kernel/vserver/limit_init.h 1970-01-01 01:00:00.000000000 +0100
18826+++ linux-4.4/kernel/vserver/limit_init.h 2021-02-24 16:56:24.612823601 +0100
4bf69007 18827@@ -0,0 +1,31 @@
d337f35e
JR
18828+
18829+
4bf69007
AM
18830+static inline void vx_info_init_limit(struct _vx_limit *limit)
18831+{
18832+ int lim;
d337f35e 18833+
4bf69007
AM
18834+ for (lim = 0; lim < NUM_LIMITS; lim++) {
18835+ __rlim_soft(limit, lim) = RLIM_INFINITY;
18836+ __rlim_hard(limit, lim) = RLIM_INFINITY;
18837+ __rlim_set(limit, lim, 0);
18838+ atomic_set(&__rlim_lhit(limit, lim), 0);
18839+ __rlim_rmin(limit, lim) = 0;
18840+ __rlim_rmax(limit, lim) = 0;
18841+ }
18842+}
d337f35e 18843+
4bf69007 18844+static inline void vx_info_exit_limit(struct _vx_limit *limit)
d337f35e 18845+{
4bf69007
AM
18846+ rlim_t value;
18847+ int lim;
d337f35e 18848+
4bf69007
AM
18849+ for (lim = 0; lim < NUM_LIMITS; lim++) {
18850+ if ((1 << lim) & VLIM_NOCHECK)
18851+ continue;
18852+ value = __rlim_get(limit, lim);
18853+ vxwprintk_xid(value,
18854+ "!!! limit: %p[%s,%d] = %ld on exit.",
18855+ limit, vlimit_name[lim], lim, (long)value);
18856+ }
18857+}
d337f35e 18858+
c2806d43
AM
18859diff -urNp -x '*.orig' linux-4.4/kernel/vserver/limit_proc.h linux-4.4/kernel/vserver/limit_proc.h
18860--- linux-4.4/kernel/vserver/limit_proc.h 1970-01-01 01:00:00.000000000 +0100
18861+++ linux-4.4/kernel/vserver/limit_proc.h 2021-02-24 16:56:24.612823601 +0100
4bf69007
AM
18862@@ -0,0 +1,57 @@
18863+#ifndef _VX_LIMIT_PROC_H
18864+#define _VX_LIMIT_PROC_H
d337f35e 18865+
4bf69007 18866+#include <linux/vserver/limit_int.h>
d337f35e 18867+
d337f35e 18868+
4bf69007
AM
18869+#define VX_LIMIT_FMT ":\t%8ld\t%8ld/%8ld\t%8lld/%8lld\t%6d\n"
18870+#define VX_LIMIT_TOP \
18871+ "Limit\t current\t min/max\t\t soft/hard\t\thits\n"
d337f35e 18872+
4bf69007
AM
18873+#define VX_LIMIT_ARG(r) \
18874+ (unsigned long)__rlim_get(limit, r), \
18875+ (unsigned long)__rlim_rmin(limit, r), \
18876+ (unsigned long)__rlim_rmax(limit, r), \
18877+ VX_VLIM(__rlim_soft(limit, r)), \
18878+ VX_VLIM(__rlim_hard(limit, r)), \
18879+ atomic_read(&__rlim_lhit(limit, r))
d337f35e 18880+
4bf69007
AM
18881+static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
18882+{
18883+ vx_limit_fixup(limit, -1);
18884+ return sprintf(buffer, VX_LIMIT_TOP
18885+ "PROC" VX_LIMIT_FMT
18886+ "VM" VX_LIMIT_FMT
18887+ "VML" VX_LIMIT_FMT
18888+ "RSS" VX_LIMIT_FMT
18889+ "ANON" VX_LIMIT_FMT
18890+ "RMAP" VX_LIMIT_FMT
18891+ "FILES" VX_LIMIT_FMT
18892+ "OFD" VX_LIMIT_FMT
18893+ "LOCKS" VX_LIMIT_FMT
18894+ "SOCK" VX_LIMIT_FMT
18895+ "MSGQ" VX_LIMIT_FMT
18896+ "SHM" VX_LIMIT_FMT
18897+ "SEMA" VX_LIMIT_FMT
18898+ "SEMS" VX_LIMIT_FMT
18899+ "DENT" VX_LIMIT_FMT,
18900+ VX_LIMIT_ARG(RLIMIT_NPROC),
18901+ VX_LIMIT_ARG(RLIMIT_AS),
18902+ VX_LIMIT_ARG(RLIMIT_MEMLOCK),
18903+ VX_LIMIT_ARG(RLIMIT_RSS),
18904+ VX_LIMIT_ARG(VLIMIT_ANON),
18905+ VX_LIMIT_ARG(VLIMIT_MAPPED),
18906+ VX_LIMIT_ARG(RLIMIT_NOFILE),
18907+ VX_LIMIT_ARG(VLIMIT_OPENFD),
18908+ VX_LIMIT_ARG(RLIMIT_LOCKS),
18909+ VX_LIMIT_ARG(VLIMIT_NSOCK),
18910+ VX_LIMIT_ARG(RLIMIT_MSGQUEUE),
18911+ VX_LIMIT_ARG(VLIMIT_SHMEM),
18912+ VX_LIMIT_ARG(VLIMIT_SEMARY),
18913+ VX_LIMIT_ARG(VLIMIT_NSEMS),
18914+ VX_LIMIT_ARG(VLIMIT_DENTRY));
d337f35e
JR
18915+}
18916+
4bf69007 18917+#endif /* _VX_LIMIT_PROC_H */
d337f35e 18918+
d337f35e 18919+
c2806d43
AM
18920diff -urNp -x '*.orig' linux-4.4/kernel/vserver/network.c linux-4.4/kernel/vserver/network.c
18921--- linux-4.4/kernel/vserver/network.c 1970-01-01 01:00:00.000000000 +0100
18922+++ linux-4.4/kernel/vserver/network.c 2021-02-24 16:56:24.612823601 +0100
5cb1760b 18923@@ -0,0 +1,1053 @@
d337f35e 18924+/*
4bf69007 18925+ * linux/kernel/vserver/network.c
d337f35e 18926+ *
4bf69007
AM
18927+ * Virtual Server: Network Support
18928+ *
c2806d43 18929+ * Copyright (C) 2003-2007 Herbert Pötzl
4bf69007
AM
18930+ *
18931+ * V0.01 broken out from vcontext V0.05
18932+ * V0.02 cleaned up implementation
18933+ * V0.03 added equiv nx commands
18934+ * V0.04 switch to RCU based hash
18935+ * V0.05 and back to locking again
18936+ * V0.06 changed vcmds to nxi arg
18937+ * V0.07 have __create claim() the nxi
d337f35e 18938+ *
d337f35e 18939+ */
d337f35e 18940+
4bf69007
AM
18941+#include <linux/err.h>
18942+#include <linux/slab.h>
18943+#include <linux/rcupdate.h>
18944+#include <net/ipv6.h>
d337f35e 18945+
4bf69007
AM
18946+#include <linux/vs_network.h>
18947+#include <linux/vs_pid.h>
18948+#include <linux/vserver/network_cmd.h>
d337f35e
JR
18949+
18950+
4bf69007
AM
18951+atomic_t nx_global_ctotal = ATOMIC_INIT(0);
18952+atomic_t nx_global_cactive = ATOMIC_INIT(0);
d337f35e 18953+
4bf69007
AM
18954+static struct kmem_cache *nx_addr_v4_cachep = NULL;
18955+static struct kmem_cache *nx_addr_v6_cachep = NULL;
d337f35e 18956+
d337f35e 18957+
4bf69007 18958+static int __init init_network(void)
d337f35e 18959+{
4bf69007
AM
18960+ nx_addr_v4_cachep = kmem_cache_create("nx_v4_addr_cache",
18961+ sizeof(struct nx_addr_v4), 0,
18962+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
18963+ nx_addr_v6_cachep = kmem_cache_create("nx_v6_addr_cache",
18964+ sizeof(struct nx_addr_v6), 0,
18965+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
d337f35e
JR
18966+ return 0;
18967+}
18968+
18969+
4bf69007 18970+/* __alloc_nx_addr_v4() */
d337f35e 18971+
4bf69007 18972+static inline struct nx_addr_v4 *__alloc_nx_addr_v4(void)
d337f35e 18973+{
4bf69007
AM
18974+ struct nx_addr_v4 *nxa = kmem_cache_alloc(
18975+ nx_addr_v4_cachep, GFP_KERNEL);
92598135 18976+
4bf69007
AM
18977+ if (!IS_ERR(nxa))
18978+ memset(nxa, 0, sizeof(*nxa));
18979+ return nxa;
d337f35e
JR
18980+}
18981+
4bf69007 18982+/* __dealloc_nx_addr_v4() */
d337f35e 18983+
4bf69007
AM
18984+static inline void __dealloc_nx_addr_v4(struct nx_addr_v4 *nxa)
18985+{
18986+ kmem_cache_free(nx_addr_v4_cachep, nxa);
18987+}
d337f35e 18988+
4bf69007 18989+/* __dealloc_nx_addr_v4_all() */
d337f35e 18990+
4bf69007 18991+static inline void __dealloc_nx_addr_v4_all(struct nx_addr_v4 *nxa)
d337f35e 18992+{
4bf69007
AM
18993+ while (nxa) {
18994+ struct nx_addr_v4 *next = nxa->next;
d337f35e 18995+
4bf69007
AM
18996+ __dealloc_nx_addr_v4(nxa);
18997+ nxa = next;
18998+ }
18999+}
d337f35e 19000+
d337f35e 19001+
4bf69007 19002+#ifdef CONFIG_IPV6
d337f35e 19003+
4bf69007 19004+/* __alloc_nx_addr_v6() */
d337f35e 19005+
4bf69007
AM
19006+static inline struct nx_addr_v6 *__alloc_nx_addr_v6(void)
19007+{
19008+ struct nx_addr_v6 *nxa = kmem_cache_alloc(
19009+ nx_addr_v6_cachep, GFP_KERNEL);
d337f35e 19010+
4bf69007
AM
19011+ if (!IS_ERR(nxa))
19012+ memset(nxa, 0, sizeof(*nxa));
19013+ return nxa;
d337f35e
JR
19014+}
19015+
4bf69007
AM
19016+/* __dealloc_nx_addr_v6() */
19017+
19018+static inline void __dealloc_nx_addr_v6(struct nx_addr_v6 *nxa)
d337f35e 19019+{
4bf69007
AM
19020+ kmem_cache_free(nx_addr_v6_cachep, nxa);
19021+}
d337f35e 19022+
4bf69007 19023+/* __dealloc_nx_addr_v6_all() */
d337f35e 19024+
4bf69007
AM
19025+static inline void __dealloc_nx_addr_v6_all(struct nx_addr_v6 *nxa)
19026+{
19027+ while (nxa) {
19028+ struct nx_addr_v6 *next = nxa->next;
d337f35e 19029+
4bf69007
AM
19030+ __dealloc_nx_addr_v6(nxa);
19031+ nxa = next;
19032+ }
19033+}
d337f35e 19034+
4bf69007 19035+#endif /* CONFIG_IPV6 */
d337f35e 19036+
4bf69007 19037+/* __alloc_nx_info()
d337f35e 19038+
4bf69007
AM
19039+ * allocate an initialized nx_info struct
19040+ * doesn't make it visible (hash) */
d337f35e 19041+
61333608 19042+static struct nx_info *__alloc_nx_info(vnid_t nid)
d337f35e 19043+{
4bf69007 19044+ struct nx_info *new = NULL;
d337f35e 19045+
4bf69007 19046+ vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
d337f35e 19047+
4bf69007
AM
19048+ /* would this benefit from a slab cache? */
19049+ new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
19050+ if (!new)
19051+ return 0;
d337f35e 19052+
4bf69007
AM
19053+ memset(new, 0, sizeof(struct nx_info));
19054+ new->nx_id = nid;
19055+ INIT_HLIST_NODE(&new->nx_hlist);
19056+ atomic_set(&new->nx_usecnt, 0);
19057+ atomic_set(&new->nx_tasks, 0);
19058+ spin_lock_init(&new->addr_lock);
19059+ new->nx_state = 0;
d337f35e 19060+
4bf69007 19061+ new->nx_flags = NXF_INIT_SET;
d337f35e 19062+
4bf69007 19063+ /* rest of init goes here */
d337f35e 19064+
4bf69007
AM
19065+ new->v4_lback.s_addr = htonl(INADDR_LOOPBACK);
19066+ new->v4_bcast.s_addr = htonl(INADDR_BROADCAST);
19067+
19068+ vxdprintk(VXD_CBIT(nid, 0),
19069+ "alloc_nx_info(%d) = %p", nid, new);
19070+ atomic_inc(&nx_global_ctotal);
19071+ return new;
d337f35e
JR
19072+}
19073+
4bf69007 19074+/* __dealloc_nx_info()
d337f35e 19075+
4bf69007 19076+ * final disposal of nx_info */
d337f35e 19077+
4bf69007
AM
19078+static void __dealloc_nx_info(struct nx_info *nxi)
19079+{
19080+ vxdprintk(VXD_CBIT(nid, 0),
19081+ "dealloc_nx_info(%p)", nxi);
d337f35e 19082+
4bf69007
AM
19083+ nxi->nx_hlist.next = LIST_POISON1;
19084+ nxi->nx_id = -1;
d337f35e 19085+
4bf69007
AM
19086+ BUG_ON(atomic_read(&nxi->nx_usecnt));
19087+ BUG_ON(atomic_read(&nxi->nx_tasks));
19088+
19089+ __dealloc_nx_addr_v4_all(nxi->v4.next);
19090+#ifdef CONFIG_IPV6
19091+ __dealloc_nx_addr_v6_all(nxi->v6.next);
19092+#endif
19093+
19094+ nxi->nx_state |= NXS_RELEASED;
19095+ kfree(nxi);
19096+ atomic_dec(&nx_global_ctotal);
d337f35e
JR
19097+}
19098+
4bf69007
AM
19099+static void __shutdown_nx_info(struct nx_info *nxi)
19100+{
19101+ nxi->nx_state |= NXS_SHUTDOWN;
19102+ vs_net_change(nxi, VSC_NETDOWN);
19103+}
d337f35e 19104+
4bf69007 19105+/* exported stuff */
d337f35e 19106+
4bf69007
AM
19107+void free_nx_info(struct nx_info *nxi)
19108+{
19109+ /* context shutdown is mandatory */
19110+ BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
d337f35e 19111+
4bf69007
AM
19112+ /* context must not be hashed */
19113+ BUG_ON(nxi->nx_state & NXS_HASHED);
d337f35e 19114+
4bf69007
AM
19115+ BUG_ON(atomic_read(&nxi->nx_usecnt));
19116+ BUG_ON(atomic_read(&nxi->nx_tasks));
d337f35e 19117+
4bf69007
AM
19118+ __dealloc_nx_info(nxi);
19119+}
d337f35e 19120+
d337f35e 19121+
4bf69007
AM
19122+void __nx_set_lback(struct nx_info *nxi)
19123+{
19124+ int nid = nxi->nx_id;
19125+ __be32 lback = htonl(INADDR_LOOPBACK ^ ((nid & 0xFFFF) << 8));
d337f35e 19126+
4bf69007
AM
19127+ nxi->v4_lback.s_addr = lback;
19128+}
d337f35e 19129+
4bf69007
AM
19130+extern int __nx_inet_add_lback(__be32 addr);
19131+extern int __nx_inet_del_lback(__be32 addr);
d337f35e
JR
19132+
19133+
4bf69007 19134+/* hash table for nx_info hash */
d337f35e 19135+
4bf69007 19136+#define NX_HASH_SIZE 13
d337f35e 19137+
4bf69007
AM
19138+struct hlist_head nx_info_hash[NX_HASH_SIZE];
19139+
19140+static DEFINE_SPINLOCK(nx_info_hash_lock);
19141+
19142+
61333608 19143+static inline unsigned int __hashval(vnid_t nid)
d337f35e 19144+{
4bf69007 19145+ return (nid % NX_HASH_SIZE);
d337f35e
JR
19146+}
19147+
d337f35e 19148+
d337f35e 19149+
4bf69007 19150+/* __hash_nx_info()
d337f35e 19151+
4bf69007
AM
19152+ * add the nxi to the global hash table
19153+ * requires the hash_lock to be held */
19154+
19155+static inline void __hash_nx_info(struct nx_info *nxi)
d337f35e 19156+{
4bf69007 19157+ struct hlist_head *head;
d337f35e 19158+
4bf69007
AM
19159+ vxd_assert_lock(&nx_info_hash_lock);
19160+ vxdprintk(VXD_CBIT(nid, 4),
19161+ "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
d337f35e 19162+
4bf69007
AM
19163+ /* context must not be hashed */
19164+ BUG_ON(nx_info_state(nxi, NXS_HASHED));
d337f35e 19165+
4bf69007
AM
19166+ nxi->nx_state |= NXS_HASHED;
19167+ head = &nx_info_hash[__hashval(nxi->nx_id)];
19168+ hlist_add_head(&nxi->nx_hlist, head);
19169+ atomic_inc(&nx_global_cactive);
19170+}
d337f35e 19171+
4bf69007 19172+/* __unhash_nx_info()
d337f35e 19173+
4bf69007
AM
19174+ * remove the nxi from the global hash table
19175+ * requires the hash_lock to be held */
d337f35e 19176+
4bf69007
AM
19177+static inline void __unhash_nx_info(struct nx_info *nxi)
19178+{
19179+ vxd_assert_lock(&nx_info_hash_lock);
19180+ vxdprintk(VXD_CBIT(nid, 4),
19181+ "__unhash_nx_info: %p[#%d.%d.%d]", nxi, nxi->nx_id,
19182+ atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_tasks));
d337f35e 19183+
4bf69007
AM
19184+ /* context must be hashed */
19185+ BUG_ON(!nx_info_state(nxi, NXS_HASHED));
19186+ /* but without tasks */
19187+ BUG_ON(atomic_read(&nxi->nx_tasks));
d337f35e 19188+
4bf69007
AM
19189+ nxi->nx_state &= ~NXS_HASHED;
19190+ hlist_del(&nxi->nx_hlist);
19191+ atomic_dec(&nx_global_cactive);
d337f35e
JR
19192+}
19193+
d337f35e 19194+
4bf69007 19195+/* __lookup_nx_info()
d337f35e 19196+
4bf69007
AM
19197+ * requires the hash_lock to be held
19198+ * doesn't increment the nx_refcnt */
d337f35e 19199+
61333608 19200+static inline struct nx_info *__lookup_nx_info(vnid_t nid)
d337f35e 19201+{
4bf69007
AM
19202+ struct hlist_head *head = &nx_info_hash[__hashval(nid)];
19203+ struct hlist_node *pos;
19204+ struct nx_info *nxi;
d337f35e 19205+
4bf69007
AM
19206+ vxd_assert_lock(&nx_info_hash_lock);
19207+ hlist_for_each(pos, head) {
19208+ nxi = hlist_entry(pos, struct nx_info, nx_hlist);
19209+
19210+ if (nxi->nx_id == nid)
19211+ goto found;
d337f35e 19212+ }
4bf69007
AM
19213+ nxi = NULL;
19214+found:
19215+ vxdprintk(VXD_CBIT(nid, 0),
19216+ "__lookup_nx_info(#%u): %p[#%u]",
19217+ nid, nxi, nxi ? nxi->nx_id : 0);
19218+ return nxi;
d337f35e
JR
19219+}
19220+
19221+
4bf69007 19222+/* __create_nx_info()
d337f35e 19223+
4bf69007
AM
19224+ * create the requested context
19225+ * get(), claim() and hash it */
d337f35e 19226+
4bf69007
AM
19227+static struct nx_info *__create_nx_info(int id)
19228+{
19229+ struct nx_info *new, *nxi = NULL;
d337f35e 19230+
4bf69007 19231+ vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);
d337f35e 19232+
4bf69007
AM
19233+ if (!(new = __alloc_nx_info(id)))
19234+ return ERR_PTR(-ENOMEM);
d337f35e 19235+
4bf69007
AM
19236+ /* required to make dynamic xids unique */
19237+ spin_lock(&nx_info_hash_lock);
d337f35e 19238+
4bf69007
AM
19239+ /* static context requested */
19240+ if ((nxi = __lookup_nx_info(id))) {
19241+ vxdprintk(VXD_CBIT(nid, 0),
19242+ "create_nx_info(%d) = %p (already there)", id, nxi);
19243+ if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
19244+ nxi = ERR_PTR(-EBUSY);
19245+ else
19246+ nxi = ERR_PTR(-EEXIST);
19247+ goto out_unlock;
19248+ }
19249+ /* new context */
19250+ vxdprintk(VXD_CBIT(nid, 0),
19251+ "create_nx_info(%d) = %p (new)", id, new);
19252+ claim_nx_info(new, NULL);
19253+ __nx_set_lback(new);
19254+ __hash_nx_info(get_nx_info(new));
19255+ nxi = new, new = NULL;
d337f35e 19256+
4bf69007
AM
19257+out_unlock:
19258+ spin_unlock(&nx_info_hash_lock);
19259+ if (new)
19260+ __dealloc_nx_info(new);
19261+ return nxi;
19262+}
d337f35e
JR
19263+
19264+
d337f35e 19265+
4bf69007 19266+/* exported stuff */
d337f35e 19267+
d337f35e 19268+
4bf69007
AM
19269+void unhash_nx_info(struct nx_info *nxi)
19270+{
19271+ __shutdown_nx_info(nxi);
19272+ spin_lock(&nx_info_hash_lock);
19273+ __unhash_nx_info(nxi);
19274+ spin_unlock(&nx_info_hash_lock);
d337f35e
JR
19275+}
19276+
4bf69007 19277+/* lookup_nx_info()
d337f35e 19278+
4bf69007
AM
19279+ * search for a nx_info and get() it
19280+ * negative id means current */
d337f35e 19281+
4bf69007 19282+struct nx_info *lookup_nx_info(int id)
d337f35e 19283+{
4bf69007 19284+ struct nx_info *nxi = NULL;
d337f35e 19285+
4bf69007
AM
19286+ if (id < 0) {
19287+ nxi = get_nx_info(current_nx_info());
19288+ } else if (id > 1) {
19289+ spin_lock(&nx_info_hash_lock);
19290+ nxi = get_nx_info(__lookup_nx_info(id));
19291+ spin_unlock(&nx_info_hash_lock);
d337f35e 19292+ }
4bf69007
AM
19293+ return nxi;
19294+}
d337f35e 19295+
4bf69007 19296+/* nid_is_hashed()
d337f35e 19297+
4bf69007
AM
19298+ * verify that nid is still hashed */
19299+
61333608 19300+int nid_is_hashed(vnid_t nid)
4bf69007
AM
19301+{
19302+ int hashed;
19303+
19304+ spin_lock(&nx_info_hash_lock);
19305+ hashed = (__lookup_nx_info(nid) != NULL);
19306+ spin_unlock(&nx_info_hash_lock);
19307+ return hashed;
d337f35e
JR
19308+}
19309+
19310+
4bf69007 19311+#ifdef CONFIG_PROC_FS
d337f35e 19312+
4bf69007
AM
19313+/* get_nid_list()
19314+
19315+ * get a subset of hashed nids for proc
19316+ * assumes size is at least one */
19317+
19318+int get_nid_list(int index, unsigned int *nids, int size)
d337f35e 19319+{
4bf69007 19320+ int hindex, nr_nids = 0;
d337f35e 19321+
4bf69007
AM
19322+ /* only show current and children */
19323+ if (!nx_check(0, VS_ADMIN | VS_WATCH)) {
19324+ if (index > 0)
19325+ return 0;
19326+ nids[nr_nids] = nx_current_nid();
19327+ return 1;
19328+ }
d337f35e 19329+
4bf69007
AM
19330+ for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
19331+ struct hlist_head *head = &nx_info_hash[hindex];
19332+ struct hlist_node *pos;
d337f35e 19333+
4bf69007
AM
19334+ spin_lock(&nx_info_hash_lock);
19335+ hlist_for_each(pos, head) {
19336+ struct nx_info *nxi;
19337+
19338+ if (--index > 0)
19339+ continue;
19340+
19341+ nxi = hlist_entry(pos, struct nx_info, nx_hlist);
19342+ nids[nr_nids] = nxi->nx_id;
19343+ if (++nr_nids >= size) {
19344+ spin_unlock(&nx_info_hash_lock);
d337f35e 19345+ goto out;
4bf69007 19346+ }
d337f35e 19347+ }
4bf69007
AM
19348+ /* keep the lock time short */
19349+ spin_unlock(&nx_info_hash_lock);
d337f35e
JR
19350+ }
19351+out:
4bf69007 19352+ return nr_nids;
d337f35e 19353+}
4bf69007 19354+#endif
d337f35e 19355+
4bf69007
AM
19356+
19357+/*
19358+ * migrate task to new network
19359+ * gets nxi, puts old_nxi on change
19360+ */
19361+
19362+int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
2380c486 19363+{
4bf69007
AM
19364+ struct nx_info *old_nxi;
19365+ int ret = 0;
2380c486 19366+
4bf69007
AM
19367+ if (!p || !nxi)
19368+ BUG();
d337f35e 19369+
4bf69007
AM
19370+ vxdprintk(VXD_CBIT(nid, 5),
19371+ "nx_migrate_task(%p,%p[#%d.%d.%d])",
19372+ p, nxi, nxi->nx_id,
19373+ atomic_read(&nxi->nx_usecnt),
19374+ atomic_read(&nxi->nx_tasks));
d337f35e 19375+
4bf69007
AM
19376+ if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) &&
19377+ !nx_info_flags(nxi, NXF_STATE_SETUP, 0))
19378+ return -EACCES;
d337f35e 19379+
4bf69007
AM
19380+ if (nx_info_state(nxi, NXS_SHUTDOWN))
19381+ return -EFAULT;
d337f35e 19382+
4bf69007
AM
19383+ /* maybe disallow this completely? */
19384+ old_nxi = task_get_nx_info(p);
19385+ if (old_nxi == nxi)
19386+ goto out;
d337f35e 19387+
4bf69007
AM
19388+ task_lock(p);
19389+ if (old_nxi)
19390+ clr_nx_info(&p->nx_info);
19391+ claim_nx_info(nxi, p);
19392+ set_nx_info(&p->nx_info, nxi);
19393+ p->nid = nxi->nx_id;
19394+ task_unlock(p);
d337f35e 19395+
4bf69007
AM
19396+ vxdprintk(VXD_CBIT(nid, 5),
19397+ "moved task %p into nxi:%p[#%d]",
19398+ p, nxi, nxi->nx_id);
d337f35e 19399+
4bf69007
AM
19400+ if (old_nxi)
19401+ release_nx_info(old_nxi, p);
19402+ ret = 0;
19403+out:
19404+ put_nx_info(old_nxi);
19405+ return ret;
19406+}
d337f35e 19407+
d337f35e 19408+
4bf69007
AM
19409+void nx_set_persistent(struct nx_info *nxi)
19410+{
19411+ vxdprintk(VXD_CBIT(nid, 6),
19412+ "nx_set_persistent(%p[#%d])", nxi, nxi->nx_id);
d337f35e 19413+
4bf69007
AM
19414+ get_nx_info(nxi);
19415+ claim_nx_info(nxi, NULL);
d337f35e
JR
19416+}
19417+
4bf69007 19418+void nx_clear_persistent(struct nx_info *nxi)
2380c486 19419+{
4bf69007
AM
19420+ vxdprintk(VXD_CBIT(nid, 6),
19421+ "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id);
2380c486 19422+
4bf69007
AM
19423+ release_nx_info(nxi, NULL);
19424+ put_nx_info(nxi);
2380c486 19425+}
d337f35e 19426+
4bf69007
AM
19427+void nx_update_persistent(struct nx_info *nxi)
19428+{
19429+ if (nx_info_flags(nxi, NXF_PERSISTENT, 0))
19430+ nx_set_persistent(nxi);
19431+ else
19432+ nx_clear_persistent(nxi);
19433+}
d337f35e 19434+
4bf69007
AM
19435+/* vserver syscall commands below here */
19436+
19437+/* task nid and nx_info functions */
d337f35e 19438+
4bf69007 19439+#include <asm/uaccess.h>
d337f35e
JR
19440+
19441+
4bf69007 19442+int vc_task_nid(uint32_t id)
d337f35e 19443+{
61333608 19444+ vnid_t nid;
d337f35e 19445+
4bf69007
AM
19446+ if (id) {
19447+ struct task_struct *tsk;
d337f35e 19448+
4bf69007
AM
19449+ rcu_read_lock();
19450+ tsk = find_task_by_real_pid(id);
19451+ nid = (tsk) ? tsk->nid : -ESRCH;
19452+ rcu_read_unlock();
19453+ } else
19454+ nid = nx_current_nid();
19455+ return nid;
d337f35e
JR
19456+}
19457+
19458+
4bf69007
AM
19459+int vc_nx_info(struct nx_info *nxi, void __user *data)
19460+{
19461+ struct vcmd_nx_info_v0 vc_data;
d337f35e 19462+
4bf69007 19463+ vc_data.nid = nxi->nx_id;
d337f35e 19464+
4bf69007
AM
19465+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19466+ return -EFAULT;
19467+ return 0;
19468+}
d337f35e 19469+
d337f35e 19470+
4bf69007 19471+/* network functions */
d337f35e 19472+
4bf69007
AM
19473+int vc_net_create(uint32_t nid, void __user *data)
19474+{
19475+ struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
19476+ struct nx_info *new_nxi;
19477+ int ret;
d337f35e 19478+
4bf69007
AM
19479+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19480+ return -EFAULT;
d337f35e 19481+
4bf69007
AM
19482+ if ((nid > MAX_S_CONTEXT) || (nid < 2))
19483+ return -EINVAL;
d337f35e 19484+
4bf69007
AM
19485+ new_nxi = __create_nx_info(nid);
19486+ if (IS_ERR(new_nxi))
19487+ return PTR_ERR(new_nxi);
d337f35e 19488+
4bf69007
AM
19489+ /* initial flags */
19490+ new_nxi->nx_flags = vc_data.flagword;
d337f35e 19491+
4bf69007
AM
19492+ ret = -ENOEXEC;
19493+ if (vs_net_change(new_nxi, VSC_NETUP))
19494+ goto out;
d337f35e 19495+
4bf69007
AM
19496+ ret = nx_migrate_task(current, new_nxi);
19497+ if (ret)
d337f35e
JR
19498+ goto out;
19499+
4bf69007
AM
19500+ /* return context id on success */
19501+ ret = new_nxi->nx_id;
d337f35e 19502+
4bf69007
AM
19503+ /* get a reference for persistent contexts */
19504+ if ((vc_data.flagword & NXF_PERSISTENT))
19505+ nx_set_persistent(new_nxi);
d337f35e 19506+out:
4bf69007
AM
19507+ release_nx_info(new_nxi, NULL);
19508+ put_nx_info(new_nxi);
19509+ return ret;
d337f35e
JR
19510+}
19511+
d337f35e 19512+
4bf69007
AM
19513+int vc_net_migrate(struct nx_info *nxi, void __user *data)
19514+{
19515+ return nx_migrate_task(current, nxi);
19516+}
d337f35e 19517+
2380c486 19518+
4bf69007
AM
19519+static inline
19520+struct nx_addr_v4 *__find_v4_addr(struct nx_info *nxi,
19521+ __be32 ip, __be32 ip2, __be32 mask, uint16_t type, uint16_t flags,
19522+ struct nx_addr_v4 **prev)
d337f35e 19523+{
4bf69007
AM
19524+ struct nx_addr_v4 *nxa = &nxi->v4;
19525+
19526+ for (; nxa; nxa = nxa->next) {
19527+ if ((nxa->ip[0].s_addr == ip) &&
19528+ (nxa->ip[1].s_addr == ip2) &&
19529+ (nxa->mask.s_addr == mask) &&
19530+ (nxa->type == type) &&
19531+ (nxa->flags == flags))
19532+ return nxa;
19533+
19534+ /* save previous entry */
19535+ if (prev)
19536+ *prev = nxa;
19537+ }
19538+ return NULL;
2380c486
JR
19539+}
19540+
4bf69007
AM
19541+int do_add_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
19542+ uint16_t type, uint16_t flags)
d337f35e 19543+{
4bf69007
AM
19544+ struct nx_addr_v4 *nxa = NULL;
19545+ struct nx_addr_v4 *new = __alloc_nx_addr_v4();
5cb1760b 19546+ unsigned long irqflags;
4bf69007 19547+ int ret = -EEXIST;
d337f35e 19548+
4bf69007
AM
19549+ if (IS_ERR(new))
19550+ return PTR_ERR(new);
d337f35e 19551+
5cb1760b 19552+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
4bf69007
AM
19553+ if (__find_v4_addr(nxi, ip, ip2, mask, type, flags, &nxa))
19554+ goto out_unlock;
2380c486 19555+
4bf69007
AM
19556+ if (NX_IPV4(nxi)) {
19557+ nxa->next = new;
19558+ nxa = new;
19559+ new = NULL;
19560+
19561+ /* remove single ip for ip list */
19562+ nxi->nx_flags &= ~NXF_SINGLE_IP;
19563+ }
19564+
19565+ nxa->ip[0].s_addr = ip;
19566+ nxa->ip[1].s_addr = ip2;
19567+ nxa->mask.s_addr = mask;
19568+ nxa->type = type;
19569+ nxa->flags = flags;
19570+ ret = 0;
19571+out_unlock:
5cb1760b 19572+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007
AM
19573+ if (new)
19574+ __dealloc_nx_addr_v4(new);
19575+ return ret;
d337f35e
JR
19576+}
19577+
4bf69007
AM
19578+int do_remove_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
19579+ uint16_t type, uint16_t flags)
2380c486 19580+{
4bf69007
AM
19581+ struct nx_addr_v4 *nxa = NULL;
19582+ struct nx_addr_v4 *old = NULL;
5cb1760b 19583+ unsigned long irqflags;
4bf69007 19584+ int ret = 0;
2380c486 19585+
5cb1760b 19586+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
4bf69007
AM
19587+ switch (type) {
19588+ case NXA_TYPE_ADDR:
19589+ old = __find_v4_addr(nxi, ip, ip2, mask, type, flags, &nxa);
19590+ if (old) {
19591+ if (nxa) {
19592+ nxa->next = old->next;
19593+ old->next = NULL;
19594+ } else {
19595+ if (old->next) {
19596+ nxa = old;
19597+ old = old->next;
19598+ *nxa = *old;
19599+ old->next = NULL;
19600+ } else {
19601+ memset(old, 0, sizeof(*old));
19602+ old = NULL;
19603+ }
19604+ }
19605+ } else
19606+ ret = -ESRCH;
19607+ break;
2380c486 19608+
4bf69007
AM
19609+ case NXA_TYPE_ANY:
19610+ nxa = &nxi->v4;
19611+ old = nxa->next;
19612+ memset(nxa, 0, sizeof(*nxa));
19613+ break;
19614+
19615+ default:
19616+ ret = -EINVAL;
19617+ }
5cb1760b 19618+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007
AM
19619+ __dealloc_nx_addr_v4_all(old);
19620+ return ret;
2380c486
JR
19621+}
19622+
4bf69007
AM
19623+
19624+int vc_net_add(struct nx_info *nxi, void __user *data)
2380c486 19625+{
4bf69007
AM
19626+ struct vcmd_net_addr_v0 vc_data;
19627+ int index, ret = 0;
2380c486 19628+
4bf69007 19629+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
2380c486
JR
19630+ return -EFAULT;
19631+
4bf69007
AM
19632+ switch (vc_data.type) {
19633+ case NXA_TYPE_IPV4:
19634+ if ((vc_data.count < 1) || (vc_data.count > 4))
19635+ return -EINVAL;
adc1caaa 19636+
4bf69007
AM
19637+ index = 0;
19638+ while (index < vc_data.count) {
19639+ ret = do_add_v4_addr(nxi, vc_data.ip[index].s_addr, 0,
19640+ vc_data.mask[index].s_addr, NXA_TYPE_ADDR, 0);
19641+ if (ret)
19642+ return ret;
19643+ index++;
19644+ }
19645+ ret = index;
19646+ break;
2380c486 19647+
4bf69007
AM
19648+ case NXA_TYPE_IPV4|NXA_MOD_BCAST:
19649+ nxi->v4_bcast = vc_data.ip[0];
19650+ ret = 1;
19651+ break;
2380c486 19652+
4bf69007
AM
19653+ case NXA_TYPE_IPV4|NXA_MOD_LBACK:
19654+ nxi->v4_lback = vc_data.ip[0];
19655+ ret = 1;
19656+ break;
19657+
19658+ default:
19659+ ret = -EINVAL;
19660+ break;
19661+ }
19662+ return ret;
19663+}
19664+
19665+int vc_net_remove(struct nx_info *nxi, void __user *data)
19666+{
19667+ struct vcmd_net_addr_v0 vc_data;
19668+
19669+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
2380c486 19670+ return -EFAULT;
4bf69007
AM
19671+
19672+ switch (vc_data.type) {
19673+ case NXA_TYPE_ANY:
19674+ return do_remove_v4_addr(nxi, 0, 0, 0, vc_data.type, 0);
19675+ default:
19676+ return -EINVAL;
19677+ }
2380c486
JR
19678+ return 0;
19679+}
19680+
d337f35e 19681+
4bf69007 19682+int vc_net_add_ipv4_v1(struct nx_info *nxi, void __user *data)
d337f35e 19683+{
4bf69007
AM
19684+ struct vcmd_net_addr_ipv4_v1 vc_data;
19685+
19686+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19687+ return -EFAULT;
19688+
19689+ switch (vc_data.type) {
19690+ case NXA_TYPE_ADDR:
19691+ case NXA_TYPE_MASK:
19692+ return do_add_v4_addr(nxi, vc_data.ip.s_addr, 0,
19693+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19694+
19695+ case NXA_TYPE_ADDR | NXA_MOD_BCAST:
19696+ nxi->v4_bcast = vc_data.ip;
19697+ break;
19698+
19699+ case NXA_TYPE_ADDR | NXA_MOD_LBACK:
19700+ nxi->v4_lback = vc_data.ip;
19701+ break;
19702+
19703+ default:
19704+ return -EINVAL;
19705+ }
19706+ return 0;
d337f35e
JR
19707+}
19708+
4bf69007 19709+int vc_net_add_ipv4(struct nx_info *nxi, void __user *data)
d337f35e 19710+{
4bf69007 19711+ struct vcmd_net_addr_ipv4_v2 vc_data;
d337f35e 19712+
4bf69007
AM
19713+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19714+ return -EFAULT;
19715+
19716+ switch (vc_data.type) {
19717+ case NXA_TYPE_ADDR:
19718+ case NXA_TYPE_MASK:
19719+ case NXA_TYPE_RANGE:
19720+ return do_add_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr,
19721+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19722+
19723+ case NXA_TYPE_ADDR | NXA_MOD_BCAST:
19724+ nxi->v4_bcast = vc_data.ip;
19725+ break;
19726+
19727+ case NXA_TYPE_ADDR | NXA_MOD_LBACK:
19728+ nxi->v4_lback = vc_data.ip;
19729+ break;
19730+
19731+ default:
19732+ return -EINVAL;
19733+ }
19734+ return 0;
d337f35e
JR
19735+}
19736+
4bf69007 19737+int vc_net_rem_ipv4_v1(struct nx_info *nxi, void __user *data)
d337f35e 19738+{
4bf69007
AM
19739+ struct vcmd_net_addr_ipv4_v1 vc_data;
19740+
19741+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19742+ return -EFAULT;
19743+
19744+ return do_remove_v4_addr(nxi, vc_data.ip.s_addr, 0,
19745+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
d337f35e
JR
19746+}
19747+
4bf69007 19748+int vc_net_rem_ipv4(struct nx_info *nxi, void __user *data)
d337f35e 19749+{
4bf69007
AM
19750+ struct vcmd_net_addr_ipv4_v2 vc_data;
19751+
19752+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19753+ return -EFAULT;
19754+
19755+ return do_remove_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr,
19756+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
d337f35e 19757+}
d337f35e 19758+
4bf69007 19759+#ifdef CONFIG_IPV6
d337f35e
JR
19760+
19761+static inline
4bf69007
AM
19762+struct nx_addr_v6 *__find_v6_addr(struct nx_info *nxi,
19763+ struct in6_addr *ip, struct in6_addr *mask,
19764+ uint32_t prefix, uint16_t type, uint16_t flags,
19765+ struct nx_addr_v6 **prev)
d337f35e 19766+{
4bf69007 19767+ struct nx_addr_v6 *nxa = &nxi->v6;
d337f35e 19768+
4bf69007
AM
19769+ for (; nxa; nxa = nxa->next) {
19770+ if (ipv6_addr_equal(&nxa->ip, ip) &&
19771+ ipv6_addr_equal(&nxa->mask, mask) &&
19772+ (nxa->prefix == prefix) &&
19773+ (nxa->type == type) &&
19774+ (nxa->flags == flags))
19775+ return nxa;
19776+
19777+ /* save previous entry */
19778+ if (prev)
19779+ *prev = nxa;
19780+ }
19781+ return NULL;
d337f35e
JR
19782+}
19783+
d337f35e 19784+
4bf69007
AM
19785+int do_add_v6_addr(struct nx_info *nxi,
19786+ struct in6_addr *ip, struct in6_addr *mask,
19787+ uint32_t prefix, uint16_t type, uint16_t flags)
19788+{
19789+ struct nx_addr_v6 *nxa = NULL;
19790+ struct nx_addr_v6 *new = __alloc_nx_addr_v6();
5cb1760b 19791+ unsigned long irqflags;
4bf69007 19792+ int ret = -EEXIST;
d337f35e 19793+
4bf69007
AM
19794+ if (IS_ERR(new))
19795+ return PTR_ERR(new);
d337f35e 19796+
5cb1760b 19797+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
4bf69007
AM
19798+ if (__find_v6_addr(nxi, ip, mask, prefix, type, flags, &nxa))
19799+ goto out_unlock;
d337f35e 19800+
4bf69007
AM
19801+ if (NX_IPV6(nxi)) {
19802+ nxa->next = new;
19803+ nxa = new;
19804+ new = NULL;
19805+ }
d337f35e 19806+
4bf69007
AM
19807+ nxa->ip = *ip;
19808+ nxa->mask = *mask;
19809+ nxa->prefix = prefix;
19810+ nxa->type = type;
19811+ nxa->flags = flags;
19812+ ret = 0;
19813+out_unlock:
5cb1760b 19814+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007
AM
19815+ if (new)
19816+ __dealloc_nx_addr_v6(new);
19817+ return ret;
19818+}
d337f35e 19819+
4bf69007
AM
19820+int do_remove_v6_addr(struct nx_info *nxi,
19821+ struct in6_addr *ip, struct in6_addr *mask,
19822+ uint32_t prefix, uint16_t type, uint16_t flags)
d337f35e 19823+{
4bf69007
AM
19824+ struct nx_addr_v6 *nxa = NULL;
19825+ struct nx_addr_v6 *old = NULL;
5cb1760b 19826+ unsigned long irqflags;
4bf69007 19827+ int ret = 0;
d337f35e 19828+
5cb1760b 19829+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
4bf69007
AM
19830+ switch (type) {
19831+ case NXA_TYPE_ADDR:
19832+ old = __find_v6_addr(nxi, ip, mask, prefix, type, flags, &nxa);
19833+ if (old) {
19834+ if (nxa) {
19835+ nxa->next = old->next;
19836+ old->next = NULL;
19837+ } else {
19838+ if (old->next) {
19839+ nxa = old;
19840+ old = old->next;
19841+ *nxa = *old;
19842+ old->next = NULL;
19843+ } else {
19844+ memset(old, 0, sizeof(*old));
19845+ old = NULL;
19846+ }
19847+ }
19848+ } else
19849+ ret = -ESRCH;
19850+ break;
d337f35e 19851+
4bf69007
AM
19852+ case NXA_TYPE_ANY:
19853+ nxa = &nxi->v6;
19854+ old = nxa->next;
19855+ memset(nxa, 0, sizeof(*nxa));
d337f35e
JR
19856+ break;
19857+
d337f35e 19858+ default:
4bf69007 19859+ ret = -EINVAL;
d337f35e 19860+ }
5cb1760b 19861+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007
AM
19862+ __dealloc_nx_addr_v6_all(old);
19863+ return ret;
d337f35e
JR
19864+}
19865+
4bf69007 19866+int vc_net_add_ipv6(struct nx_info *nxi, void __user *data)
d337f35e 19867+{
4bf69007 19868+ struct vcmd_net_addr_ipv6_v1 vc_data;
d337f35e 19869+
4bf69007 19870+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
19871+ return -EFAULT;
19872+
4bf69007
AM
19873+ switch (vc_data.type) {
19874+ case NXA_TYPE_ADDR:
19875+ memset(&vc_data.mask, ~0, sizeof(vc_data.mask));
19876+ /* fallthrough */
19877+ case NXA_TYPE_MASK:
19878+ return do_add_v6_addr(nxi, &vc_data.ip, &vc_data.mask,
19879+ vc_data.prefix, vc_data.type, vc_data.flags);
19880+ default:
19881+ return -EINVAL;
19882+ }
19883+ return 0;
19884+}
d337f35e 19885+
4bf69007
AM
19886+int vc_net_remove_ipv6(struct nx_info *nxi, void __user *data)
19887+{
19888+ struct vcmd_net_addr_ipv6_v1 vc_data;
19889+
19890+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19891+ return -EFAULT;
19892+
19893+ switch (vc_data.type) {
19894+ case NXA_TYPE_ADDR:
19895+ memset(&vc_data.mask, ~0, sizeof(vc_data.mask));
19896+ /* fallthrough */
19897+ case NXA_TYPE_MASK:
19898+ return do_remove_v6_addr(nxi, &vc_data.ip, &vc_data.mask,
19899+ vc_data.prefix, vc_data.type, vc_data.flags);
19900+ case NXA_TYPE_ANY:
19901+ return do_remove_v6_addr(nxi, NULL, NULL, 0, vc_data.type, 0);
19902+ default:
19903+ return -EINVAL;
19904+ }
19905+ return 0;
d337f35e
JR
19906+}
19907+
4bf69007 19908+#endif /* CONFIG_IPV6 */
d337f35e 19909+
4bf69007
AM
19910+
19911+int vc_get_nflags(struct nx_info *nxi, void __user *data)
d337f35e 19912+{
4bf69007 19913+ struct vcmd_net_flags_v0 vc_data;
d337f35e 19914+
4bf69007 19915+ vc_data.flagword = nxi->nx_flags;
d337f35e 19916+
4bf69007
AM
19917+ /* special STATE flag handling */
19918+ vc_data.mask = vs_mask_flags(~0ULL, nxi->nx_flags, NXF_ONE_TIME);
d337f35e 19919+
4bf69007
AM
19920+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19921+ return -EFAULT;
19922+ return 0;
d337f35e
JR
19923+}
19924+
4bf69007
AM
19925+int vc_set_nflags(struct nx_info *nxi, void __user *data)
19926+{
19927+ struct vcmd_net_flags_v0 vc_data;
19928+ uint64_t mask, trigger;
19929+
19930+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19931+ return -EFAULT;
d337f35e 19932+
4bf69007
AM
19933+ /* special STATE flag handling */
19934+ mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
19935+ trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
d337f35e 19936+
4bf69007
AM
19937+ nxi->nx_flags = vs_mask_flags(nxi->nx_flags,
19938+ vc_data.flagword, mask);
19939+ if (trigger & NXF_PERSISTENT)
19940+ nx_update_persistent(nxi);
19941+
19942+ return 0;
19943+}
19944+
19945+int vc_get_ncaps(struct nx_info *nxi, void __user *data)
d337f35e 19946+{
4bf69007 19947+ struct vcmd_net_caps_v0 vc_data;
d337f35e 19948+
4bf69007
AM
19949+ vc_data.ncaps = nxi->nx_ncaps;
19950+ vc_data.cmask = ~0ULL;
d337f35e 19951+
2380c486 19952+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
4bf69007
AM
19953+ return -EFAULT;
19954+ return 0;
d337f35e
JR
19955+}
19956+
4bf69007
AM
19957+int vc_set_ncaps(struct nx_info *nxi, void __user *data)
19958+{
19959+ struct vcmd_net_caps_v0 vc_data;
19960+
19961+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19962+ return -EFAULT;
19963+
19964+ nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps,
19965+ vc_data.ncaps, vc_data.cmask);
19966+ return 0;
19967+}
19968+
19969+
19970+#include <linux/module.h>
19971+
19972+module_init(init_network);
19973+
19974+EXPORT_SYMBOL_GPL(free_nx_info);
19975+EXPORT_SYMBOL_GPL(unhash_nx_info);
19976+
c2806d43
AM
19977diff -urNp -x '*.orig' linux-4.4/kernel/vserver/proc.c linux-4.4/kernel/vserver/proc.c
19978--- linux-4.4/kernel/vserver/proc.c 1970-01-01 01:00:00.000000000 +0100
19979+++ linux-4.4/kernel/vserver/proc.c 2021-02-24 16:56:24.612823601 +0100
8de2f54c 19980@@ -0,0 +1,1040 @@
d337f35e 19981+/*
4bf69007 19982+ * linux/kernel/vserver/proc.c
d337f35e 19983+ *
4bf69007 19984+ * Virtual Context Support
d337f35e 19985+ *
c2806d43 19986+ * Copyright (C) 2003-2011 Herbert Pötzl
d337f35e 19987+ *
4bf69007
AM
19988+ * V0.01 basic structure
19989+ * V0.02 adaptation vs1.3.0
19990+ * V0.03 proc permissions
19991+ * V0.04 locking/generic
19992+ * V0.05 next generation procfs
19993+ * V0.06 inode validation
19994+ * V0.07 generic rewrite vid
19995+ * V0.08 remove inode type
19996+ * V0.09 added u/wmask info
d337f35e
JR
19997+ *
19998+ */
19999+
4bf69007 20000+#include <linux/proc_fs.h>
ec22aa5c 20001+#include <linux/fs_struct.h>
4bf69007
AM
20002+#include <linux/mount.h>
20003+#include <linux/namei.h>
20004+#include <asm/unistd.h>
2380c486 20005+
d337f35e 20006+#include <linux/vs_context.h>
4bf69007
AM
20007+#include <linux/vs_network.h>
20008+#include <linux/vs_cvirt.h>
d337f35e 20009+
4bf69007
AM
20010+#include <linux/in.h>
20011+#include <linux/inetdevice.h>
20012+#include <linux/vs_inet.h>
20013+#include <linux/vs_inet6.h>
d337f35e 20014+
4bf69007 20015+#include <linux/vserver/global.h>
d337f35e 20016+
4bf69007
AM
20017+#include "cvirt_proc.h"
20018+#include "cacct_proc.h"
20019+#include "limit_proc.h"
20020+#include "sched_proc.h"
20021+#include "vci_config.h"
d337f35e 20022+
09be7631
JR
20023+#include <../../fs/proc/internal.h>
20024+
2380c486 20025+
4bf69007
AM
20026+static inline char *print_cap_t(char *buffer, kernel_cap_t *c)
20027+{
20028+ unsigned __capi;
2380c486 20029+
4bf69007
AM
20030+ CAP_FOR_EACH_U32(__capi) {
20031+ buffer += sprintf(buffer, "%08x",
20032+ c->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
20033+ }
20034+ return buffer;
20035+}
2380c486 20036+
d337f35e 20037+
4bf69007 20038+static struct proc_dir_entry *proc_virtual;
d337f35e 20039+
4bf69007 20040+static struct proc_dir_entry *proc_virtnet;
d337f35e 20041+
d337f35e 20042+
4bf69007 20043+/* first the actual feeds */
d337f35e 20044+
d337f35e 20045+
4bf69007
AM
20046+static int proc_vci(char *buffer)
20047+{
20048+ return sprintf(buffer,
20049+ "VCIVersion:\t%04x:%04x\n"
20050+ "VCISyscall:\t%d\n"
20051+ "VCIKernel:\t%08x\n",
20052+ VCI_VERSION >> 16,
20053+ VCI_VERSION & 0xFFFF,
20054+ __NR_vserver,
20055+ vci_kernel_config());
20056+}
d337f35e 20057+
4bf69007
AM
/* Feed for the "virtual" info entry: just the VCI identification block. */
static int proc_virtual_info(char *buffer)
{
	int len = proc_vci(buffer);

	return len;
}
20062+
4bf69007
AM
20063+static int proc_virtual_status(char *buffer)
20064+{
20065+ return sprintf(buffer,
20066+ "#CTotal:\t%d\n"
20067+ "#CActive:\t%d\n"
20068+ "#NSProxy:\t%d\t%d %d %d %d %d %d\n"
20069+ "#InitTask:\t%d\t%d %d\n",
20070+ atomic_read(&vx_global_ctotal),
20071+ atomic_read(&vx_global_cactive),
20072+ atomic_read(&vs_global_nsproxy),
20073+ atomic_read(&vs_global_fs),
20074+ atomic_read(&vs_global_mnt_ns),
20075+ atomic_read(&vs_global_uts_ns),
20076+ atomic_read(&nr_ipc_ns),
20077+ atomic_read(&vs_global_user_ns),
20078+ atomic_read(&vs_global_pid_ns),
20079+ atomic_read(&init_task.usage),
20080+ atomic_read(&init_task.nsproxy->count),
20081+ init_task.fs->users);
20082+}
2380c486 20083+
2380c486 20084+
4bf69007 20085+int proc_vxi_info(struct vx_info *vxi, char *buffer)
d337f35e 20086+{
4bf69007 20087+ int length;
d337f35e 20088+
4bf69007
AM
20089+ length = sprintf(buffer,
20090+ "ID:\t%d\n"
20091+ "Info:\t%p\n"
20092+ "Init:\t%d\n"
20093+ "OOM:\t%lld\n",
20094+ vxi->vx_id,
20095+ vxi,
20096+ vxi->vx_initpid,
20097+ vxi->vx_badness_bias);
20098+ return length;
d337f35e
JR
20099+}
20100+
4bf69007 20101+int proc_vxi_status(struct vx_info *vxi, char *buffer)
d337f35e 20102+{
4bf69007 20103+ char *orig = buffer;
d337f35e 20104+
4bf69007
AM
20105+ buffer += sprintf(buffer,
20106+ "UseCnt:\t%d\n"
20107+ "Tasks:\t%d\n"
20108+ "Flags:\t%016llx\n",
20109+ atomic_read(&vxi->vx_usecnt),
20110+ atomic_read(&vxi->vx_tasks),
20111+ (unsigned long long)vxi->vx_flags);
d337f35e 20112+
4bf69007
AM
20113+ buffer += sprintf(buffer, "BCaps:\t");
20114+ buffer = print_cap_t(buffer, &vxi->vx_bcaps);
20115+ buffer += sprintf(buffer, "\n");
ab30d09f 20116+
4bf69007
AM
20117+ buffer += sprintf(buffer,
20118+ "CCaps:\t%016llx\n"
20119+ "Umask:\t%16llx\n"
20120+ "Wmask:\t%16llx\n"
20121+ "Spaces:\t%08lx %08lx\n",
20122+ (unsigned long long)vxi->vx_ccaps,
20123+ (unsigned long long)vxi->vx_umask,
20124+ (unsigned long long)vxi->vx_wmask,
20125+ vxi->space[0].vx_nsmask, vxi->space[1].vx_nsmask);
20126+ return buffer - orig;
20127+}
ab30d09f 20128+
4bf69007
AM
20129+int proc_vxi_limit(struct vx_info *vxi, char *buffer)
20130+{
20131+ return vx_info_proc_limit(&vxi->limit, buffer);
20132+}
d337f35e 20133+
4bf69007
AM
20134+int proc_vxi_sched(struct vx_info *vxi, char *buffer)
20135+{
20136+ int cpu, length;
d337f35e 20137+
4bf69007
AM
20138+ length = vx_info_proc_sched(&vxi->sched, buffer);
20139+ for_each_online_cpu(cpu) {
20140+ length += vx_info_proc_sched_pc(
20141+ &vx_per_cpu(vxi, sched_pc, cpu),
20142+ buffer + length, cpu);
ec22aa5c 20143+ }
4bf69007
AM
20144+ return length;
20145+}
ec22aa5c 20146+
4bf69007
AM
20147+int proc_vxi_nsproxy0(struct vx_info *vxi, char *buffer)
20148+{
20149+ return vx_info_proc_nsproxy(vxi->space[0].vx_nsproxy, buffer);
20150+}
d337f35e 20151+
4bf69007
AM
20152+int proc_vxi_nsproxy1(struct vx_info *vxi, char *buffer)
20153+{
20154+ return vx_info_proc_nsproxy(vxi->space[1].vx_nsproxy, buffer);
20155+}
ec22aa5c 20156+
4bf69007
AM
20157+int proc_vxi_cvirt(struct vx_info *vxi, char *buffer)
20158+{
20159+ int cpu, length;
d33d7b00 20160+
4bf69007
AM
20161+ vx_update_load(vxi);
20162+ length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
20163+ for_each_online_cpu(cpu) {
20164+ length += vx_info_proc_cvirt_pc(
20165+ &vx_per_cpu(vxi, cvirt_pc, cpu),
20166+ buffer + length, cpu);
3bac966d 20167+ }
4bf69007
AM
20168+ return length;
20169+}
3bac966d 20170+
4bf69007
AM
20171+int proc_vxi_cacct(struct vx_info *vxi, char *buffer)
20172+{
20173+ return vx_info_proc_cacct(&vxi->cacct, buffer);
d337f35e
JR
20174+}
20175+
20176+
/* Feed for the "virtnet" info entry: just the VCI identification block. */
static int proc_virtnet_info(char *buffer)
{
	int len = proc_vci(buffer);

	return len;
}
ab30d09f 20181+
4bf69007
AM
20182+static int proc_virtnet_status(char *buffer)
20183+{
20184+ return sprintf(buffer,
20185+ "#CTotal:\t%d\n"
20186+ "#CActive:\t%d\n",
20187+ atomic_read(&nx_global_ctotal),
20188+ atomic_read(&nx_global_cactive));
20189+}
d337f35e 20190+
4bf69007
AM
20191+int proc_nxi_info(struct nx_info *nxi, char *buffer)
20192+{
20193+ struct nx_addr_v4 *v4a;
20194+#ifdef CONFIG_IPV6
20195+ struct nx_addr_v6 *v6a;
20196+#endif
20197+ int length, i;
ab30d09f 20198+
4bf69007
AM
20199+ length = sprintf(buffer,
20200+ "ID:\t%d\n"
20201+ "Info:\t%p\n"
20202+ "Bcast:\t" NIPQUAD_FMT "\n"
20203+ "Lback:\t" NIPQUAD_FMT "\n",
20204+ nxi->nx_id,
20205+ nxi,
20206+ NIPQUAD(nxi->v4_bcast.s_addr),
20207+ NIPQUAD(nxi->v4_lback.s_addr));
ab30d09f 20208+
4bf69007
AM
20209+ if (!NX_IPV4(nxi))
20210+ goto skip_v4;
20211+ for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
20212+ length += sprintf(buffer + length, "%d:\t" NXAV4_FMT "\n",
20213+ i, NXAV4(v4a));
20214+skip_v4:
20215+#ifdef CONFIG_IPV6
20216+ if (!NX_IPV6(nxi))
20217+ goto skip_v6;
20218+ for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
20219+ length += sprintf(buffer + length, "%d:\t" NXAV6_FMT "\n",
20220+ i, NXAV6(v6a));
20221+skip_v6:
20222+#endif
20223+ return length;
20224+}
2380c486 20225+
4bf69007
AM
20226+int proc_nxi_status(struct nx_info *nxi, char *buffer)
20227+{
20228+ int length;
ec22aa5c 20229+
4bf69007
AM
20230+ length = sprintf(buffer,
20231+ "UseCnt:\t%d\n"
20232+ "Tasks:\t%d\n"
20233+ "Flags:\t%016llx\n"
20234+ "NCaps:\t%016llx\n",
20235+ atomic_read(&nxi->nx_usecnt),
20236+ atomic_read(&nxi->nx_tasks),
20237+ (unsigned long long)nxi->nx_flags,
20238+ (unsigned long long)nxi->nx_ncaps);
20239+ return length;
20240+}
ec22aa5c 20241+
ec22aa5c 20242+
d337f35e 20243+
4bf69007 20244+/* here the inode helpers */
d337f35e 20245+
4bf69007
AM
20246+struct vs_entry {
20247+ int len;
20248+ char *name;
20249+ mode_t mode;
20250+ struct inode_operations *iop;
20251+ struct file_operations *fop;
20252+ union proc_op op;
20253+};
d337f35e 20254+
4bf69007
AM
20255+static struct inode *vs_proc_make_inode(struct super_block *sb, struct vs_entry *p)
20256+{
20257+ struct inode *inode = new_inode(sb);
3bac966d 20258+
4bf69007
AM
20259+ if (!inode)
20260+ goto out;
3bac966d 20261+
4bf69007
AM
20262+ inode->i_mode = p->mode;
20263+ if (p->iop)
20264+ inode->i_op = p->iop;
20265+ if (p->fop)
20266+ inode->i_fop = p->fop;
3bac966d 20267+
4bf69007
AM
20268+ set_nlink(inode, (p->mode & S_IFDIR) ? 2 : 1);
20269+ inode->i_flags |= S_IMMUTABLE;
3bac966d 20270+
4bf69007 20271+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2380c486 20272+
8ce283e1
AM
20273+ i_uid_write(inode, 0);
20274+ i_gid_write(inode, 0);
20275+ i_tag_write(inode, 0);
4bf69007
AM
20276+out:
20277+ return inode;
d337f35e
JR
20278+}
20279+
4bf69007
AM
20280+static struct dentry *vs_proc_instantiate(struct inode *dir,
20281+ struct dentry *dentry, int id, void *ptr)
2380c486 20282+{
4bf69007
AM
20283+ struct vs_entry *p = ptr;
20284+ struct inode *inode = vs_proc_make_inode(dir->i_sb, p);
20285+ struct dentry *error = ERR_PTR(-EINVAL);
2380c486 20286+
4bf69007
AM
20287+ if (!inode)
20288+ goto out;
2380c486 20289+
4bf69007
AM
20290+ PROC_I(inode)->op = p->op;
20291+ PROC_I(inode)->fd = id;
20292+ d_add(dentry, inode);
20293+ error = NULL;
20294+out:
20295+ return error;
2380c486
JR
20296+}
20297+
4bf69007 20298+/* Lookups */
2380c486 20299+
09be7631
JR
20300+typedef struct dentry *vx_instantiate_t(struct inode *, struct dentry *, int, void *);
20301+
2380c486 20302+
4bf69007
AM
20303+/*
20304+ * Fill a directory entry.
20305+ *
20306+ * If possible create the dcache entry and derive our inode number and
20307+ * file type from dcache entry.
20308+ *
20309+ * Since all of the proc inode numbers are dynamically generated, the inode
20310+ * numbers do not exist until the inode is cached. This means creating the
c2e5f7c8
JR
20311+ * dcache entry in iterate is necessary to keep the inode numbers
20312+ * reported by iterate in sync with the inode numbers reported
4bf69007
AM
20313+ * by stat.
20314+ */
c2e5f7c8 20315+static int vx_proc_fill_cache(struct file *filp, struct dir_context *ctx,
09be7631 20316+ char *name, int len, vx_instantiate_t instantiate, int id, void *ptr)
2380c486 20317+{
927ca606 20318+ struct dentry *child, *dir = filp->f_path.dentry;
4bf69007
AM
20319+ struct inode *inode;
20320+ struct qstr qname;
20321+ ino_t ino = 0;
20322+ unsigned type = DT_UNKNOWN;
d337f35e 20323+
4bf69007
AM
20324+ qname.name = name;
20325+ qname.len = len;
20326+ qname.hash = full_name_hash(name, len);
d337f35e 20327+
4bf69007
AM
20328+ child = d_lookup(dir, &qname);
20329+ if (!child) {
20330+ struct dentry *new;
20331+ new = d_alloc(dir, &qname);
20332+ if (new) {
20333+ child = instantiate(dir->d_inode, new, id, ptr);
20334+ if (child)
20335+ dput(new);
20336+ else
20337+ child = new;
20338+ }
20339+ }
20340+ if (!child || IS_ERR(child) || !child->d_inode)
20341+ goto end_instantiate;
20342+ inode = child->d_inode;
20343+ if (inode) {
20344+ ino = inode->i_ino;
20345+ type = inode->i_mode >> 12;
20346+ }
20347+ dput(child);
20348+end_instantiate:
20349+ if (!ino)
4bf69007 20350+ ino = 1;
c2e5f7c8 20351+ return !dir_emit(ctx, name, len, ino, type);
4bf69007 20352+}
d337f35e 20353+
d337f35e 20354+
d337f35e 20355+
4bf69007 20356+/* get and revalidate vx_info/xid */
2380c486 20357+
4bf69007
AM
20358+static inline
20359+struct vx_info *get_proc_vx_info(struct inode *inode)
20360+{
20361+ return lookup_vx_info(PROC_I(inode)->fd);
d337f35e
JR
20362+}
20363+
4bf69007 20364+static int proc_xid_revalidate(struct dentry *dentry, unsigned int flags)
d337f35e 20365+{
4bf69007 20366+ struct inode *inode = dentry->d_inode;
61333608 20367+ vxid_t xid = PROC_I(inode)->fd;
2380c486 20368+
4bf69007
AM
20369+ if (flags & LOOKUP_RCU) /* FIXME: can be dropped? */
20370+ return -ECHILD;
2380c486 20371+
4bf69007
AM
20372+ if (!xid || xid_is_hashed(xid))
20373+ return 1;
20374+ d_drop(dentry);
d337f35e
JR
20375+ return 0;
20376+}
20377+
d337f35e 20378+
4bf69007 20379+/* get and revalidate nx_info/nid */
d337f35e 20380+
4bf69007
AM
20381+static int proc_nid_revalidate(struct dentry *dentry, unsigned int flags)
20382+{
20383+ struct inode *inode = dentry->d_inode;
61333608 20384+ vnid_t nid = PROC_I(inode)->fd;
2380c486 20385+
4bf69007
AM
20386+ if (flags & LOOKUP_RCU) /* FIXME: can be dropped? */
20387+ return -ECHILD;
2380c486 20388+
4bf69007
AM
20389+ if (!nid || nid_is_hashed(nid))
20390+ return 1;
20391+ d_drop(dentry);
20392+ return 0;
d337f35e
JR
20393+}
20394+
4bf69007
AM
20395+
20396+
20397+#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
20398+
20399+static ssize_t proc_vs_info_read(struct file *file, char __user *buf,
20400+ size_t count, loff_t *ppos)
d337f35e 20401+{
927ca606 20402+ struct inode *inode = file->f_path.dentry->d_inode;
4bf69007
AM
20403+ unsigned long page;
20404+ ssize_t length = 0;
20405+
20406+ if (count > PROC_BLOCK_SIZE)
20407+ count = PROC_BLOCK_SIZE;
20408+
20409+ /* fade that out as soon as stable */
20410+ WARN_ON(PROC_I(inode)->fd);
20411+
20412+ if (!(page = __get_free_page(GFP_KERNEL)))
20413+ return -ENOMEM;
20414+
20415+ BUG_ON(!PROC_I(inode)->op.proc_vs_read);
20416+ length = PROC_I(inode)->op.proc_vs_read((char *)page);
20417+
20418+ if (length >= 0)
20419+ length = simple_read_from_buffer(buf, count, ppos,
20420+ (char *)page, length);
20421+
20422+ free_page(page);
20423+ return length;
d337f35e
JR
20424+}
20425+
4bf69007
AM
20426+static ssize_t proc_vx_info_read(struct file *file, char __user *buf,
20427+ size_t count, loff_t *ppos)
20428+{
927ca606 20429+ struct inode *inode = file->f_path.dentry->d_inode;
4bf69007 20430+ struct vx_info *vxi = NULL;
61333608 20431+ vxid_t xid = PROC_I(inode)->fd;
4bf69007
AM
20432+ unsigned long page;
20433+ ssize_t length = 0;
d337f35e 20434+
4bf69007
AM
20435+ if (count > PROC_BLOCK_SIZE)
20436+ count = PROC_BLOCK_SIZE;
20437+
20438+ /* fade that out as soon as stable */
20439+ WARN_ON(!xid);
20440+ vxi = lookup_vx_info(xid);
20441+ if (!vxi)
20442+ goto out;
d337f35e 20443+
4bf69007
AM
20444+ length = -ENOMEM;
20445+ if (!(page = __get_free_page(GFP_KERNEL)))
20446+ goto out_put;
d337f35e 20447+
4bf69007
AM
20448+ BUG_ON(!PROC_I(inode)->op.proc_vxi_read);
20449+ length = PROC_I(inode)->op.proc_vxi_read(vxi, (char *)page);
d337f35e 20450+
4bf69007
AM
20451+ if (length >= 0)
20452+ length = simple_read_from_buffer(buf, count, ppos,
20453+ (char *)page, length);
d337f35e 20454+
4bf69007
AM
20455+ free_page(page);
20456+out_put:
20457+ put_vx_info(vxi);
20458+out:
20459+ return length;
20460+}
20461+
20462+static ssize_t proc_nx_info_read(struct file *file, char __user *buf,
20463+ size_t count, loff_t *ppos)
d337f35e 20464+{
927ca606 20465+ struct inode *inode = file->f_path.dentry->d_inode;
4bf69007 20466+ struct nx_info *nxi = NULL;
61333608 20467+ vnid_t nid = PROC_I(inode)->fd;
4bf69007
AM
20468+ unsigned long page;
20469+ ssize_t length = 0;
d337f35e 20470+
4bf69007
AM
20471+ if (count > PROC_BLOCK_SIZE)
20472+ count = PROC_BLOCK_SIZE;
d337f35e 20473+
4bf69007
AM
20474+ /* fade that out as soon as stable */
20475+ WARN_ON(!nid);
20476+ nxi = lookup_nx_info(nid);
20477+ if (!nxi)
20478+ goto out;
d337f35e 20479+
4bf69007
AM
20480+ length = -ENOMEM;
20481+ if (!(page = __get_free_page(GFP_KERNEL)))
20482+ goto out_put;
d337f35e 20483+
4bf69007
AM
20484+ BUG_ON(!PROC_I(inode)->op.proc_nxi_read);
20485+ length = PROC_I(inode)->op.proc_nxi_read(nxi, (char *)page);
2380c486 20486+
4bf69007
AM
20487+ if (length >= 0)
20488+ length = simple_read_from_buffer(buf, count, ppos,
20489+ (char *)page, length);
d337f35e 20490+
4bf69007
AM
20491+ free_page(page);
20492+out_put:
20493+ put_nx_info(nxi);
20494+out:
20495+ return length;
20496+}
2380c486 20497+
d337f35e 20498+
763640ca 20499+
4bf69007 20500+/* here comes the lower level */
763640ca 20501+
265d6dcc 20502+
4bf69007
AM
/*
 * NOD - build a struct vs_entry initializer.
 * NAME must be a string literal (its length is taken with sizeof);
 * OP is a braced initializer for the union proc_op member.
 */
#define NOD(NAME, MODE, IOP, FOP, OP) {			\
	.len  = sizeof(NAME) - 1,			\
	.name = (NAME),					\
	.mode = MODE,					\
	.iop  = IOP,					\
	.fop  = FOP,					\
	.op   = OP,					\
}
d337f35e 20511+
d337f35e 20512+
4bf69007
AM
/*
 * DIR - directory entry; uses proc_<OTYPE>_inode_operations and
 * proc_<OTYPE>_file_operations and an empty proc_op.
 */
#define DIR(NAME, MODE, OTYPE)				\
	NOD(NAME, (S_IFDIR | (MODE)),			\
		&proc_ ## OTYPE ## _inode_operations,	\
		&proc_ ## OTYPE ## _file_operations, { } )
d337f35e 20517+
4bf69007
AM
20518+#define INF(NAME, MODE, OTYPE) \
20519+ NOD(NAME, (S_IFREG | (MODE)), NULL, \
20520+ &proc_vs_info_file_operations, \
20521+ { .proc_vs_read = &proc_##OTYPE } )
d337f35e 20522+
4bf69007
AM
20523+#define VINF(NAME, MODE, OTYPE) \
20524+ NOD(NAME, (S_IFREG | (MODE)), NULL, \
20525+ &proc_vx_info_file_operations, \
20526+ { .proc_vxi_read = &proc_##OTYPE } )
2380c486 20527+
4bf69007
AM
20528+#define NINF(NAME, MODE, OTYPE) \
20529+ NOD(NAME, (S_IFREG | (MODE)), NULL, \
20530+ &proc_nx_info_file_operations, \
20531+ { .proc_nxi_read = &proc_##OTYPE } )
d337f35e 20532+
d337f35e 20533+
4bf69007
AM
20534+static struct file_operations proc_vs_info_file_operations = {
20535+ .read = proc_vs_info_read,
20536+};
d337f35e 20537+
4bf69007
AM
20538+static struct file_operations proc_vx_info_file_operations = {
20539+ .read = proc_vx_info_read,
20540+};
d337f35e 20541+
4bf69007
AM
20542+static struct dentry_operations proc_xid_dentry_operations = {
20543+ .d_revalidate = proc_xid_revalidate,
20544+};
d337f35e 20545+
4bf69007
AM
20546+static struct vs_entry vx_base_stuff[] = {
20547+ VINF("info", S_IRUGO, vxi_info),
20548+ VINF("status", S_IRUGO, vxi_status),
20549+ VINF("limit", S_IRUGO, vxi_limit),
20550+ VINF("sched", S_IRUGO, vxi_sched),
20551+ VINF("nsproxy", S_IRUGO, vxi_nsproxy0),
20552+ VINF("nsproxy1",S_IRUGO, vxi_nsproxy1),
20553+ VINF("cvirt", S_IRUGO, vxi_cvirt),
20554+ VINF("cacct", S_IRUGO, vxi_cacct),
20555+ {}
20556+};
2380c486 20557+
d337f35e 20558+
d337f35e 20559+
d337f35e 20560+
4bf69007
AM
20561+static struct dentry *proc_xid_instantiate(struct inode *dir,
20562+ struct dentry *dentry, int id, void *ptr)
20563+{
20564+ dentry->d_op = &proc_xid_dentry_operations;
20565+ return vs_proc_instantiate(dir, dentry, id, ptr);
20566+}
2380c486 20567+
4bf69007
AM
20568+static struct dentry *proc_xid_lookup(struct inode *dir,
20569+ struct dentry *dentry, unsigned int flags)
20570+{
20571+ struct vs_entry *p = vx_base_stuff;
20572+ struct dentry *error = ERR_PTR(-ENOENT);
2380c486 20573+
4bf69007
AM
20574+ for (; p->name; p++) {
20575+ if (p->len != dentry->d_name.len)
20576+ continue;
20577+ if (!memcmp(dentry->d_name.name, p->name, p->len))
20578+ break;
20579+ }
20580+ if (!p->name)
20581+ goto out;
d337f35e 20582+
4bf69007
AM
20583+ error = proc_xid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
20584+out:
20585+ return error;
20586+}
9f7054f1 20587+
c2e5f7c8 20588+static int proc_xid_iterate(struct file *filp, struct dir_context *ctx)
4bf69007 20589+{
927ca606 20590+ struct dentry *dentry = filp->f_path.dentry;
4bf69007
AM
20591+ struct inode *inode = dentry->d_inode;
20592+ struct vs_entry *p = vx_base_stuff;
20593+ int size = sizeof(vx_base_stuff) / sizeof(struct vs_entry);
c2e5f7c8 20594+ int index;
2380c486 20595+
8de2f54c
AM
20596+ if (!dir_emit_dots(filp, ctx))
20597+ return 0;
20598+
20599+ index = ctx->pos - 2;
20600+ if (index < size) {
4bf69007 20601+ for (p += index; p->name; p++) {
c2e5f7c8 20602+ if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
4bf69007 20603+ vs_proc_instantiate, PROC_I(inode)->fd, p))
8de2f54c 20604+ return 0;
c2e5f7c8 20605+ ctx->pos++;
4bf69007 20606+ }
d337f35e 20607+ }
4bf69007 20608+ return 1;
d337f35e
JR
20609+}
20610+
20611+
d337f35e 20612+
4bf69007
AM
20613+static struct file_operations proc_nx_info_file_operations = {
20614+ .read = proc_nx_info_read,
20615+};
d337f35e 20616+
4bf69007
AM
20617+static struct dentry_operations proc_nid_dentry_operations = {
20618+ .d_revalidate = proc_nid_revalidate,
20619+};
d337f35e 20620+
4bf69007
AM
20621+static struct vs_entry nx_base_stuff[] = {
20622+ NINF("info", S_IRUGO, nxi_info),
20623+ NINF("status", S_IRUGO, nxi_status),
20624+ {}
20625+};
2380c486 20626+
d337f35e 20627+
4bf69007
AM
20628+static struct dentry *proc_nid_instantiate(struct inode *dir,
20629+ struct dentry *dentry, int id, void *ptr)
d337f35e 20630+{
4bf69007
AM
20631+ dentry->d_op = &proc_nid_dentry_operations;
20632+ return vs_proc_instantiate(dir, dentry, id, ptr);
20633+}
d337f35e 20634+
4bf69007
AM
20635+static struct dentry *proc_nid_lookup(struct inode *dir,
20636+ struct dentry *dentry, unsigned int flags)
20637+{
20638+ struct vs_entry *p = nx_base_stuff;
20639+ struct dentry *error = ERR_PTR(-ENOENT);
d337f35e 20640+
4bf69007
AM
20641+ for (; p->name; p++) {
20642+ if (p->len != dentry->d_name.len)
20643+ continue;
20644+ if (!memcmp(dentry->d_name.name, p->name, p->len))
20645+ break;
20646+ }
20647+ if (!p->name)
20648+ goto out;
d337f35e 20649+
4bf69007
AM
20650+ error = proc_nid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
20651+out:
20652+ return error;
20653+}
d337f35e 20654+
c2e5f7c8 20655+static int proc_nid_iterate(struct file *filp, struct dir_context *ctx)
4bf69007 20656+{
927ca606 20657+ struct dentry *dentry = filp->f_path.dentry;
4bf69007
AM
20658+ struct inode *inode = dentry->d_inode;
20659+ struct vs_entry *p = nx_base_stuff;
20660+ int size = sizeof(nx_base_stuff) / sizeof(struct vs_entry);
c2e5f7c8 20661+ int index;
d337f35e 20662+
8de2f54c
AM
20663+ if (!dir_emit_dots(filp, ctx))
20664+ return 0;
20665+
20666+ index = ctx->pos - 2;
20667+ if (index < size) {
4bf69007 20668+ for (p += index; p->name; p++) {
c2e5f7c8 20669+ if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
4bf69007 20670+ vs_proc_instantiate, PROC_I(inode)->fd, p))
8de2f54c 20671+ return 0;
c2e5f7c8 20672+ ctx->pos++;
4bf69007
AM
20673+ }
20674+ }
4bf69007
AM
20675+ return 1;
20676+}
2380c486 20677+
d337f35e 20678+
4bf69007 20679+#define MAX_MULBY10 ((~0U - 9) / 10)
d337f35e 20680+
4bf69007
AM
20681+static inline int atovid(const char *str, int len)
20682+{
20683+ int vid, c;
d337f35e 20684+
4bf69007
AM
20685+ vid = 0;
20686+ while (len-- > 0) {
20687+ c = *str - '0';
20688+ str++;
20689+ if (c > 9)
20690+ return -1;
20691+ if (vid >= MAX_MULBY10)
20692+ return -1;
20693+ vid *= 10;
20694+ vid += c;
20695+ if (!vid)
20696+ return -1;
20697+ }
20698+ return vid;
20699+}
2380c486 20700+
4bf69007 20701+/* now the upper level (virtual) */
2380c486 20702+
2380c486 20703+
4bf69007
AM
20704+static struct file_operations proc_xid_file_operations = {
20705+ .read = generic_read_dir,
c2e5f7c8 20706+ .iterate = proc_xid_iterate,
4bf69007 20707+};
2380c486 20708+
4bf69007
AM
20709+static struct inode_operations proc_xid_inode_operations = {
20710+ .lookup = proc_xid_lookup,
20711+};
d337f35e 20712+
4bf69007
AM
20713+static struct vs_entry vx_virtual_stuff[] = {
20714+ INF("info", S_IRUGO, virtual_info),
20715+ INF("status", S_IRUGO, virtual_status),
20716+ DIR(NULL, S_IRUGO | S_IXUGO, xid),
20717+};
2380c486 20718+
d337f35e 20719+
4bf69007
AM
20720+static struct dentry *proc_virtual_lookup(struct inode *dir,
20721+ struct dentry *dentry, unsigned int flags)
20722+{
20723+ struct vs_entry *p = vx_virtual_stuff;
20724+ struct dentry *error = ERR_PTR(-ENOENT);
20725+ int id = 0;
d337f35e 20726+
4bf69007
AM
20727+ for (; p->name; p++) {
20728+ if (p->len != dentry->d_name.len)
20729+ continue;
20730+ if (!memcmp(dentry->d_name.name, p->name, p->len))
20731+ break;
20732+ }
20733+ if (p->name)
20734+ goto instantiate;
d337f35e 20735+
4bf69007
AM
20736+ id = atovid(dentry->d_name.name, dentry->d_name.len);
20737+ if ((id < 0) || !xid_is_hashed(id))
d337f35e
JR
20738+ goto out;
20739+
4bf69007
AM
20740+instantiate:
20741+ error = proc_xid_instantiate(dir, dentry, id, p);
20742+out:
20743+ return error;
20744+}
d337f35e 20745+
4bf69007
AM
20746+static struct file_operations proc_nid_file_operations = {
20747+ .read = generic_read_dir,
c2e5f7c8 20748+ .iterate = proc_nid_iterate,
4bf69007 20749+};
d337f35e 20750+
4bf69007
AM
20751+static struct inode_operations proc_nid_inode_operations = {
20752+ .lookup = proc_nid_lookup,
20753+};
d337f35e 20754+
4bf69007
AM
20755+static struct vs_entry nx_virtnet_stuff[] = {
20756+ INF("info", S_IRUGO, virtnet_info),
20757+ INF("status", S_IRUGO, virtnet_status),
20758+ DIR(NULL, S_IRUGO | S_IXUGO, nid),
20759+};
d337f35e 20760+
d337f35e 20761+
4bf69007
AM
20762+static struct dentry *proc_virtnet_lookup(struct inode *dir,
20763+ struct dentry *dentry, unsigned int flags)
20764+{
20765+ struct vs_entry *p = nx_virtnet_stuff;
20766+ struct dentry *error = ERR_PTR(-ENOENT);
20767+ int id = 0;
d337f35e 20768+
4bf69007
AM
20769+ for (; p->name; p++) {
20770+ if (p->len != dentry->d_name.len)
20771+ continue;
20772+ if (!memcmp(dentry->d_name.name, p->name, p->len))
20773+ break;
20774+ }
20775+ if (p->name)
20776+ goto instantiate;
d337f35e 20777+
4bf69007
AM
20778+ id = atovid(dentry->d_name.name, dentry->d_name.len);
20779+ if ((id < 0) || !nid_is_hashed(id))
d337f35e
JR
20780+ goto out;
20781+
4bf69007
AM
20782+instantiate:
20783+ error = proc_nid_instantiate(dir, dentry, id, p);
20784+out:
20785+ return error;
20786+}
2380c486 20787+
d337f35e 20788+
4bf69007
AM
20789+#define PROC_MAXVIDS 32
20790+
c2e5f7c8 20791+int proc_virtual_iterate(struct file *filp, struct dir_context *ctx)
4bf69007 20792+{
4bf69007
AM
20793+ struct vs_entry *p = vx_virtual_stuff;
20794+ int size = sizeof(vx_virtual_stuff) / sizeof(struct vs_entry);
c2e5f7c8 20795+ int index;
4bf69007
AM
20796+ unsigned int xid_array[PROC_MAXVIDS];
20797+ char buf[PROC_NUMBUF];
20798+ unsigned int nr_xids, i;
4bf69007 20799+
8de2f54c
AM
20800+ if (!dir_emit_dots(filp, ctx))
20801+ return 0;
20802+
20803+ index = ctx->pos - 2;
20804+ if (index < size) {
4bf69007 20805+ for (p += index; p->name; p++) {
c2e5f7c8 20806+ if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
4bf69007 20807+ vs_proc_instantiate, 0, p))
8de2f54c 20808+ return 0;
c2e5f7c8 20809+ ctx->pos++;
d337f35e
JR
20810+ }
20811+ }
8de2f54c
AM
20812+
20813+ index = ctx->pos - size;
20814+ p = &vx_virtual_stuff[size - 1];
20815+ nr_xids = get_xid_list(index, xid_array, PROC_MAXVIDS);
20816+ for (i = 0; i < nr_xids; i++) {
20817+ int n, xid = xid_array[i];
20818+ unsigned int j = PROC_NUMBUF;
20819+
20820+ n = xid;
20821+ do
20822+ buf[--j] = '0' + (n % 10);
20823+ while (n /= 10);
20824+
20825+ if (vx_proc_fill_cache(filp, ctx,
20826+ buf + j, PROC_NUMBUF - j,
20827+ vs_proc_instantiate, xid, p))
20828+ return 0;
20829+ ctx->pos++;
20830+ }
4bf69007 20831+ return 0;
d337f35e
JR
20832+}
20833+
4bf69007
AM
20834+static int proc_virtual_getattr(struct vfsmount *mnt,
20835+ struct dentry *dentry, struct kstat *stat)
d337f35e 20836+{
4bf69007 20837+ struct inode *inode = dentry->d_inode;
d337f35e 20838+
4bf69007
AM
20839+ generic_fillattr(inode, stat);
20840+ stat->nlink = 2 + atomic_read(&vx_global_cactive);
20841+ return 0;
d337f35e
JR
20842+}
20843+
4bf69007
AM
20844+static struct file_operations proc_virtual_dir_operations = {
20845+ .read = generic_read_dir,
c2e5f7c8 20846+ .iterate = proc_virtual_iterate,
d337f35e
JR
20847+};
20848+
4bf69007
AM
20849+static struct inode_operations proc_virtual_dir_inode_operations = {
20850+ .getattr = proc_virtual_getattr,
20851+ .lookup = proc_virtual_lookup,
20852+};
d337f35e 20853+
d337f35e
JR
20854+
20855+
c2e5f7c8 20856+int proc_virtnet_iterate(struct file *filp, struct dir_context *ctx)
d337f35e 20857+{
4bf69007
AM
20858+ struct vs_entry *p = nx_virtnet_stuff;
20859+ int size = sizeof(nx_virtnet_stuff) / sizeof(struct vs_entry);
c2e5f7c8 20860+ int index;
4bf69007
AM
20861+ unsigned int nid_array[PROC_MAXVIDS];
20862+ char buf[PROC_NUMBUF];
20863+ unsigned int nr_nids, i;
d337f35e 20864+
8de2f54c
AM
20865+ if (!dir_emit_dots(filp, ctx))
20866+ return 0;
20867+
20868+ index = ctx->pos - 2;
20869+ if (index < size) {
4bf69007 20870+ for (p += index; p->name; p++) {
c2e5f7c8 20871+ if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
4bf69007 20872+ vs_proc_instantiate, 0, p))
8de2f54c 20873+ return 0;
c2e5f7c8 20874+ ctx->pos++;
d337f35e
JR
20875+ }
20876+ }
8de2f54c
AM
20877+
20878+ index = ctx->pos - size;
20879+ p = &nx_virtnet_stuff[size - 1];
20880+ nr_nids = get_nid_list(index, nid_array, PROC_MAXVIDS);
20881+ for (i = 0; i < nr_nids; i++) {
20882+ int n, nid = nid_array[i];
20883+ unsigned int j = PROC_NUMBUF;
20884+
20885+ n = nid;
20886+ do
20887+ buf[--j] = '0' + (n % 10);
20888+ while (n /= 10);
20889+
20890+ if (vx_proc_fill_cache(filp, ctx,
20891+ buf + j, PROC_NUMBUF - j,
20892+ vs_proc_instantiate, nid, p))
20893+ return 0;
20894+ ctx->pos++;
20895+ }
d337f35e
JR
20896+ return 0;
20897+}
20898+
4bf69007
AM
20899+static int proc_virtnet_getattr(struct vfsmount *mnt,
20900+ struct dentry *dentry, struct kstat *stat)
20901+{
20902+ struct inode *inode = dentry->d_inode;
d337f35e 20903+
4bf69007
AM
20904+ generic_fillattr(inode, stat);
20905+ stat->nlink = 2 + atomic_read(&nx_global_cactive);
20906+ return 0;
20907+}
d337f35e 20908+
4bf69007
AM
20909+static struct file_operations proc_virtnet_dir_operations = {
20910+ .read = generic_read_dir,
c2e5f7c8 20911+ .iterate = proc_virtnet_iterate,
d337f35e
JR
20912+};
20913+
4bf69007
AM
20914+static struct inode_operations proc_virtnet_dir_inode_operations = {
20915+ .getattr = proc_virtnet_getattr,
20916+ .lookup = proc_virtnet_lookup,
d337f35e
JR
20917+};
20918+
d337f35e
JR
20919+
20920+
4bf69007 20921+void proc_vx_init(void)
d337f35e 20922+{
4bf69007 20923+ struct proc_dir_entry *ent;
d337f35e 20924+
4bf69007
AM
20925+ ent = proc_mkdir("virtual", 0);
20926+ if (ent) {
20927+ ent->proc_fops = &proc_virtual_dir_operations;
20928+ ent->proc_iops = &proc_virtual_dir_inode_operations;
20929+ }
20930+ proc_virtual = ent;
d337f35e 20931+
4bf69007
AM
20932+ ent = proc_mkdir("virtnet", 0);
20933+ if (ent) {
20934+ ent->proc_fops = &proc_virtnet_dir_operations;
20935+ ent->proc_iops = &proc_virtnet_dir_inode_operations;
d337f35e 20936+ }
4bf69007 20937+ proc_virtnet = ent;
d337f35e
JR
20938+}
20939+
d337f35e 20940+
2380c486 20941+
2380c486 20942+
4bf69007 20943+/* per pid info */
2380c486 20944+
bb20add7
AM
20945+void render_cap_t(struct seq_file *, const char *,
20946+ struct vx_info *, kernel_cap_t *);
20947+
2380c486 20948+
bb20add7
AM
20949+int proc_pid_vx_info(
20950+ struct seq_file *m,
20951+ struct pid_namespace *ns,
20952+ struct pid *pid,
20953+ struct task_struct *p)
2380c486 20954+{
4bf69007 20955+ struct vx_info *vxi;
2380c486 20956+
bb20add7 20957+ seq_printf(m, "XID:\t%d\n", vx_task_xid(p));
2380c486 20958+
4bf69007
AM
20959+ vxi = task_get_vx_info(p);
20960+ if (!vxi)
bb20add7 20961+ return 0;
2380c486 20962+
bb20add7
AM
20963+ render_cap_t(m, "BCaps:\t", vxi, &vxi->vx_bcaps);
20964+ seq_printf(m, "CCaps:\t%016llx\n",
4bf69007 20965+ (unsigned long long)vxi->vx_ccaps);
bb20add7 20966+ seq_printf(m, "CFlags:\t%016llx\n",
4bf69007 20967+ (unsigned long long)vxi->vx_flags);
bb20add7 20968+ seq_printf(m, "CIPid:\t%d\n", vxi->vx_initpid);
4bf69007
AM
20969+
20970+ put_vx_info(vxi);
bb20add7 20971+ return 0;
2380c486
JR
20972+}
20973+
2380c486 20974+
bb20add7
AM
20975+int proc_pid_nx_info(
20976+ struct seq_file *m,
20977+ struct pid_namespace *ns,
20978+ struct pid *pid,
20979+ struct task_struct *p)
4bf69007
AM
20980+{
20981+ struct nx_info *nxi;
20982+ struct nx_addr_v4 *v4a;
20983+#ifdef CONFIG_IPV6
20984+ struct nx_addr_v6 *v6a;
20985+#endif
4bf69007 20986+ int i;
2380c486 20987+
bb20add7 20988+ seq_printf(m, "NID:\t%d\n", nx_task_nid(p));
2380c486 20989+
4bf69007
AM
20990+ nxi = task_get_nx_info(p);
20991+ if (!nxi)
bb20add7 20992+ return 0;
2380c486 20993+
bb20add7 20994+ seq_printf(m, "NCaps:\t%016llx\n",
4bf69007 20995+ (unsigned long long)nxi->nx_ncaps);
bb20add7 20996+ seq_printf(m, "NFlags:\t%016llx\n",
4bf69007
AM
20997+ (unsigned long long)nxi->nx_flags);
20998+
bb20add7 20999+ seq_printf(m, "V4Root[bcast]:\t" NIPQUAD_FMT "\n",
4bf69007 21000+ NIPQUAD(nxi->v4_bcast.s_addr));
bb20add7 21001+ seq_printf(m, "V4Root[lback]:\t" NIPQUAD_FMT "\n",
4bf69007
AM
21002+ NIPQUAD(nxi->v4_lback.s_addr));
21003+ if (!NX_IPV4(nxi))
21004+ goto skip_v4;
21005+ for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
bb20add7 21006+ seq_printf(m, "V4Root[%d]:\t" NXAV4_FMT "\n",
4bf69007
AM
21007+ i, NXAV4(v4a));
21008+skip_v4:
21009+#ifdef CONFIG_IPV6
21010+ if (!NX_IPV6(nxi))
21011+ goto skip_v6;
21012+ for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
bb20add7 21013+ seq_printf(m, "V6Root[%d]:\t" NXAV6_FMT "\n",
4bf69007
AM
21014+ i, NXAV6(v6a));
21015+skip_v6:
21016+#endif
21017+ put_nx_info(nxi);
bb20add7 21018+ return 0;
2380c486
JR
21019+}
21020+
c2806d43
AM
21021diff -urNp -x '*.orig' linux-4.4/kernel/vserver/sched.c linux-4.4/kernel/vserver/sched.c
21022--- linux-4.4/kernel/vserver/sched.c 1970-01-01 01:00:00.000000000 +0100
21023+++ linux-4.4/kernel/vserver/sched.c 2021-02-24 16:56:24.612823601 +0100
4bf69007
AM
21024@@ -0,0 +1,83 @@
21025+/*
21026+ * linux/kernel/vserver/sched.c
21027+ *
21028+ * Virtual Server: Scheduler Support
21029+ *
c2806d43 21030+ * Copyright (C) 2004-2010 Herbert P?tzl
4bf69007
AM
21031+ *
21032+ * V0.01 adapted Sam Vilains version to 2.6.3
21033+ * V0.02 removed legacy interface
21034+ * V0.03 changed vcmds to vxi arg
21035+ * V0.04 removed older and legacy interfaces
21036+ * V0.05 removed scheduler code/commands
21037+ *
21038+ */
21039+
21040+#include <linux/vs_context.h>
21041+#include <linux/vs_sched.h>
21042+#include <linux/cpumask.h>
21043+#include <linux/vserver/sched_cmd.h>
2380c486 21044+
4bf69007
AM
21045+#include <asm/uaccess.h>
21046+
21047+
21048+void vx_update_sched_param(struct _vx_sched *sched,
21049+ struct _vx_sched_pc *sched_pc)
2380c486 21050+{
4bf69007 21051+ sched_pc->prio_bias = sched->prio_bias;
2380c486
JR
21052+}
21053+
4bf69007
AM
21054+static int do_set_prio_bias(struct vx_info *vxi, struct vcmd_prio_bias *data)
21055+{
21056+ int cpu;
2380c486 21057+
4bf69007
AM
21058+ if (data->prio_bias > MAX_PRIO_BIAS)
21059+ data->prio_bias = MAX_PRIO_BIAS;
21060+ if (data->prio_bias < MIN_PRIO_BIAS)
21061+ data->prio_bias = MIN_PRIO_BIAS;
2380c486 21062+
4bf69007 21063+ if (data->cpu_id != ~0) {
927ca606 21064+ vxi->sched.update = *get_cpu_mask(data->cpu_id);
4bf69007
AM
21065+ cpumask_and(&vxi->sched.update, &vxi->sched.update,
21066+ cpu_online_mask);
21067+ } else
21068+ cpumask_copy(&vxi->sched.update, cpu_online_mask);
2380c486 21069+
927ca606 21070+ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)vxi->sched.update)
4bf69007
AM
21071+ vx_update_sched_param(&vxi->sched,
21072+ &vx_per_cpu(vxi, sched_pc, cpu));
21073+ return 0;
21074+}
2380c486 21075+
4bf69007
AM
21076+int vc_set_prio_bias(struct vx_info *vxi, void __user *data)
21077+{
21078+ struct vcmd_prio_bias vc_data;
d337f35e 21079+
4bf69007
AM
21080+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21081+ return -EFAULT;
d337f35e 21082+
4bf69007
AM
21083+ return do_set_prio_bias(vxi, &vc_data);
21084+}
d337f35e 21085+
4bf69007
AM
21086+int vc_get_prio_bias(struct vx_info *vxi, void __user *data)
21087+{
21088+ struct vcmd_prio_bias vc_data;
21089+ struct _vx_sched_pc *pcd;
21090+ int cpu;
d337f35e 21091+
4bf69007
AM
21092+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21093+ return -EFAULT;
2380c486 21094+
4bf69007 21095+ cpu = vc_data.cpu_id;
d337f35e 21096+
4bf69007
AM
21097+ if (!cpu_possible(cpu))
21098+ return -EINVAL;
d337f35e 21099+
4bf69007
AM
21100+ pcd = &vx_per_cpu(vxi, sched_pc, cpu);
21101+ vc_data.prio_bias = pcd->prio_bias;
d337f35e 21102+
4bf69007
AM
21103+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21104+ return -EFAULT;
21105+ return 0;
21106+}
d337f35e 21107+
c2806d43
AM
21108diff -urNp -x '*.orig' linux-4.4/kernel/vserver/sched_init.h linux-4.4/kernel/vserver/sched_init.h
21109--- linux-4.4/kernel/vserver/sched_init.h 1970-01-01 01:00:00.000000000 +0100
21110+++ linux-4.4/kernel/vserver/sched_init.h 2021-02-24 16:56:24.612823601 +0100
4bf69007 21111@@ -0,0 +1,27 @@
2380c486 21112+
4bf69007
AM
21113+static inline void vx_info_init_sched(struct _vx_sched *sched)
21114+{
21115+ /* scheduling; hard code starting values as constants */
21116+ sched->prio_bias = 0;
d337f35e
JR
21117+}
21118+
4bf69007
AM
21119+static inline
21120+void vx_info_init_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
e3afe727 21121+{
4bf69007
AM
21122+ sched_pc->prio_bias = 0;
21123+
21124+ sched_pc->user_ticks = 0;
21125+ sched_pc->sys_ticks = 0;
21126+ sched_pc->hold_ticks = 0;
e3afe727
AM
21127+}
21128+
4bf69007 21129+static inline void vx_info_exit_sched(struct _vx_sched *sched)
e3afe727 21130+{
4bf69007 21131+ return;
e3afe727
AM
21132+}
21133+
4bf69007
AM
21134+static inline
21135+void vx_info_exit_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
e3afe727 21136+{
4bf69007 21137+ return;
e3afe727 21138+}
c2806d43
AM
21139diff -urNp -x '*.orig' linux-4.4/kernel/vserver/sched_proc.h linux-4.4/kernel/vserver/sched_proc.h
21140--- linux-4.4/kernel/vserver/sched_proc.h 1970-01-01 01:00:00.000000000 +0100
21141+++ linux-4.4/kernel/vserver/sched_proc.h 2021-02-24 16:56:24.612823601 +0100
4bf69007
AM
21142@@ -0,0 +1,32 @@
21143+#ifndef _VX_SCHED_PROC_H
21144+#define _VX_SCHED_PROC_H
e3afe727 21145+
4bf69007
AM
21146+
21147+static inline
21148+int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
e3afe727 21149+{
4bf69007
AM
21150+ int length = 0;
21151+
21152+ length += sprintf(buffer,
21153+ "PrioBias:\t%8d\n",
21154+ sched->prio_bias);
21155+ return length;
e3afe727
AM
21156+}
21157+
4bf69007
AM
21158+static inline
21159+int vx_info_proc_sched_pc(struct _vx_sched_pc *sched_pc,
21160+ char *buffer, int cpu)
e3afe727 21161+{
4bf69007 21162+ int length = 0;
e3afe727 21163+
4bf69007
AM
21164+ length += sprintf(buffer + length,
21165+ "cpu %d: %lld %lld %lld", cpu,
21166+ (unsigned long long)sched_pc->user_ticks,
21167+ (unsigned long long)sched_pc->sys_ticks,
21168+ (unsigned long long)sched_pc->hold_ticks);
21169+ length += sprintf(buffer + length,
21170+ " %d\n", sched_pc->prio_bias);
21171+ return length;
21172+}
93de0823 21173+
4bf69007 21174+#endif /* _VX_SCHED_PROC_H */
c2806d43
AM
21175diff -urNp -x '*.orig' linux-4.4/kernel/vserver/signal.c linux-4.4/kernel/vserver/signal.c
21176--- linux-4.4/kernel/vserver/signal.c 1970-01-01 01:00:00.000000000 +0100
21177+++ linux-4.4/kernel/vserver/signal.c 2021-02-24 16:56:24.612823601 +0100
4bf69007
AM
21178@@ -0,0 +1,134 @@
21179+/*
21180+ * linux/kernel/vserver/signal.c
21181+ *
21182+ * Virtual Server: Signal Support
21183+ *
c2806d43 21184+ * Copyright (C) 2003-2007 Herbert P?tzl
4bf69007
AM
21185+ *
21186+ * V0.01 broken out from vcontext V0.05
21187+ * V0.02 changed vcmds to vxi arg
21188+ * V0.03 adjusted siginfo for kill
21189+ *
21190+ */
99a884b4 21191+
4bf69007 21192+#include <asm/uaccess.h>
93de0823 21193+
4bf69007
AM
21194+#include <linux/vs_context.h>
21195+#include <linux/vs_pid.h>
21196+#include <linux/vserver/signal_cmd.h>
d337f35e 21197+
d337f35e 21198+
4bf69007
AM
21199+int vx_info_kill(struct vx_info *vxi, int pid, int sig)
21200+{
21201+ int retval, count = 0;
21202+ struct task_struct *p;
21203+ struct siginfo *sip = SEND_SIG_PRIV;
d33d7b00 21204+
4bf69007
AM
21205+ retval = -ESRCH;
21206+ vxdprintk(VXD_CBIT(misc, 4),
21207+ "vx_info_kill(%p[#%d],%d,%d)*",
21208+ vxi, vxi->vx_id, pid, sig);
21209+ read_lock(&tasklist_lock);
21210+ switch (pid) {
21211+ case 0:
21212+ case -1:
21213+ for_each_process(p) {
21214+ int err = 0;
d337f35e 21215+
4bf69007
AM
21216+ if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 ||
21217+ (pid && vxi->vx_initpid == p->pid))
21218+ continue;
d337f35e 21219+
4bf69007
AM
21220+ err = group_send_sig_info(sig, sip, p);
21221+ ++count;
21222+ if (err != -EPERM)
21223+ retval = err;
21224+ }
21225+ break;
d337f35e 21226+
4bf69007
AM
21227+ case 1:
21228+ if (vxi->vx_initpid) {
21229+ pid = vxi->vx_initpid;
21230+ /* for now, only SIGINT to private init ... */
21231+ if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
21232+ /* ... as long as there are tasks left */
21233+ (atomic_read(&vxi->vx_tasks) > 1))
21234+ sig = SIGINT;
21235+ }
21236+ /* fallthrough */
21237+ default:
21238+ rcu_read_lock();
21239+ p = find_task_by_real_pid(pid);
21240+ rcu_read_unlock();
21241+ if (p) {
21242+ if (vx_task_xid(p) == vxi->vx_id)
21243+ retval = group_send_sig_info(sig, sip, p);
21244+ }
21245+ break;
21246+ }
21247+ read_unlock(&tasklist_lock);
21248+ vxdprintk(VXD_CBIT(misc, 4),
21249+ "vx_info_kill(%p[#%d],%d,%d,%ld) = %d",
21250+ vxi, vxi->vx_id, pid, sig, (long)sip, retval);
21251+ return retval;
21252+}
d337f35e 21253+
4bf69007 21254+int vc_ctx_kill(struct vx_info *vxi, void __user *data)
d337f35e 21255+{
4bf69007 21256+ struct vcmd_ctx_kill_v0 vc_data;
d337f35e 21257+
4bf69007
AM
21258+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21259+ return -EFAULT;
d337f35e 21260+
4bf69007
AM
21261+ /* special check to allow guest shutdown */
21262+ if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
21263+ /* forbid killall pid=0 when init is present */
21264+ (((vc_data.pid < 1) && vxi->vx_initpid) ||
21265+ (vc_data.pid > 1)))
21266+ return -EACCES;
21267+
21268+ return vx_info_kill(vxi, vc_data.pid, vc_data.sig);
d337f35e
JR
21269+}
21270+
4bf69007
AM
21271+
21272+static int __wait_exit(struct vx_info *vxi)
d337f35e 21273+{
4bf69007
AM
21274+ DECLARE_WAITQUEUE(wait, current);
21275+ int ret = 0;
d337f35e 21276+
4bf69007
AM
21277+ add_wait_queue(&vxi->vx_wait, &wait);
21278+ set_current_state(TASK_INTERRUPTIBLE);
d337f35e 21279+
4bf69007
AM
21280+wait:
21281+ if (vx_info_state(vxi,
21282+ VXS_SHUTDOWN | VXS_HASHED | VXS_HELPER) == VXS_SHUTDOWN)
21283+ goto out;
21284+ if (signal_pending(current)) {
21285+ ret = -ERESTARTSYS;
21286+ goto out;
21287+ }
21288+ schedule();
21289+ goto wait;
21290+
21291+out:
21292+ set_current_state(TASK_RUNNING);
21293+ remove_wait_queue(&vxi->vx_wait, &wait);
21294+ return ret;
d337f35e
JR
21295+}
21296+
4a036bed 21297+
7b17263b 21298+
4bf69007 21299+int vc_wait_exit(struct vx_info *vxi, void __user *data)
7b17263b 21300+{
4bf69007
AM
21301+ struct vcmd_wait_exit_v0 vc_data;
21302+ int ret;
7b17263b 21303+
4bf69007
AM
21304+ ret = __wait_exit(vxi);
21305+ vc_data.reboot_cmd = vxi->reboot_cmd;
21306+ vc_data.exit_code = vxi->exit_code;
21307+
21308+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21309+ ret = -EFAULT;
21310+ return ret;
7b17263b 21311+}
2380c486 21312+
c2806d43
AM
21313diff -urNp -x '*.orig' linux-4.4/kernel/vserver/space.c linux-4.4/kernel/vserver/space.c
21314--- linux-4.4/kernel/vserver/space.c 1970-01-01 01:00:00.000000000 +0100
21315+++ linux-4.4/kernel/vserver/space.c 2021-02-24 16:56:24.612823601 +0100
4bf69007
AM
21316@@ -0,0 +1,436 @@
21317+/*
21318+ * linux/kernel/vserver/space.c
21319+ *
21320+ * Virtual Server: Context Space Support
21321+ *
c2806d43 21322+ * Copyright (C) 2003-2010 Herbert P?tzl
4bf69007
AM
21323+ *
21324+ * V0.01 broken out from context.c 0.07
21325+ * V0.02 added task locking for namespace
21326+ * V0.03 broken out vx_enter_namespace
21327+ * V0.04 added *space support and commands
21328+ * V0.05 added credential support
21329+ *
21330+ */
21331+
21332+#include <linux/utsname.h>
21333+#include <linux/nsproxy.h>
21334+#include <linux/err.h>
21335+#include <linux/fs_struct.h>
21336+#include <linux/cred.h>
21337+#include <asm/uaccess.h>
d337f35e 21338+
d337f35e 21339+#include <linux/vs_context.h>
4bf69007
AM
21340+#include <linux/vserver/space.h>
21341+#include <linux/vserver/space_cmd.h>
2380c486 21342+
4bf69007
AM
21343+atomic_t vs_global_nsproxy = ATOMIC_INIT(0);
21344+atomic_t vs_global_fs = ATOMIC_INIT(0);
21345+atomic_t vs_global_mnt_ns = ATOMIC_INIT(0);
21346+atomic_t vs_global_uts_ns = ATOMIC_INIT(0);
21347+atomic_t vs_global_user_ns = ATOMIC_INIT(0);
21348+atomic_t vs_global_pid_ns = ATOMIC_INIT(0);
d337f35e 21349+
2380c486 21350+
4bf69007 21351+/* namespace functions */
2380c486 21352+
4bf69007
AM
21353+#include <linux/mnt_namespace.h>
21354+#include <linux/user_namespace.h>
21355+#include <linux/pid_namespace.h>
21356+#include <linux/ipc_namespace.h>
21357+#include <net/net_namespace.h>
21358+#include "../fs/mount.h"
2380c486 21359+
2380c486 21360+
4bf69007
AM
21361+static const struct vcmd_space_mask_v1 space_mask_v0 = {
21362+ .mask = CLONE_FS |
21363+ CLONE_NEWNS |
21364+#ifdef CONFIG_UTS_NS
21365+ CLONE_NEWUTS |
21366+#endif
21367+#ifdef CONFIG_IPC_NS
21368+ CLONE_NEWIPC |
21369+#endif
21370+#ifdef CONFIG_USER_NS
21371+ CLONE_NEWUSER |
21372+#endif
21373+ 0
21374+};
2380c486 21375+
4bf69007
AM
21376+static const struct vcmd_space_mask_v1 space_mask = {
21377+ .mask = CLONE_FS |
21378+ CLONE_NEWNS |
21379+#ifdef CONFIG_UTS_NS
21380+ CLONE_NEWUTS |
21381+#endif
21382+#ifdef CONFIG_IPC_NS
21383+ CLONE_NEWIPC |
21384+#endif
21385+#ifdef CONFIG_USER_NS
21386+ CLONE_NEWUSER |
21387+#endif
21388+#ifdef CONFIG_PID_NS
21389+ CLONE_NEWPID |
21390+#endif
21391+#ifdef CONFIG_NET_NS
21392+ CLONE_NEWNET |
21393+#endif
21394+ 0
21395+};
2380c486 21396+
4bf69007
AM
21397+static const struct vcmd_space_mask_v1 default_space_mask = {
21398+ .mask = CLONE_FS |
21399+ CLONE_NEWNS |
21400+#ifdef CONFIG_UTS_NS
21401+ CLONE_NEWUTS |
21402+#endif
21403+#ifdef CONFIG_IPC_NS
21404+ CLONE_NEWIPC |
21405+#endif
21406+#ifdef CONFIG_USER_NS
bb20add7 21407+// CLONE_NEWUSER |
4bf69007
AM
21408+#endif
21409+#ifdef CONFIG_PID_NS
21410+// CLONE_NEWPID |
21411+#endif
21412+ 0
21413+};
2380c486 21414+
4bf69007
AM
21415+/*
21416+ * build a new nsproxy mix
21417+ * assumes that both proxies are 'const'
21418+ * does not touch nsproxy refcounts
21419+ * will hold a reference on the result.
21420+ */
7b17263b 21421+
4bf69007
AM
21422+struct nsproxy *vs_mix_nsproxy(struct nsproxy *old_nsproxy,
21423+ struct nsproxy *new_nsproxy, unsigned long mask)
21424+{
21425+ struct mnt_namespace *old_ns;
21426+ struct uts_namespace *old_uts;
21427+ struct ipc_namespace *old_ipc;
21428+#ifdef CONFIG_PID_NS
21429+ struct pid_namespace *old_pid;
21430+#endif
21431+#ifdef CONFIG_NET_NS
21432+ struct net *old_net;
21433+#endif
21434+ struct nsproxy *nsproxy;
d337f35e 21435+
4bf69007
AM
21436+ nsproxy = copy_nsproxy(old_nsproxy);
21437+ if (!nsproxy)
21438+ goto out;
bd0a9c15 21439+
4bf69007
AM
21440+ if (mask & CLONE_NEWNS) {
21441+ old_ns = nsproxy->mnt_ns;
21442+ nsproxy->mnt_ns = new_nsproxy->mnt_ns;
21443+ if (nsproxy->mnt_ns)
21444+ get_mnt_ns(nsproxy->mnt_ns);
21445+ } else
21446+ old_ns = NULL;
d337f35e 21447+
4bf69007
AM
21448+ if (mask & CLONE_NEWUTS) {
21449+ old_uts = nsproxy->uts_ns;
21450+ nsproxy->uts_ns = new_nsproxy->uts_ns;
21451+ if (nsproxy->uts_ns)
21452+ get_uts_ns(nsproxy->uts_ns);
21453+ } else
21454+ old_uts = NULL;
2380c486 21455+
4bf69007
AM
21456+ if (mask & CLONE_NEWIPC) {
21457+ old_ipc = nsproxy->ipc_ns;
21458+ nsproxy->ipc_ns = new_nsproxy->ipc_ns;
21459+ if (nsproxy->ipc_ns)
21460+ get_ipc_ns(nsproxy->ipc_ns);
21461+ } else
21462+ old_ipc = NULL;
ec22aa5c 21463+
4bf69007
AM
21464+#ifdef CONFIG_PID_NS
21465+ if (mask & CLONE_NEWPID) {
5f23d63e
AM
21466+ old_pid = nsproxy->pid_ns_for_children;
21467+ nsproxy->pid_ns_for_children = new_nsproxy->pid_ns_for_children;
21468+ if (nsproxy->pid_ns_for_children)
21469+ get_pid_ns(nsproxy->pid_ns_for_children);
4bf69007
AM
21470+ } else
21471+ old_pid = NULL;
21472+#endif
21473+#ifdef CONFIG_NET_NS
21474+ if (mask & CLONE_NEWNET) {
21475+ old_net = nsproxy->net_ns;
21476+ nsproxy->net_ns = new_nsproxy->net_ns;
21477+ if (nsproxy->net_ns)
21478+ get_net(nsproxy->net_ns);
21479+ } else
21480+ old_net = NULL;
21481+#endif
21482+ if (old_ns)
21483+ put_mnt_ns(old_ns);
21484+ if (old_uts)
21485+ put_uts_ns(old_uts);
21486+ if (old_ipc)
21487+ put_ipc_ns(old_ipc);
21488+#ifdef CONFIG_PID_NS
21489+ if (old_pid)
21490+ put_pid_ns(old_pid);
21491+#endif
21492+#ifdef CONFIG_NET_NS
21493+ if (old_net)
21494+ put_net(old_net);
21495+#endif
21496+out:
21497+ return nsproxy;
21498+}
2380c486 21499+
bd0a9c15 21500+
4bf69007
AM
21501+/*
21502+ * merge two nsproxy structs into a new one.
21503+ * will hold a reference on the result.
21504+ */
d337f35e 21505+
4bf69007
AM
21506+static inline
21507+struct nsproxy *__vs_merge_nsproxy(struct nsproxy *old,
21508+ struct nsproxy *proxy, unsigned long mask)
21509+{
21510+ struct nsproxy null_proxy = { .mnt_ns = NULL };
2380c486 21511+
4bf69007
AM
21512+ if (!proxy)
21513+ return NULL;
d337f35e 21514+
4bf69007
AM
21515+ if (mask) {
21516+ /* vs_mix_nsproxy returns with reference */
21517+ return vs_mix_nsproxy(old ? old : &null_proxy,
21518+ proxy, mask);
21519+ }
21520+ get_nsproxy(proxy);
21521+ return proxy;
21522+}
2380c486 21523+
ec22aa5c 21524+
4bf69007
AM
21525+int vx_enter_space(struct vx_info *vxi, unsigned long mask, unsigned index)
21526+{
21527+ struct nsproxy *proxy, *proxy_cur, *proxy_new;
21528+ struct fs_struct *fs_cur, *fs = NULL;
21529+ struct _vx_space *space;
21530+ int ret, kill = 0;
2380c486 21531+
4bf69007
AM
21532+ vxdprintk(VXD_CBIT(space, 8), "vx_enter_space(%p[#%u],0x%08lx,%d)",
21533+ vxi, vxi->vx_id, mask, index);
2380c486 21534+
4bf69007
AM
21535+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
21536+ return -EACCES;
2380c486 21537+
4bf69007
AM
21538+ if (index >= VX_SPACES)
21539+ return -EINVAL;
2380c486 21540+
4bf69007
AM
21541+ space = &vxi->space[index];
21542+
21543+ if (!mask)
21544+ mask = space->vx_nsmask;
21545+
21546+ if ((mask & space->vx_nsmask) != mask)
21547+ return -EINVAL;
21548+
21549+ if (mask & CLONE_FS) {
21550+ fs = copy_fs_struct(space->vx_fs);
21551+ if (!fs)
21552+ return -ENOMEM;
2380c486 21553+ }
4bf69007
AM
21554+ proxy = space->vx_nsproxy;
21555+
21556+ vxdprintk(VXD_CBIT(space, 9),
21557+ "vx_enter_space(%p[#%u],0x%08lx,%d) -> (%p,%p)",
21558+ vxi, vxi->vx_id, mask, index, proxy, fs);
21559+
21560+ task_lock(current);
21561+ fs_cur = current->fs;
21562+
21563+ if (mask & CLONE_FS) {
21564+ spin_lock(&fs_cur->lock);
21565+ current->fs = fs;
21566+ kill = !--fs_cur->users;
21567+ spin_unlock(&fs_cur->lock);
ec22aa5c 21568+ }
ec22aa5c 21569+
4bf69007
AM
21570+ proxy_cur = current->nsproxy;
21571+ get_nsproxy(proxy_cur);
21572+ task_unlock(current);
21573+
21574+ if (kill)
21575+ free_fs_struct(fs_cur);
21576+
21577+ proxy_new = __vs_merge_nsproxy(proxy_cur, proxy, mask);
21578+ if (IS_ERR(proxy_new)) {
21579+ ret = PTR_ERR(proxy_new);
21580+ goto out_put;
eab5a9a6 21581+ }
4bf69007
AM
21582+
21583+ proxy_new = xchg(&current->nsproxy, proxy_new);
21584+
21585+ if (mask & CLONE_NEWUSER) {
21586+ struct cred *cred;
21587+
21588+ vxdprintk(VXD_CBIT(space, 10),
21589+ "vx_enter_space(%p[#%u],%p) cred (%p,%p)",
21590+ vxi, vxi->vx_id, space->vx_cred,
21591+ current->real_cred, current->cred);
21592+
21593+ if (space->vx_cred) {
21594+ cred = __prepare_creds(space->vx_cred);
21595+ if (cred)
21596+ commit_creds(cred);
21597+ }
d337f35e 21598+ }
4bf69007
AM
21599+
21600+ ret = 0;
21601+
21602+ if (proxy_new)
21603+ put_nsproxy(proxy_new);
21604+out_put:
21605+ if (proxy_cur)
21606+ put_nsproxy(proxy_cur);
21607+ return ret;
21608+}
21609+
21610+
21611+int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index)
21612+{
21613+ struct nsproxy *proxy_vxi, *proxy_cur, *proxy_new;
21614+ struct fs_struct *fs_vxi, *fs = NULL;
21615+ struct _vx_space *space;
21616+ int ret, kill = 0;
21617+
21618+ vxdprintk(VXD_CBIT(space, 8), "vx_set_space(%p[#%u],0x%08lx,%d)",
21619+ vxi, vxi->vx_id, mask, index);
21620+
21621+ if ((mask & space_mask.mask) != mask)
21622+ return -EINVAL;
21623+
21624+ if (index >= VX_SPACES)
21625+ return -EINVAL;
21626+
21627+ space = &vxi->space[index];
21628+
21629+ proxy_vxi = space->vx_nsproxy;
21630+ fs_vxi = space->vx_fs;
21631+
21632+ if (mask & CLONE_FS) {
21633+ fs = copy_fs_struct(current->fs);
21634+ if (!fs)
21635+ return -ENOMEM;
2380c486 21636+ }
d337f35e 21637+
4bf69007 21638+ task_lock(current);
2ba6f0dd 21639+
4bf69007
AM
21640+ if (mask & CLONE_FS) {
21641+ spin_lock(&fs_vxi->lock);
21642+ space->vx_fs = fs;
21643+ kill = !--fs_vxi->users;
21644+ spin_unlock(&fs_vxi->lock);
21645+ }
2ba6f0dd 21646+
4bf69007
AM
21647+ proxy_cur = current->nsproxy;
21648+ get_nsproxy(proxy_cur);
21649+ task_unlock(current);
2ba6f0dd 21650+
4bf69007
AM
21651+ if (kill)
21652+ free_fs_struct(fs_vxi);
2ba6f0dd 21653+
4bf69007
AM
21654+ proxy_new = __vs_merge_nsproxy(proxy_vxi, proxy_cur, mask);
21655+ if (IS_ERR(proxy_new)) {
21656+ ret = PTR_ERR(proxy_new);
21657+ goto out_put;
21658+ }
2ba6f0dd 21659+
4bf69007
AM
21660+ proxy_new = xchg(&space->vx_nsproxy, proxy_new);
21661+ space->vx_nsmask |= mask;
2ba6f0dd 21662+
4bf69007
AM
21663+ if (mask & CLONE_NEWUSER) {
21664+ struct cred *cred;
2ba6f0dd 21665+
4bf69007
AM
21666+ vxdprintk(VXD_CBIT(space, 10),
21667+ "vx_set_space(%p[#%u],%p) cred (%p,%p)",
21668+ vxi, vxi->vx_id, space->vx_cred,
21669+ current->real_cred, current->cred);
2ba6f0dd 21670+
4bf69007
AM
21671+ cred = prepare_creds();
21672+ cred = (struct cred *)xchg(&space->vx_cred, cred);
21673+ if (cred)
21674+ abort_creds(cred);
21675+ }
2ba6f0dd 21676+
4bf69007 21677+ ret = 0;
2ba6f0dd 21678+
4bf69007
AM
21679+ if (proxy_new)
21680+ put_nsproxy(proxy_new);
21681+out_put:
21682+ if (proxy_cur)
21683+ put_nsproxy(proxy_cur);
21684+ return ret;
21685+}
2ba6f0dd
AM
21686+
21687+
4bf69007
AM
21688+int vc_enter_space_v1(struct vx_info *vxi, void __user *data)
21689+{
21690+ struct vcmd_space_mask_v1 vc_data = { .mask = 0 };
2ba6f0dd 21691+
4bf69007
AM
21692+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21693+ return -EFAULT;
2ba6f0dd 21694+
4bf69007
AM
21695+ return vx_enter_space(vxi, vc_data.mask, 0);
21696+}
2ba6f0dd 21697+
4bf69007
AM
21698+int vc_enter_space(struct vx_info *vxi, void __user *data)
21699+{
21700+ struct vcmd_space_mask_v2 vc_data = { .mask = 0 };
2ba6f0dd 21701+
4bf69007
AM
21702+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21703+ return -EFAULT;
2ba6f0dd 21704+
4bf69007
AM
21705+ if (vc_data.index >= VX_SPACES)
21706+ return -EINVAL;
2ba6f0dd 21707+
4bf69007
AM
21708+ return vx_enter_space(vxi, vc_data.mask, vc_data.index);
21709+}
2ba6f0dd 21710+
4bf69007
AM
21711+int vc_set_space_v1(struct vx_info *vxi, void __user *data)
21712+{
21713+ struct vcmd_space_mask_v1 vc_data = { .mask = 0 };
2ba6f0dd 21714+
4bf69007
AM
21715+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21716+ return -EFAULT;
2ba6f0dd 21717+
4bf69007
AM
21718+ return vx_set_space(vxi, vc_data.mask, 0);
21719+}
2ba6f0dd 21720+
4bf69007
AM
21721+int vc_set_space(struct vx_info *vxi, void __user *data)
21722+{
21723+ struct vcmd_space_mask_v2 vc_data = { .mask = 0 };
2ba6f0dd 21724+
4bf69007
AM
21725+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21726+ return -EFAULT;
2ba6f0dd 21727+
4bf69007
AM
21728+ if (vc_data.index >= VX_SPACES)
21729+ return -EINVAL;
2ba6f0dd 21730+
4bf69007
AM
21731+ return vx_set_space(vxi, vc_data.mask, vc_data.index);
21732+}
2ba6f0dd 21733+
4bf69007
AM
21734+int vc_get_space_mask(void __user *data, int type)
21735+{
21736+ const struct vcmd_space_mask_v1 *mask;
2ba6f0dd 21737+
4bf69007
AM
21738+ if (type == 0)
21739+ mask = &space_mask_v0;
21740+ else if (type == 1)
21741+ mask = &space_mask;
21742+ else
21743+ mask = &default_space_mask;
2ba6f0dd 21744+
4bf69007
AM
21745+ vxdprintk(VXD_CBIT(space, 10),
21746+ "vc_get_space_mask(%d) = %08llx", type, mask->mask);
2ba6f0dd 21747+
4bf69007
AM
21748+ if (copy_to_user(data, mask, sizeof(*mask)))
21749+ return -EFAULT;
21750+ return 0;
21751+}
2ba6f0dd 21752+
c2806d43
AM
21753diff -urNp -x '*.orig' linux-4.4/kernel/vserver/switch.c linux-4.4/kernel/vserver/switch.c
21754--- linux-4.4/kernel/vserver/switch.c 1970-01-01 01:00:00.000000000 +0100
21755+++ linux-4.4/kernel/vserver/switch.c 2021-02-24 16:56:24.612823601 +0100
4bf69007
AM
21756@@ -0,0 +1,556 @@
21757+/*
21758+ * linux/kernel/vserver/switch.c
21759+ *
21760+ * Virtual Server: Syscall Switch
21761+ *
c2806d43 21762+ * Copyright (C) 2003-2011 Herbert Pötzl
4bf69007
AM
21763+ *
21764+ * V0.01 syscall switch
21765+ * V0.02 added signal to context
21766+ * V0.03 added rlimit functions
21767+ * V0.04 added iattr, task/xid functions
21768+ * V0.05 added debug/history stuff
21769+ * V0.06 added compat32 layer
21770+ * V0.07 vcmd args and perms
21771+ * V0.08 added status commands
21772+ * V0.09 added tag commands
21773+ * V0.10 added oom bias
21774+ * V0.11 added device commands
21775+ * V0.12 added warn mask
21776+ *
21777+ */
2ba6f0dd 21778+
4bf69007
AM
21779+#include <linux/vs_context.h>
21780+#include <linux/vs_network.h>
21781+#include <linux/vserver/switch.h>
2ba6f0dd 21782+
4bf69007 21783+#include "vci_config.h"
2ba6f0dd 21784+
2ba6f0dd 21785+
4bf69007
AM
21786+static inline
21787+int vc_get_version(uint32_t id)
21788+{
21789+ return VCI_VERSION;
21790+}
2ba6f0dd 21791+
4bf69007
AM
21792+static inline
21793+int vc_get_vci(uint32_t id)
21794+{
21795+ return vci_kernel_config();
21796+}
2ba6f0dd 21797+
4bf69007
AM
21798+#include <linux/vserver/context_cmd.h>
21799+#include <linux/vserver/cvirt_cmd.h>
21800+#include <linux/vserver/cacct_cmd.h>
21801+#include <linux/vserver/limit_cmd.h>
21802+#include <linux/vserver/network_cmd.h>
21803+#include <linux/vserver/sched_cmd.h>
21804+#include <linux/vserver/debug_cmd.h>
21805+#include <linux/vserver/inode_cmd.h>
21806+#include <linux/vserver/dlimit_cmd.h>
21807+#include <linux/vserver/signal_cmd.h>
21808+#include <linux/vserver/space_cmd.h>
21809+#include <linux/vserver/tag_cmd.h>
21810+#include <linux/vserver/device_cmd.h>
2ba6f0dd 21811+
4bf69007
AM
21812+#include <linux/vserver/inode.h>
21813+#include <linux/vserver/dlimit.h>
2ba6f0dd 21814+
2ba6f0dd 21815+
4bf69007
AM
21816+#ifdef CONFIG_COMPAT
21817+#define __COMPAT(name, id, data, compat) \
21818+ (compat) ? name ## _x32(id, data) : name(id, data)
21819+#define __COMPAT_NO_ID(name, data, compat) \
21820+ (compat) ? name ## _x32(data) : name(data)
21821+#else
21822+#define __COMPAT(name, id, data, compat) \
21823+ name(id, data)
21824+#define __COMPAT_NO_ID(name, data, compat) \
21825+ name(data)
21826+#endif
2ba6f0dd 21827+
2ba6f0dd 21828+
4bf69007
AM
21829+static inline
21830+long do_vcmd(uint32_t cmd, uint32_t id,
21831+ struct vx_info *vxi, struct nx_info *nxi,
21832+ void __user *data, int compat)
21833+{
21834+ switch (cmd) {
2ba6f0dd 21835+
4bf69007
AM
21836+ case VCMD_get_version:
21837+ return vc_get_version(id);
21838+ case VCMD_get_vci:
21839+ return vc_get_vci(id);
2ba6f0dd 21840+
4bf69007
AM
21841+ case VCMD_task_xid:
21842+ return vc_task_xid(id);
21843+ case VCMD_vx_info:
21844+ return vc_vx_info(vxi, data);
2ba6f0dd 21845+
4bf69007
AM
21846+ case VCMD_task_nid:
21847+ return vc_task_nid(id);
21848+ case VCMD_nx_info:
21849+ return vc_nx_info(nxi, data);
2ba6f0dd 21850+
4bf69007
AM
21851+ case VCMD_task_tag:
21852+ return vc_task_tag(id);
2ba6f0dd 21853+
4bf69007
AM
21854+ case VCMD_set_space_v1:
21855+ return vc_set_space_v1(vxi, data);
21856+ /* this is version 2 */
21857+ case VCMD_set_space:
21858+ return vc_set_space(vxi, data);
2ba6f0dd 21859+
4bf69007
AM
21860+ case VCMD_get_space_mask_v0:
21861+ return vc_get_space_mask(data, 0);
21862+ /* this is version 1 */
21863+ case VCMD_get_space_mask:
21864+ return vc_get_space_mask(data, 1);
2ba6f0dd 21865+
4bf69007
AM
21866+ case VCMD_get_space_default:
21867+ return vc_get_space_mask(data, -1);
2ba6f0dd 21868+
4bf69007
AM
21869+ case VCMD_set_umask:
21870+ return vc_set_umask(vxi, data);
2ba6f0dd 21871+
4bf69007
AM
21872+ case VCMD_get_umask:
21873+ return vc_get_umask(vxi, data);
2ba6f0dd 21874+
4bf69007
AM
21875+ case VCMD_set_wmask:
21876+ return vc_set_wmask(vxi, data);
2ba6f0dd 21877+
4bf69007
AM
21878+ case VCMD_get_wmask:
21879+ return vc_get_wmask(vxi, data);
21880+#ifdef CONFIG_IA32_EMULATION
21881+ case VCMD_get_rlimit:
21882+ return __COMPAT(vc_get_rlimit, vxi, data, compat);
21883+ case VCMD_set_rlimit:
21884+ return __COMPAT(vc_set_rlimit, vxi, data, compat);
21885+#else
21886+ case VCMD_get_rlimit:
21887+ return vc_get_rlimit(vxi, data);
21888+ case VCMD_set_rlimit:
21889+ return vc_set_rlimit(vxi, data);
21890+#endif
21891+ case VCMD_get_rlimit_mask:
21892+ return vc_get_rlimit_mask(id, data);
21893+ case VCMD_reset_hits:
21894+ return vc_reset_hits(vxi, data);
21895+ case VCMD_reset_minmax:
21896+ return vc_reset_minmax(vxi, data);
2ba6f0dd 21897+
4bf69007
AM
21898+ case VCMD_get_vhi_name:
21899+ return vc_get_vhi_name(vxi, data);
21900+ case VCMD_set_vhi_name:
21901+ return vc_set_vhi_name(vxi, data);
2ba6f0dd 21902+
4bf69007
AM
21903+ case VCMD_ctx_stat:
21904+ return vc_ctx_stat(vxi, data);
21905+ case VCMD_virt_stat:
21906+ return vc_virt_stat(vxi, data);
21907+ case VCMD_sock_stat:
21908+ return vc_sock_stat(vxi, data);
21909+ case VCMD_rlimit_stat:
21910+ return vc_rlimit_stat(vxi, data);
2ba6f0dd 21911+
4bf69007
AM
21912+ case VCMD_set_cflags:
21913+ return vc_set_cflags(vxi, data);
21914+ case VCMD_get_cflags:
21915+ return vc_get_cflags(vxi, data);
2ba6f0dd 21916+
4bf69007
AM
21917+ /* this is version 1 */
21918+ case VCMD_set_ccaps:
21919+ return vc_set_ccaps(vxi, data);
21920+ /* this is version 1 */
21921+ case VCMD_get_ccaps:
21922+ return vc_get_ccaps(vxi, data);
21923+ case VCMD_set_bcaps:
21924+ return vc_set_bcaps(vxi, data);
21925+ case VCMD_get_bcaps:
21926+ return vc_get_bcaps(vxi, data);
2ba6f0dd 21927+
4bf69007
AM
21928+ case VCMD_set_badness:
21929+ return vc_set_badness(vxi, data);
21930+ case VCMD_get_badness:
21931+ return vc_get_badness(vxi, data);
2ba6f0dd 21932+
4bf69007
AM
21933+ case VCMD_set_nflags:
21934+ return vc_set_nflags(nxi, data);
21935+ case VCMD_get_nflags:
21936+ return vc_get_nflags(nxi, data);
2ba6f0dd 21937+
4bf69007
AM
21938+ case VCMD_set_ncaps:
21939+ return vc_set_ncaps(nxi, data);
21940+ case VCMD_get_ncaps:
21941+ return vc_get_ncaps(nxi, data);
2ba6f0dd 21942+
4bf69007
AM
21943+ case VCMD_set_prio_bias:
21944+ return vc_set_prio_bias(vxi, data);
21945+ case VCMD_get_prio_bias:
21946+ return vc_get_prio_bias(vxi, data);
21947+ case VCMD_add_dlimit:
21948+ return __COMPAT(vc_add_dlimit, id, data, compat);
21949+ case VCMD_rem_dlimit:
21950+ return __COMPAT(vc_rem_dlimit, id, data, compat);
21951+ case VCMD_set_dlimit:
21952+ return __COMPAT(vc_set_dlimit, id, data, compat);
21953+ case VCMD_get_dlimit:
21954+ return __COMPAT(vc_get_dlimit, id, data, compat);
2ba6f0dd 21955+
4bf69007
AM
21956+ case VCMD_ctx_kill:
21957+ return vc_ctx_kill(vxi, data);
2ba6f0dd 21958+
4bf69007
AM
21959+ case VCMD_wait_exit:
21960+ return vc_wait_exit(vxi, data);
2ba6f0dd 21961+
4bf69007
AM
21962+ case VCMD_get_iattr:
21963+ return __COMPAT_NO_ID(vc_get_iattr, data, compat);
21964+ case VCMD_set_iattr:
21965+ return __COMPAT_NO_ID(vc_set_iattr, data, compat);
2ba6f0dd 21966+
4bf69007
AM
21967+ case VCMD_fget_iattr:
21968+ return vc_fget_iattr(id, data);
21969+ case VCMD_fset_iattr:
21970+ return vc_fset_iattr(id, data);
2ba6f0dd 21971+
4bf69007
AM
21972+ case VCMD_enter_space_v0:
21973+ return vc_enter_space_v1(vxi, NULL);
21974+ case VCMD_enter_space_v1:
21975+ return vc_enter_space_v1(vxi, data);
21976+ /* this is version 2 */
21977+ case VCMD_enter_space:
21978+ return vc_enter_space(vxi, data);
2ba6f0dd 21979+
4bf69007
AM
21980+ case VCMD_ctx_create_v0:
21981+ return vc_ctx_create(id, NULL);
21982+ case VCMD_ctx_create:
21983+ return vc_ctx_create(id, data);
21984+ case VCMD_ctx_migrate_v0:
21985+ return vc_ctx_migrate(vxi, NULL);
21986+ case VCMD_ctx_migrate:
21987+ return vc_ctx_migrate(vxi, data);
2ba6f0dd 21988+
4bf69007
AM
21989+ case VCMD_net_create_v0:
21990+ return vc_net_create(id, NULL);
21991+ case VCMD_net_create:
21992+ return vc_net_create(id, data);
21993+ case VCMD_net_migrate:
21994+ return vc_net_migrate(nxi, data);
2ba6f0dd 21995+
4bf69007
AM
21996+ case VCMD_tag_migrate:
21997+ return vc_tag_migrate(id);
2ba6f0dd 21998+
4bf69007
AM
21999+ case VCMD_net_add:
22000+ return vc_net_add(nxi, data);
22001+ case VCMD_net_remove:
22002+ return vc_net_remove(nxi, data);
2ba6f0dd 22003+
4bf69007
AM
22004+ case VCMD_net_add_ipv4_v1:
22005+ return vc_net_add_ipv4_v1(nxi, data);
22006+ /* this is version 2 */
22007+ case VCMD_net_add_ipv4:
22008+ return vc_net_add_ipv4(nxi, data);
2ba6f0dd 22009+
4bf69007
AM
22010+ case VCMD_net_rem_ipv4_v1:
22011+ return vc_net_rem_ipv4_v1(nxi, data);
22012+ /* this is version 2 */
22013+ case VCMD_net_rem_ipv4:
22014+ return vc_net_rem_ipv4(nxi, data);
22015+#ifdef CONFIG_IPV6
22016+ case VCMD_net_add_ipv6:
22017+ return vc_net_add_ipv6(nxi, data);
22018+ case VCMD_net_remove_ipv6:
22019+ return vc_net_remove_ipv6(nxi, data);
22020+#endif
22021+/* case VCMD_add_match_ipv4:
22022+ return vc_add_match_ipv4(nxi, data);
22023+ case VCMD_get_match_ipv4:
22024+ return vc_get_match_ipv4(nxi, data);
22025+#ifdef CONFIG_IPV6
22026+ case VCMD_add_match_ipv6:
22027+ return vc_add_match_ipv6(nxi, data);
22028+ case VCMD_get_match_ipv6:
22029+ return vc_get_match_ipv6(nxi, data);
22030+#endif */
2ba6f0dd 22031+
4bf69007
AM
22032+#ifdef CONFIG_VSERVER_DEVICE
22033+ case VCMD_set_mapping:
22034+ return __COMPAT(vc_set_mapping, vxi, data, compat);
22035+ case VCMD_unset_mapping:
22036+ return __COMPAT(vc_unset_mapping, vxi, data, compat);
22037+#endif
22038+#ifdef CONFIG_VSERVER_HISTORY
22039+ case VCMD_dump_history:
22040+ return vc_dump_history(id);
22041+ case VCMD_read_history:
22042+ return __COMPAT(vc_read_history, id, data, compat);
22043+#endif
22044+ default:
22045+ vxwprintk_task(1, "unimplemented VCMD_%02d_%d[%d]",
22046+ VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd));
22047+ }
22048+ return -ENOSYS;
22049+}
2ba6f0dd 22050+
2ba6f0dd 22051+
4bf69007
AM
22052+#define __VCMD(vcmd, _perm, _args, _flags) \
22053+ case VCMD_ ## vcmd: perm = _perm; \
22054+ args = _args; flags = _flags; break
2ba6f0dd 22055+
2ba6f0dd 22056+
4bf69007
AM
22057+#define VCA_NONE 0x00
22058+#define VCA_VXI 0x01
22059+#define VCA_NXI 0x02
2ba6f0dd 22060+
4bf69007
AM
22061+#define VCF_NONE 0x00
22062+#define VCF_INFO 0x01
22063+#define VCF_ADMIN 0x02
22064+#define VCF_ARES 0x06 /* includes admin */
22065+#define VCF_SETUP 0x08
2ba6f0dd 22066+
4bf69007 22067+#define VCF_ZIDOK 0x10 /* zero id okay */
2ba6f0dd 22068+
2ba6f0dd
AM
22069+
22070+static inline
4bf69007 22071+long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
2ba6f0dd 22072+{
4bf69007
AM
22073+ long ret;
22074+ int permit = -1, state = 0;
22075+ int perm = -1, args = 0, flags = 0;
22076+ struct vx_info *vxi = NULL;
22077+ struct nx_info *nxi = NULL;
2ba6f0dd 22078+
4bf69007
AM
22079+ switch (cmd) {
22080+ /* unprivileged commands */
22081+ __VCMD(get_version, 0, VCA_NONE, 0);
22082+ __VCMD(get_vci, 0, VCA_NONE, 0);
22083+ __VCMD(get_rlimit_mask, 0, VCA_NONE, 0);
22084+ __VCMD(get_space_mask_v0,0, VCA_NONE, 0);
22085+ __VCMD(get_space_mask, 0, VCA_NONE, 0);
22086+ __VCMD(get_space_default,0, VCA_NONE, 0);
2ba6f0dd 22087+
4bf69007
AM
22088+ /* info commands */
22089+ __VCMD(task_xid, 2, VCA_NONE, 0);
22090+ __VCMD(reset_hits, 2, VCA_VXI, 0);
22091+ __VCMD(reset_minmax, 2, VCA_VXI, 0);
22092+ __VCMD(vx_info, 3, VCA_VXI, VCF_INFO);
22093+ __VCMD(get_bcaps, 3, VCA_VXI, VCF_INFO);
22094+ __VCMD(get_ccaps, 3, VCA_VXI, VCF_INFO);
22095+ __VCMD(get_cflags, 3, VCA_VXI, VCF_INFO);
22096+ __VCMD(get_umask, 3, VCA_VXI, VCF_INFO);
22097+ __VCMD(get_wmask, 3, VCA_VXI, VCF_INFO);
22098+ __VCMD(get_badness, 3, VCA_VXI, VCF_INFO);
22099+ __VCMD(get_vhi_name, 3, VCA_VXI, VCF_INFO);
22100+ __VCMD(get_rlimit, 3, VCA_VXI, VCF_INFO);
2ba6f0dd 22101+
4bf69007
AM
22102+ __VCMD(ctx_stat, 3, VCA_VXI, VCF_INFO);
22103+ __VCMD(virt_stat, 3, VCA_VXI, VCF_INFO);
22104+ __VCMD(sock_stat, 3, VCA_VXI, VCF_INFO);
22105+ __VCMD(rlimit_stat, 3, VCA_VXI, VCF_INFO);
2ba6f0dd 22106+
4bf69007
AM
22107+ __VCMD(task_nid, 2, VCA_NONE, 0);
22108+ __VCMD(nx_info, 3, VCA_NXI, VCF_INFO);
22109+ __VCMD(get_ncaps, 3, VCA_NXI, VCF_INFO);
22110+ __VCMD(get_nflags, 3, VCA_NXI, VCF_INFO);
2ba6f0dd 22111+
4bf69007 22112+ __VCMD(task_tag, 2, VCA_NONE, 0);
2ba6f0dd 22113+
4bf69007
AM
22114+ __VCMD(get_iattr, 2, VCA_NONE, 0);
22115+ __VCMD(fget_iattr, 2, VCA_NONE, 0);
22116+ __VCMD(get_dlimit, 3, VCA_NONE, VCF_INFO);
22117+ __VCMD(get_prio_bias, 3, VCA_VXI, VCF_INFO);
2ba6f0dd 22118+
4bf69007
AM
22119+ /* lower admin commands */
22120+ __VCMD(wait_exit, 4, VCA_VXI, VCF_INFO);
22121+ __VCMD(ctx_create_v0, 5, VCA_NONE, 0);
22122+ __VCMD(ctx_create, 5, VCA_NONE, 0);
22123+ __VCMD(ctx_migrate_v0, 5, VCA_VXI, VCF_ADMIN);
22124+ __VCMD(ctx_migrate, 5, VCA_VXI, VCF_ADMIN);
22125+ __VCMD(enter_space_v0, 5, VCA_VXI, VCF_ADMIN);
22126+ __VCMD(enter_space_v1, 5, VCA_VXI, VCF_ADMIN);
22127+ __VCMD(enter_space, 5, VCA_VXI, VCF_ADMIN);
2ba6f0dd 22128+
4bf69007
AM
22129+ __VCMD(net_create_v0, 5, VCA_NONE, 0);
22130+ __VCMD(net_create, 5, VCA_NONE, 0);
22131+ __VCMD(net_migrate, 5, VCA_NXI, VCF_ADMIN);
2ba6f0dd 22132+
4bf69007 22133+ __VCMD(tag_migrate, 5, VCA_NONE, VCF_ADMIN);
2ba6f0dd 22134+
4bf69007
AM
22135+ /* higher admin commands */
22136+ __VCMD(ctx_kill, 6, VCA_VXI, VCF_ARES);
22137+ __VCMD(set_space_v1, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22138+ __VCMD(set_space, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
2ba6f0dd 22139+
4bf69007
AM
22140+ __VCMD(set_ccaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22141+ __VCMD(set_bcaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22142+ __VCMD(set_cflags, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22143+ __VCMD(set_umask, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22144+ __VCMD(set_wmask, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22145+ __VCMD(set_badness, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
2ba6f0dd 22146+
4bf69007
AM
22147+ __VCMD(set_vhi_name, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22148+ __VCMD(set_rlimit, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22149+ __VCMD(set_prio_bias, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
2ba6f0dd 22150+
4bf69007
AM
22151+ __VCMD(set_ncaps, 7, VCA_NXI, VCF_ARES | VCF_SETUP);
22152+ __VCMD(set_nflags, 7, VCA_NXI, VCF_ARES | VCF_SETUP);
22153+ __VCMD(net_add, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22154+ __VCMD(net_remove, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22155+ __VCMD(net_add_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22156+ __VCMD(net_rem_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22157+ __VCMD(net_add_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22158+ __VCMD(net_rem_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22159+#ifdef CONFIG_IPV6
22160+ __VCMD(net_add_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22161+ __VCMD(net_remove_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22162+#endif
22163+ __VCMD(set_iattr, 7, VCA_NONE, 0);
22164+ __VCMD(fset_iattr, 7, VCA_NONE, 0);
22165+ __VCMD(set_dlimit, 7, VCA_NONE, VCF_ARES);
22166+ __VCMD(add_dlimit, 8, VCA_NONE, VCF_ARES);
22167+ __VCMD(rem_dlimit, 8, VCA_NONE, VCF_ARES);
2ba6f0dd 22168+
4bf69007
AM
22169+#ifdef CONFIG_VSERVER_DEVICE
22170+ __VCMD(set_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK);
22171+ __VCMD(unset_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK);
22172+#endif
22173+ /* debug level admin commands */
22174+#ifdef CONFIG_VSERVER_HISTORY
22175+ __VCMD(dump_history, 9, VCA_NONE, 0);
22176+ __VCMD(read_history, 9, VCA_NONE, 0);
22177+#endif
2ba6f0dd 22178+
4bf69007
AM
22179+ default:
22180+ perm = -1;
22181+ }
2ba6f0dd 22182+
4bf69007
AM
22183+ vxdprintk(VXD_CBIT(switch, 0),
22184+ "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]",
22185+ VC_CATEGORY(cmd), VC_COMMAND(cmd),
22186+ VC_VERSION(cmd), id, data, compat,
22187+ perm, args, flags);
2ba6f0dd 22188+
4bf69007
AM
22189+ ret = -ENOSYS;
22190+ if (perm < 0)
22191+ goto out;
2ba6f0dd 22192+
4bf69007
AM
22193+ state = 1;
22194+ if (!capable(CAP_CONTEXT))
22195+ goto out;
2ba6f0dd 22196+
4bf69007
AM
22197+ state = 2;
22198+ /* moved here from the individual commands */
22199+ ret = -EPERM;
22200+ if ((perm > 1) && !capable(CAP_SYS_ADMIN))
22201+ goto out;
2ba6f0dd 22202+
4bf69007
AM
22203+ state = 3;
22204+ /* vcmd involves resource management */
22205+ ret = -EPERM;
22206+ if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE))
22207+ goto out;
2ba6f0dd 22208+
4bf69007
AM
22209+ state = 4;
22210+ /* various legacy exceptions */
22211+ switch (cmd) {
22212+ /* will go away when spectator is a cap */
22213+ case VCMD_ctx_migrate_v0:
22214+ case VCMD_ctx_migrate:
22215+ if (id == 1) {
22216+ current->xid = 1;
22217+ ret = 1;
22218+ goto out;
22219+ }
22220+ break;
2ba6f0dd 22221+
4bf69007
AM
22222+ /* will go away when spectator is a cap */
22223+ case VCMD_net_migrate:
22224+ if (id == 1) {
22225+ current->nid = 1;
22226+ ret = 1;
22227+ goto out;
22228+ }
22229+ break;
22230+ }
2ba6f0dd 22231+
4bf69007
AM
22232+ /* vcmds are fine by default */
22233+ permit = 1;
2ba6f0dd 22234+
4bf69007
AM
22235+ /* admin type vcmds require admin ... */
22236+ if (flags & VCF_ADMIN)
22237+ permit = vx_check(0, VS_ADMIN) ? 1 : 0;
2ba6f0dd 22238+
4bf69007
AM
22239+ /* ... but setup type vcmds override that */
22240+ if (!permit && (flags & VCF_SETUP))
22241+ permit = vx_flags(VXF_STATE_SETUP, 0) ? 2 : 0;
2ba6f0dd 22242+
4bf69007
AM
22243+ state = 5;
22244+ ret = -EPERM;
22245+ if (!permit)
22246+ goto out;
2ba6f0dd 22247+
4bf69007
AM
22248+ state = 6;
22249+ if (!id && (flags & VCF_ZIDOK))
22250+ goto skip_id;
2ba6f0dd 22251+
4bf69007
AM
22252+ ret = -ESRCH;
22253+ if (args & VCA_VXI) {
22254+ vxi = lookup_vx_info(id);
22255+ if (!vxi)
22256+ goto out;
2ba6f0dd 22257+
4bf69007
AM
22258+ if ((flags & VCF_ADMIN) &&
22259+ /* special case kill for shutdown */
22260+ (cmd != VCMD_ctx_kill) &&
22261+ /* can context be administrated? */
22262+ !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) {
22263+ ret = -EACCES;
22264+ goto out_vxi;
22265+ }
22266+ }
22267+ state = 7;
22268+ if (args & VCA_NXI) {
22269+ nxi = lookup_nx_info(id);
22270+ if (!nxi)
22271+ goto out_vxi;
2ba6f0dd 22272+
4bf69007
AM
22273+ if ((flags & VCF_ADMIN) &&
22274+ /* can context be administrated? */
22275+ !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) {
22276+ ret = -EACCES;
22277+ goto out_nxi;
22278+ }
22279+ }
22280+skip_id:
22281+ state = 8;
22282+ ret = do_vcmd(cmd, id, vxi, nxi, data, compat);
2ba6f0dd 22283+
4bf69007
AM
22284+out_nxi:
22285+ if ((args & VCA_NXI) && nxi)
22286+ put_nx_info(nxi);
22287+out_vxi:
22288+ if ((args & VCA_VXI) && vxi)
22289+ put_vx_info(vxi);
22290+out:
22291+ vxdprintk(VXD_CBIT(switch, 1),
22292+ "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]",
22293+ VC_CATEGORY(cmd), VC_COMMAND(cmd),
22294+ VC_VERSION(cmd), ret, ret, state, permit);
22295+ return ret;
22296+}
2ba6f0dd 22297+
4bf69007
AM
22298+asmlinkage long
22299+sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
22300+{
22301+ return do_vserver(cmd, id, data, 0);
22302+}
2ba6f0dd 22303+
4bf69007 22304+#ifdef CONFIG_COMPAT
2ba6f0dd 22305+
4bf69007
AM
22306+asmlinkage long
22307+sys32_vserver(uint32_t cmd, uint32_t id, void __user *data)
22308+{
22309+ return do_vserver(cmd, id, data, 1);
22310+}
2ba6f0dd 22311+
4bf69007 22312+#endif /* CONFIG_COMPAT */
c2806d43
AM
22313diff -urNp -x '*.orig' linux-4.4/kernel/vserver/sysctl.c linux-4.4/kernel/vserver/sysctl.c
22314--- linux-4.4/kernel/vserver/sysctl.c 1970-01-01 01:00:00.000000000 +0100
22315+++ linux-4.4/kernel/vserver/sysctl.c 2021-02-24 16:56:24.612823601 +0100
4bf69007
AM
22316@@ -0,0 +1,247 @@
22317+/*
22318+ * kernel/vserver/sysctl.c
22319+ *
22320+ * Virtual Context Support
22321+ *
c2806d43 22322+ * Copyright (C) 2004-2007 Herbert Pötzl
4bf69007
AM
22323+ *
22324+ * V0.01 basic structure
22325+ *
22326+ */
2ba6f0dd 22327+
4bf69007
AM
22328+#include <linux/module.h>
22329+#include <linux/ctype.h>
22330+#include <linux/sysctl.h>
22331+#include <linux/parser.h>
22332+#include <asm/uaccess.h>
2ba6f0dd 22333+
4bf69007
AM
22334+enum {
22335+ CTL_DEBUG_ERROR = 0,
22336+ CTL_DEBUG_SWITCH = 1,
22337+ CTL_DEBUG_XID,
22338+ CTL_DEBUG_NID,
22339+ CTL_DEBUG_TAG,
22340+ CTL_DEBUG_NET,
22341+ CTL_DEBUG_LIMIT,
22342+ CTL_DEBUG_CRES,
22343+ CTL_DEBUG_DLIM,
22344+ CTL_DEBUG_QUOTA,
22345+ CTL_DEBUG_CVIRT,
22346+ CTL_DEBUG_SPACE,
22347+ CTL_DEBUG_PERM,
22348+ CTL_DEBUG_MISC,
2ba6f0dd
AM
22349+};
22350+
2ba6f0dd 22351+
4bf69007
AM
22352+unsigned int vs_debug_switch = 0;
22353+unsigned int vs_debug_xid = 0;
22354+unsigned int vs_debug_nid = 0;
22355+unsigned int vs_debug_tag = 0;
22356+unsigned int vs_debug_net = 0;
22357+unsigned int vs_debug_limit = 0;
22358+unsigned int vs_debug_cres = 0;
22359+unsigned int vs_debug_dlim = 0;
22360+unsigned int vs_debug_quota = 0;
22361+unsigned int vs_debug_cvirt = 0;
22362+unsigned int vs_debug_space = 0;
22363+unsigned int vs_debug_perm = 0;
22364+unsigned int vs_debug_misc = 0;
2ba6f0dd 22365+
2ba6f0dd 22366+
4bf69007 22367+static struct ctl_table_header *vserver_table_header;
bb20add7 22368+static struct ctl_table vserver_root_table[];
4bf69007 22369+
2ba6f0dd 22370+
4bf69007
AM
22371+void vserver_register_sysctl(void)
22372+{
22373+ if (!vserver_table_header) {
22374+ vserver_table_header = register_sysctl_table(vserver_root_table);
22375+ }
2ba6f0dd 22376+
4bf69007 22377+}
2ba6f0dd 22378+
4bf69007
AM
22379+void vserver_unregister_sysctl(void)
22380+{
22381+ if (vserver_table_header) {
22382+ unregister_sysctl_table(vserver_table_header);
22383+ vserver_table_header = NULL;
22384+ }
22385+}
2ba6f0dd 22386+
2ba6f0dd 22387+
bb20add7 22388+static int proc_dodebug(struct ctl_table *table, int write,
4bf69007
AM
22389+ void __user *buffer, size_t *lenp, loff_t *ppos)
22390+{
22391+ char tmpbuf[20], *p, c;
22392+ unsigned int value;
22393+ size_t left, len;
2ba6f0dd 22394+
4bf69007
AM
22395+ if ((*ppos && !write) || !*lenp) {
22396+ *lenp = 0;
22397+ return 0;
22398+ }
2ba6f0dd 22399+
4bf69007 22400+ left = *lenp;
2ba6f0dd 22401+
4bf69007
AM
22402+ if (write) {
22403+ if (!access_ok(VERIFY_READ, buffer, left))
22404+ return -EFAULT;
22405+ p = (char *)buffer;
22406+ while (left && __get_user(c, p) >= 0 && isspace(c))
22407+ left--, p++;
22408+ if (!left)
22409+ goto done;
2ba6f0dd 22410+
4bf69007
AM
22411+ if (left > sizeof(tmpbuf) - 1)
22412+ return -EINVAL;
22413+ if (copy_from_user(tmpbuf, p, left))
22414+ return -EFAULT;
22415+ tmpbuf[left] = '\0';
2ba6f0dd 22416+
4bf69007
AM
22417+ for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--)
22418+ value = 10 * value + (*p - '0');
22419+ if (*p && !isspace(*p))
22420+ return -EINVAL;
22421+ while (left && isspace(*p))
22422+ left--, p++;
22423+ *(unsigned int *)table->data = value;
22424+ } else {
22425+ if (!access_ok(VERIFY_WRITE, buffer, left))
22426+ return -EFAULT;
22427+ len = sprintf(tmpbuf, "%d", *(unsigned int *)table->data);
22428+ if (len > left)
22429+ len = left;
22430+ if (__copy_to_user(buffer, tmpbuf, len))
22431+ return -EFAULT;
22432+ if ((left -= len) > 0) {
22433+ if (put_user('\n', (char *)buffer + len))
22434+ return -EFAULT;
22435+ left--;
22436+ }
22437+ }
2ba6f0dd 22438+
4bf69007
AM
22439+done:
22440+ *lenp -= left;
22441+ *ppos += *lenp;
22442+ return 0;
22443+}
2ba6f0dd 22444+
4bf69007 22445+static int zero;
2ba6f0dd 22446+
4bf69007
AM
22447+#define CTL_ENTRY(ctl, name) \
22448+ { \
22449+ .procname = #name, \
22450+ .data = &vs_ ## name, \
22451+ .maxlen = sizeof(int), \
22452+ .mode = 0644, \
22453+ .proc_handler = &proc_dodebug, \
22454+ .extra1 = &zero, \
22455+ .extra2 = &zero, \
22456+ }
2ba6f0dd 22457+
bb20add7 22458+static struct ctl_table vserver_debug_table[] = {
4bf69007
AM
22459+ CTL_ENTRY(CTL_DEBUG_SWITCH, debug_switch),
22460+ CTL_ENTRY(CTL_DEBUG_XID, debug_xid),
22461+ CTL_ENTRY(CTL_DEBUG_NID, debug_nid),
22462+ CTL_ENTRY(CTL_DEBUG_TAG, debug_tag),
22463+ CTL_ENTRY(CTL_DEBUG_NET, debug_net),
22464+ CTL_ENTRY(CTL_DEBUG_LIMIT, debug_limit),
22465+ CTL_ENTRY(CTL_DEBUG_CRES, debug_cres),
22466+ CTL_ENTRY(CTL_DEBUG_DLIM, debug_dlim),
22467+ CTL_ENTRY(CTL_DEBUG_QUOTA, debug_quota),
22468+ CTL_ENTRY(CTL_DEBUG_CVIRT, debug_cvirt),
22469+ CTL_ENTRY(CTL_DEBUG_SPACE, debug_space),
22470+ CTL_ENTRY(CTL_DEBUG_PERM, debug_perm),
22471+ CTL_ENTRY(CTL_DEBUG_MISC, debug_misc),
22472+ { 0 }
22473+};
2ba6f0dd 22474+
bb20add7 22475+static struct ctl_table vserver_root_table[] = {
4bf69007
AM
22476+ {
22477+ .procname = "vserver",
22478+ .mode = 0555,
22479+ .child = vserver_debug_table
22480+ },
22481+ { 0 }
22482+};
2ba6f0dd 22483+
2ba6f0dd 22484+
4bf69007
AM
22485+static match_table_t tokens = {
22486+ { CTL_DEBUG_SWITCH, "switch=%x" },
22487+ { CTL_DEBUG_XID, "xid=%x" },
22488+ { CTL_DEBUG_NID, "nid=%x" },
22489+ { CTL_DEBUG_TAG, "tag=%x" },
22490+ { CTL_DEBUG_NET, "net=%x" },
22491+ { CTL_DEBUG_LIMIT, "limit=%x" },
22492+ { CTL_DEBUG_CRES, "cres=%x" },
22493+ { CTL_DEBUG_DLIM, "dlim=%x" },
22494+ { CTL_DEBUG_QUOTA, "quota=%x" },
22495+ { CTL_DEBUG_CVIRT, "cvirt=%x" },
22496+ { CTL_DEBUG_SPACE, "space=%x" },
22497+ { CTL_DEBUG_PERM, "perm=%x" },
22498+ { CTL_DEBUG_MISC, "misc=%x" },
22499+ { CTL_DEBUG_ERROR, NULL }
22500+};
2ba6f0dd 22501+
4bf69007
AM
22502+#define HANDLE_CASE(id, name, val) \
22503+ case CTL_DEBUG_ ## id: \
22504+ vs_debug_ ## name = val; \
22505+ printk("vs_debug_" #name "=0x%x\n", val); \
22506+ break
2ba6f0dd 22507+
2ba6f0dd 22508+
4bf69007
AM
22509+static int __init vs_debug_setup(char *str)
22510+{
22511+ char *p;
22512+ int token;
2ba6f0dd 22513+
4bf69007
AM
22514+ printk("vs_debug_setup(%s)\n", str);
22515+ while ((p = strsep(&str, ",")) != NULL) {
22516+ substring_t args[MAX_OPT_ARGS];
22517+ unsigned int value;
2ba6f0dd 22518+
4bf69007
AM
22519+ if (!*p)
22520+ continue;
2ba6f0dd 22521+
4bf69007
AM
22522+ token = match_token(p, tokens, args);
22523+ value = (token > 0) ? simple_strtoul(args[0].from, NULL, 0) : 0;
2ba6f0dd 22524+
4bf69007
AM
22525+ switch (token) {
22526+ HANDLE_CASE(SWITCH, switch, value);
22527+ HANDLE_CASE(XID, xid, value);
22528+ HANDLE_CASE(NID, nid, value);
22529+ HANDLE_CASE(TAG, tag, value);
22530+ HANDLE_CASE(NET, net, value);
22531+ HANDLE_CASE(LIMIT, limit, value);
22532+ HANDLE_CASE(CRES, cres, value);
22533+ HANDLE_CASE(DLIM, dlim, value);
22534+ HANDLE_CASE(QUOTA, quota, value);
22535+ HANDLE_CASE(CVIRT, cvirt, value);
22536+ HANDLE_CASE(SPACE, space, value);
22537+ HANDLE_CASE(PERM, perm, value);
22538+ HANDLE_CASE(MISC, misc, value);
22539+ default:
22540+ return -EINVAL;
22541+ break;
22542+ }
22543+ }
22544+ return 1;
22545+}
2ba6f0dd 22546+
4bf69007 22547+__setup("vsdebug=", vs_debug_setup);
2ba6f0dd 22548+
2ba6f0dd 22549+
2ba6f0dd 22550+
4bf69007
AM
22551+EXPORT_SYMBOL_GPL(vs_debug_switch);
22552+EXPORT_SYMBOL_GPL(vs_debug_xid);
22553+EXPORT_SYMBOL_GPL(vs_debug_nid);
22554+EXPORT_SYMBOL_GPL(vs_debug_net);
22555+EXPORT_SYMBOL_GPL(vs_debug_limit);
22556+EXPORT_SYMBOL_GPL(vs_debug_cres);
22557+EXPORT_SYMBOL_GPL(vs_debug_dlim);
22558+EXPORT_SYMBOL_GPL(vs_debug_quota);
22559+EXPORT_SYMBOL_GPL(vs_debug_cvirt);
22560+EXPORT_SYMBOL_GPL(vs_debug_space);
22561+EXPORT_SYMBOL_GPL(vs_debug_perm);
22562+EXPORT_SYMBOL_GPL(vs_debug_misc);
2ba6f0dd 22563+
c2806d43
AM
22564diff -urNp -x '*.orig' linux-4.4/kernel/vserver/tag.c linux-4.4/kernel/vserver/tag.c
22565--- linux-4.4/kernel/vserver/tag.c 1970-01-01 01:00:00.000000000 +0100
22566+++ linux-4.4/kernel/vserver/tag.c 2021-02-24 16:56:24.612823601 +0100
4bf69007
AM
22567@@ -0,0 +1,63 @@
22568+/*
22569+ * linux/kernel/vserver/tag.c
22570+ *
22571+ * Virtual Server: Shallow Tag Space
22572+ *
c2806d43 22573+ * Copyright (C) 2007 Herbert Pötzl
4bf69007
AM
22574+ *
22575+ * V0.01 basic implementation
22576+ *
22577+ */
2ba6f0dd 22578+
4bf69007
AM
22579+#include <linux/sched.h>
22580+#include <linux/vserver/debug.h>
22581+#include <linux/vs_pid.h>
22582+#include <linux/vs_tag.h>
2ba6f0dd 22583+
4bf69007 22584+#include <linux/vserver/tag_cmd.h>
2ba6f0dd 22585+
2ba6f0dd 22586+
61333608 22587+int dx_migrate_task(struct task_struct *p, vtag_t tag)
4bf69007
AM
22588+{
22589+ if (!p)
22590+ BUG();
2ba6f0dd 22591+
4bf69007
AM
22592+ vxdprintk(VXD_CBIT(tag, 5),
22593+ "dx_migrate_task(%p[#%d],#%d)", p, p->tag, tag);
2ba6f0dd 22594+
4bf69007
AM
22595+ task_lock(p);
22596+ p->tag = tag;
22597+ task_unlock(p);
2ba6f0dd 22598+
4bf69007
AM
22599+ vxdprintk(VXD_CBIT(tag, 5),
22600+ "moved task %p into [#%d]", p, tag);
22601+ return 0;
22602+}
2ba6f0dd 22603+
4bf69007 22604+/* vserver syscall commands below here */
2ba6f0dd 22605+
4bf69007 22606+/* task xid and vx_info functions */
2ba6f0dd 22607+
2ba6f0dd 22608+
4bf69007
AM
22609+int vc_task_tag(uint32_t id)
22610+{
61333608 22611+ vtag_t tag;
2ba6f0dd 22612+
4bf69007
AM
22613+ if (id) {
22614+ struct task_struct *tsk;
22615+ rcu_read_lock();
22616+ tsk = find_task_by_real_pid(id);
22617+ tag = (tsk) ? tsk->tag : -ESRCH;
22618+ rcu_read_unlock();
22619+ } else
22620+ tag = dx_current_tag();
22621+ return tag;
22622+}
2ba6f0dd 22623+
2ba6f0dd 22624+
4bf69007
AM
22625+int vc_tag_migrate(uint32_t tag)
22626+{
22627+ return dx_migrate_task(current, tag & 0xFFFF);
22628+}
2ba6f0dd 22629+
2ba6f0dd 22630+
c2806d43
AM
22631diff -urNp -x '*.orig' linux-4.4/kernel/vserver/vci_config.h linux-4.4/kernel/vserver/vci_config.h
22632--- linux-4.4/kernel/vserver/vci_config.h 1970-01-01 01:00:00.000000000 +0100
22633+++ linux-4.4/kernel/vserver/vci_config.h 2021-02-24 16:56:24.612823601 +0100
4bf69007 22634@@ -0,0 +1,80 @@
2ba6f0dd 22635+
4bf69007 22636+/* interface version */
2ba6f0dd 22637+
4bf69007 22638+#define VCI_VERSION 0x00020308
2ba6f0dd 22639+
2ba6f0dd 22640+
4bf69007
AM
22641+enum {
22642+ VCI_KCBIT_NO_DYNAMIC = 0,
2ba6f0dd 22643+
4bf69007
AM
22644+ VCI_KCBIT_PROC_SECURE = 4,
22645+ /* VCI_KCBIT_HARDCPU = 5, */
22646+ /* VCI_KCBIT_IDLELIMIT = 6, */
22647+ /* VCI_KCBIT_IDLETIME = 7, */
2ba6f0dd 22648+
4bf69007
AM
22649+ VCI_KCBIT_COWBL = 8,
22650+ VCI_KCBIT_FULLCOWBL = 9,
22651+ VCI_KCBIT_SPACES = 10,
22652+ VCI_KCBIT_NETV2 = 11,
22653+ VCI_KCBIT_MEMCG = 12,
22654+ VCI_KCBIT_MEMCG_SWAP = 13,
2ba6f0dd 22655+
4bf69007
AM
22656+ VCI_KCBIT_DEBUG = 16,
22657+ VCI_KCBIT_HISTORY = 20,
22658+ VCI_KCBIT_TAGGED = 24,
22659+ VCI_KCBIT_PPTAG = 28,
2ba6f0dd 22660+
4bf69007 22661+ VCI_KCBIT_MORE = 31,
2ba6f0dd
AM
22662+};
22663+
2ba6f0dd 22664+
4bf69007
AM
22665+static inline uint32_t vci_kernel_config(void)
22666+{
22667+ return
22668+ (1 << VCI_KCBIT_NO_DYNAMIC) |
2ba6f0dd 22669+
4bf69007
AM
22670+ /* configured features */
22671+#ifdef CONFIG_VSERVER_PROC_SECURE
22672+ (1 << VCI_KCBIT_PROC_SECURE) |
22673+#endif
22674+#ifdef CONFIG_VSERVER_COWBL
22675+ (1 << VCI_KCBIT_COWBL) |
22676+ (1 << VCI_KCBIT_FULLCOWBL) |
22677+#endif
22678+ (1 << VCI_KCBIT_SPACES) |
22679+ (1 << VCI_KCBIT_NETV2) |
22680+#ifdef CONFIG_MEMCG
22681+ (1 << VCI_KCBIT_MEMCG) |
22682+#endif
22683+#ifdef CONFIG_MEMCG_SWAP
22684+ (1 << VCI_KCBIT_MEMCG_SWAP) |
22685+#endif
2ba6f0dd 22686+
4bf69007
AM
22687+ /* debug options */
22688+#ifdef CONFIG_VSERVER_DEBUG
22689+ (1 << VCI_KCBIT_DEBUG) |
22690+#endif
22691+#ifdef CONFIG_VSERVER_HISTORY
22692+ (1 << VCI_KCBIT_HISTORY) |
22693+#endif
2ba6f0dd 22694+
4bf69007
AM
22695+ /* inode context tagging */
22696+#if defined(CONFIG_TAGGING_NONE)
22697+ (0 << VCI_KCBIT_TAGGED) |
22698+#elif defined(CONFIG_TAGGING_UID16)
22699+ (1 << VCI_KCBIT_TAGGED) |
22700+#elif defined(CONFIG_TAGGING_GID16)
22701+ (2 << VCI_KCBIT_TAGGED) |
22702+#elif defined(CONFIG_TAGGING_ID24)
22703+ (3 << VCI_KCBIT_TAGGED) |
22704+#elif defined(CONFIG_TAGGING_INTERN)
22705+ (4 << VCI_KCBIT_TAGGED) |
22706+#elif defined(CONFIG_TAGGING_RUNTIME)
22707+ (5 << VCI_KCBIT_TAGGED) |
22708+#else
22709+ (7 << VCI_KCBIT_TAGGED) |
22710+#endif
22711+ (1 << VCI_KCBIT_PPTAG) |
22712+ 0;
22713+}
2ba6f0dd 22714+
c2806d43
AM
22715diff -urNp -x '*.orig' linux-4.4/mm/memcontrol.c linux-4.4/mm/memcontrol.c
22716--- linux-4.4/mm/memcontrol.c 2021-02-24 16:56:12.009093341 +0100
22717+++ linux-4.4/mm/memcontrol.c 2021-02-24 16:56:24.616157039 +0100
3cc86a71 22718@@ -2907,6 +2907,42 @@ static u64 mem_cgroup_read_u64(struct cg
927ca606 22719 }
4bf69007
AM
22720 }
22721
1d9ad342 22722+unsigned long mem_cgroup_mem_usage_pages(struct mem_cgroup *memcg)
4bf69007 22723+{
1d9ad342 22724+ return mem_cgroup_usage(memcg, false);
4bf69007 22725+}
2ba6f0dd 22726+
1d9ad342 22727+unsigned long mem_cgroup_mem_limit_pages(struct mem_cgroup *memcg)
4bf69007 22728+{
927ca606 22729+ return (u64)memcg->memory.limit;
4bf69007 22730+}
2ba6f0dd 22731+
1d9ad342 22732+unsigned long mem_cgroup_memsw_usage_pages(struct mem_cgroup *memcg)
4bf69007 22733+{
1d9ad342 22734+ return mem_cgroup_usage(memcg, true);
4bf69007 22735+}
2ba6f0dd 22736+
1d9ad342 22737+unsigned long mem_cgroup_memsw_limit_pages(struct mem_cgroup *memcg)
4bf69007 22738+{
927ca606 22739+ return (u64)memcg->memsw.limit;
4bf69007 22740+}
2ba6f0dd 22741+
1d9ad342
AM
22742+void dump_mem_cgroup(struct mem_cgroup *memcg)
22743+{
22744+ printk(KERN_INFO "memcg: %p/%d:\n"
22745+ "\tmemory:\t%lu/%lu %lu/%lu\n"
22746+ "\tmemsw:\t%lu/%lu %lu/%lu\n"
22747+ "\tkmem:\t%lu/%lu %lu/%lu\n",
22748+ memcg, memcg->id.id,
22749+ page_counter_read(&memcg->memory), memcg->memory.limit,
22750+ memcg->memory.watermark, memcg->memory.failcnt,
22751+ page_counter_read(&memcg->memsw), memcg->memsw.limit,
22752+ memcg->memsw.watermark, memcg->memsw.failcnt,
22753+ page_counter_read(&memcg->kmem), memcg->kmem.limit,
22754+ memcg->kmem.watermark, memcg->kmem.failcnt);
22755+}
2ba6f0dd 22756+
927ca606
AM
22757+
22758 #ifdef CONFIG_MEMCG_KMEM
22759 static int memcg_activate_kmem(struct mem_cgroup *memcg,
22760 unsigned long nr_pages)
c2806d43
AM
22761diff -urNp -x '*.orig' linux-4.4/mm/oom_kill.c linux-4.4/mm/oom_kill.c
22762--- linux-4.4/mm/oom_kill.c 2021-02-24 16:56:12.012426780 +0100
22763+++ linux-4.4/mm/oom_kill.c 2021-02-24 16:56:24.616157039 +0100
4bf69007
AM
22764@@ -35,6 +35,8 @@
22765 #include <linux/freezer.h>
22766 #include <linux/ftrace.h>
22767 #include <linux/ratelimit.h>
22768+#include <linux/reboot.h>
22769+#include <linux/vs_context.h>
22770
22771 #define CREATE_TRACE_POINTS
22772 #include <trace/events/oom.h>
927ca606 22773@@ -131,11 +133,18 @@ static inline bool is_sysrq_oom(struct o
4bf69007 22774 static bool oom_unkillable_task(struct task_struct *p,
927ca606 22775 struct mem_cgroup *memcg, const nodemask_t *nodemask)
4bf69007
AM
22776 {
22777- if (is_global_init(p))
22778+ unsigned xid = vx_current_xid();
2ba6f0dd 22779+
4bf69007
AM
22780+ /* skip the init task, global and per guest */
22781+ if (task_is_init(p))
22782 return true;
22783 if (p->flags & PF_KTHREAD)
22784 return true;
22785
22786+ /* skip other guest and host processes if oom in guest */
22787+ if (xid && vx_task_xid(p) != xid)
22788+ return true;
2ba6f0dd 22789+
4bf69007
AM
22790 /* When mem_cgroup_out_of_memory() and p is not member of the group */
22791 if (memcg && !task_in_mem_cgroup(p, memcg))
22792 return true;
927ca606
AM
22793@@ -534,8 +543,8 @@ void oom_kill_process(struct oom_control
22794 if (__ratelimit(&oom_rs))
22795 dump_header(oc, p, memcg);
4bf69007 22796
927ca606 22797- pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
4bf69007
AM
22798- message, task_pid_nr(p), p->comm, points);
22799+ pr_err("%s: Kill process %d:#%u (%s) score %d or sacrifice child\n",
22800+ message, task_pid_nr(p), p->xid, p->comm, points);
4bf69007
AM
22801
22802 /*
927ca606 22803 * If any of p's children has a different mm and is eligible for kill,
3cc86a71 22804@@ -593,8 +602,8 @@ void oom_kill_process(struct oom_control
927ca606
AM
22805 */
22806 do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
22807 mark_oom_victim(victim);
4bf69007
AM
22808- pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
22809- task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
927ca606 22810+ pr_err("Killed process %d:%u (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
4bf69007
AM
22811+ task_pid_nr(victim), victim->xid, victim->comm, K(victim->mm->total_vm),
22812 K(get_mm_counter(victim->mm, MM_ANONPAGES)),
22813 K(get_mm_counter(victim->mm, MM_FILEPAGES)));
22814 task_unlock(victim);
3cc86a71 22815@@ -630,6 +639,8 @@ void oom_kill_process(struct oom_control
4bf69007 22816 }
927ca606 22817 #undef K
4bf69007
AM
22818
22819+long vs_oom_action(unsigned int);
2ba6f0dd 22820+
4bf69007 22821 /*
927ca606
AM
22822 * Determines whether the kernel must panic because of the panic_on_oom sysctl.
22823 */
3cc86a71 22824@@ -730,7 +741,12 @@ bool out_of_memory(struct oom_control *o
4bf69007 22825 /* Found nothing?!?! Either we hang forever, or we panic. */
927ca606
AM
22826 if (!p && !is_sysrq_oom(oc)) {
22827 dump_header(oc, NULL, NULL);
4bf69007 22828- panic("Out of memory and no killable processes...\n");
2ba6f0dd 22829+
4bf69007
AM
22830+ /* avoid panic for guest OOM */
22831+ if (vx_current_xid())
22832+ vs_oom_action(LINUX_REBOOT_CMD_OOM);
22833+ else
22834+ panic("Out of memory and no killable processes...\n");
22835 }
927ca606
AM
22836 if (p && p != (void *)-1UL) {
22837 oom_kill_process(oc, p, points, totalpages, NULL,
c2806d43
AM
22838diff -urNp -x '*.orig' linux-4.4/mm/page_alloc.c linux-4.4/mm/page_alloc.c
22839--- linux-4.4/mm/page_alloc.c 2021-02-24 16:56:12.015760218 +0100
22840+++ linux-4.4/mm/page_alloc.c 2021-02-24 16:56:24.616157039 +0100
927ca606 22841@@ -62,6 +62,8 @@
b00e13aa 22842 #include <linux/sched/rt.h>
927ca606
AM
22843 #include <linux/page_owner.h>
22844 #include <linux/kthread.h>
4bf69007
AM
22845+#include <linux/vs_base.h>
22846+#include <linux/vs_limit.h>
22847
c2e5f7c8 22848 #include <asm/sections.h>
4bf69007 22849 #include <asm/tlbflush.h>
c2806d43 22850@@ -3682,14 +3684,17 @@ long si_mem_available(void)
48cb6a3c
JR
22851 */
22852 pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
22853 pagecache -= min(pagecache / 2, wmark_low);
22854- available += pagecache;
22855+ if (!vx_flags(VXF_VIRT_MEM, 0))
22856+ available += pagecache;
22857
22858 /*
22859 * Part of the reclaimable slab consists of items that are in use,
22860 * and cannot be freed. Cap this estimate at the low watermark.
22861 */
22862- available += global_page_state(NR_SLAB_RECLAIMABLE) -
22863- min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
22864+ if (!vx_flags(VXF_VIRT_MEM, 0))
22865+ available += global_page_state(NR_SLAB_RECLAIMABLE) -
22866+ min(global_page_state(NR_SLAB_RECLAIMABLE) / 2,
22867+ wmark_low);
22868
22869 if (available < 0)
22870 available = 0;
c2806d43 22871@@ -3706,6 +3711,9 @@ void si_meminfo(struct sysinfo *val)
4bf69007
AM
22872 val->totalhigh = totalhigh_pages;
22873 val->freehigh = nr_free_highpages();
22874 val->mem_unit = PAGE_SIZE;
2ba6f0dd 22875+
4bf69007
AM
22876+ if (vx_flags(VXF_VIRT_MEM, 0))
22877+ vx_vsi_meminfo(val);
22878 }
22879
22880 EXPORT_SYMBOL(si_meminfo);
c2806d43 22881@@ -3731,6 +3739,9 @@ void si_meminfo_node(struct sysinfo *val
4bf69007
AM
22882 val->freehigh = 0;
22883 #endif
22884 val->mem_unit = PAGE_SIZE;
2ba6f0dd 22885+
4bf69007
AM
22886+ if (vx_flags(VXF_VIRT_MEM, 0))
22887+ vx_vsi_meminfo(val);
22888 }
22889 #endif
22890
c2806d43
AM
22891diff -urNp -x '*.orig' linux-4.4/mm/pgtable-generic.c linux-4.4/mm/pgtable-generic.c
22892--- linux-4.4/mm/pgtable-generic.c 2021-02-24 16:56:12.015760218 +0100
22893+++ linux-4.4/mm/pgtable-generic.c 2021-02-24 16:56:24.616157039 +0100
4bf69007
AM
22894@@ -6,6 +6,8 @@
22895 * Copyright (C) 2010 Linus Torvalds
22896 */
22897
22898+#include <linux/mm.h>
2ba6f0dd 22899+
4bf69007
AM
22900 #include <linux/pagemap.h>
22901 #include <asm/tlb.h>
22902 #include <asm-generic/pgtable.h>
c2806d43
AM
22903diff -urNp -x '*.orig' linux-4.4/mm/shmem.c linux-4.4/mm/shmem.c
22904--- linux-4.4/mm/shmem.c 2021-02-24 16:56:12.019093656 +0100
22905+++ linux-4.4/mm/shmem.c 2021-02-24 16:56:24.616157039 +0100
48cb6a3c 22906@@ -2200,7 +2200,7 @@ static int shmem_statfs(struct dentry *d
4bf69007
AM
22907 {
22908 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
22909
22910- buf->f_type = TMPFS_MAGIC;
22911+ buf->f_type = TMPFS_SUPER_MAGIC;
22912 buf->f_bsize = PAGE_CACHE_SIZE;
22913 buf->f_namelen = NAME_MAX;
22914 if (sbinfo->max_blocks) {
48cb6a3c 22915@@ -3050,7 +3050,7 @@ int shmem_fill_super(struct super_block
4bf69007
AM
22916 sb->s_maxbytes = MAX_LFS_FILESIZE;
22917 sb->s_blocksize = PAGE_CACHE_SIZE;
22918 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
22919- sb->s_magic = TMPFS_MAGIC;
22920+ sb->s_magic = TMPFS_SUPER_MAGIC;
22921 sb->s_op = &shmem_ops;
22922 sb->s_time_gran = 1;
22923 #ifdef CONFIG_TMPFS_XATTR
c2806d43
AM
22924diff -urNp -x '*.orig' linux-4.4/mm/slab.c linux-4.4/mm/slab.c
22925--- linux-4.4/mm/slab.c 2021-02-24 16:56:12.019093656 +0100
22926+++ linux-4.4/mm/slab.c 2021-02-24 16:56:24.616157039 +0100
927ca606 22927@@ -337,6 +337,8 @@ static void kmem_cache_node_init(struct
4bf69007
AM
22928 #define STATS_INC_FREEMISS(x) do { } while (0)
22929 #endif
22930
22931+#include "slab_vs.h"
2ba6f0dd 22932+
4bf69007
AM
22933 #if DEBUG
22934
22935 /*
3cc86a71 22936@@ -3186,6 +3188,7 @@ slab_alloc_node(struct kmem_cache *cache
4bf69007
AM
22937 /* ___cache_alloc_node can fall back to other nodes */
22938 ptr = ____cache_alloc_node(cachep, flags, nodeid);
22939 out:
22940+ vx_slab_alloc(cachep, flags);
22941 local_irq_restore(save_flags);
22942 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
22943 kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
3cc86a71 22944@@ -3374,6 +3377,7 @@ static inline void __cache_free(struct k
4bf69007
AM
22945 check_irq_off();
22946 kmemleak_free_recursive(objp, cachep->flags);
22947 objp = cache_free_debugcheck(cachep, objp, caller);
22948+ vx_slab_free(cachep);
22949
22950 kmemcheck_slab_free(cachep, objp, cachep->object_size);
22951
c2806d43
AM
22952diff -urNp -x '*.orig' linux-4.4/mm/slab_vs.h linux-4.4/mm/slab_vs.h
22953--- linux-4.4/mm/slab_vs.h 1970-01-01 01:00:00.000000000 +0100
22954+++ linux-4.4/mm/slab_vs.h 2021-02-24 16:56:24.619490478 +0100
4bf69007 22955@@ -0,0 +1,29 @@
2ba6f0dd 22956+
4bf69007 22957+#include <linux/vserver/context.h>
2ba6f0dd 22958+
4bf69007 22959+#include <linux/vs_context.h>
2ba6f0dd 22960+
4bf69007
AM
22961+static inline
22962+void vx_slab_alloc(struct kmem_cache *cachep, gfp_t flags)
22963+{
22964+ int what = gfp_zone(cachep->allocflags);
22965+ struct vx_info *vxi = current_vx_info();
2ba6f0dd 22966+
4bf69007
AM
22967+ if (!vxi)
22968+ return;
2ba6f0dd 22969+
4bf69007
AM
22970+ atomic_add(cachep->size, &vxi->cacct.slab[what]);
22971+}
2ba6f0dd 22972+
4bf69007
AM
22973+static inline
22974+void vx_slab_free(struct kmem_cache *cachep)
22975+{
22976+ int what = gfp_zone(cachep->allocflags);
22977+ struct vx_info *vxi = current_vx_info();
2ba6f0dd 22978+
4bf69007
AM
22979+ if (!vxi)
22980+ return;
2ba6f0dd 22981+
4bf69007
AM
22982+ atomic_sub(cachep->size, &vxi->cacct.slab[what]);
22983+}
2ba6f0dd 22984+
c2806d43
AM
22985diff -urNp -x '*.orig' linux-4.4/mm/swapfile.c linux-4.4/mm/swapfile.c
22986--- linux-4.4/mm/swapfile.c 2021-02-24 16:56:12.019093656 +0100
22987+++ linux-4.4/mm/swapfile.c 2021-02-24 16:56:24.619490478 +0100
4bf69007
AM
22988@@ -39,6 +39,7 @@
22989 #include <asm/tlbflush.h>
22990 #include <linux/swapops.h>
927ca606 22991 #include <linux/swap_cgroup.h>
4bf69007
AM
22992+#include <linux/vs_base.h>
22993
22994 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
22995 unsigned char);
927ca606 22996@@ -2070,6 +2071,16 @@ static int swap_show(struct seq_file *sw
4bf69007
AM
22997
22998 if (si == SEQ_START_TOKEN) {
22999 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
23000+ if (vx_flags(VXF_VIRT_MEM, 0)) {
927ca606 23001+ struct sysinfo si = { 0 };
2ba6f0dd 23002+
4bf69007
AM
23003+ vx_vsi_swapinfo(&si);
23004+ if (si.totalswap < (1 << 10))
23005+ return 0;
23006+ seq_printf(swap, "%s\t\t\t\t\t%s\t%lu\t%lu\t%d\n",
23007+ "hdv0", "partition", si.totalswap >> 10,
23008+ (si.totalswap - si.freeswap) >> 10, -1);
23009+ }
23010 return 0;
23011 }
23012
8931d859 23013@@ -2627,6 +2638,8 @@ void si_swapinfo(struct sysinfo *val)
b00e13aa 23014 val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused;
4bf69007
AM
23015 val->totalswap = total_swap_pages + nr_to_be_unused;
23016 spin_unlock(&swap_lock);
23017+ if (vx_flags(VXF_VIRT_MEM, 0))
23018+ vx_vsi_swapinfo(val);
23019 }
23020
23021 /*
c2806d43
AM
23022diff -urNp -x '*.orig' linux-4.4/net/bridge/br_multicast.c linux-4.4/net/bridge/br_multicast.c
23023--- linux-4.4/net/bridge/br_multicast.c 2021-02-24 16:56:12.045761163 +0100
23024+++ linux-4.4/net/bridge/br_multicast.c 2021-02-24 16:56:24.619490478 +0100
927ca606 23025@@ -462,7 +462,7 @@ static struct sk_buff *br_ip6_multicast_
4bf69007
AM
23026 ip6h->hop_limit = 1;
23027 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
23028 if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
23029- &ip6h->saddr)) {
23030+ &ip6h->saddr, NULL)) {
23031 kfree_skb(skb);
927ca606 23032 br->has_ipv6_addr = 0;
4bf69007 23033 return NULL;
c2806d43
AM
23034diff -urNp -x '*.orig' linux-4.4/net/core/dev.c linux-4.4/net/core/dev.c
23035--- linux-4.4/net/core/dev.c 2021-02-24 16:56:12.545776909 +0100
23036+++ linux-4.4/net/core/dev.c 2021-02-24 16:56:24.619490478 +0100
23037@@ -125,6 +125,7 @@
4bf69007
AM
23038 #include <linux/in.h>
23039 #include <linux/jhash.h>
23040 #include <linux/random.h>
23041+#include <linux/vs_inet.h>
23042 #include <trace/events/napi.h>
23043 #include <trace/events/net.h>
23044 #include <trace/events/skb.h>
c2806d43 23045@@ -730,7 +731,8 @@ struct net_device *__dev_get_by_name(str
4bf69007
AM
23046 struct hlist_head *head = dev_name_hash(net, name);
23047
b00e13aa 23048 hlist_for_each_entry(dev, head, name_hlist)
4bf69007
AM
23049- if (!strncmp(dev->name, name, IFNAMSIZ))
23050+ if (!strncmp(dev->name, name, IFNAMSIZ) &&
23051+ nx_dev_visible(current_nx_info(), dev))
23052 return dev;
23053
23054 return NULL;
c2806d43 23055@@ -755,7 +757,8 @@ struct net_device *dev_get_by_name_rcu(s
4bf69007
AM
23056 struct hlist_head *head = dev_name_hash(net, name);
23057
b00e13aa 23058 hlist_for_each_entry_rcu(dev, head, name_hlist)
4bf69007
AM
23059- if (!strncmp(dev->name, name, IFNAMSIZ))
23060+ if (!strncmp(dev->name, name, IFNAMSIZ) &&
23061+ nx_dev_visible(current_nx_info(), dev))
23062 return dev;
23063
23064 return NULL;
c2806d43 23065@@ -805,7 +808,8 @@ struct net_device *__dev_get_by_index(st
4bf69007
AM
23066 struct hlist_head *head = dev_index_hash(net, ifindex);
23067
b00e13aa 23068 hlist_for_each_entry(dev, head, index_hlist)
4bf69007
AM
23069- if (dev->ifindex == ifindex)
23070+ if ((dev->ifindex == ifindex) &&
23071+ nx_dev_visible(current_nx_info(), dev))
23072 return dev;
23073
23074 return NULL;
c2806d43 23075@@ -823,7 +827,7 @@ EXPORT_SYMBOL(__dev_get_by_index);
4bf69007
AM
23076 * about locking. The caller must hold RCU lock.
23077 */
23078
23079-struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
23080+struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex)
23081 {
4bf69007 23082 struct net_device *dev;
b00e13aa 23083 struct hlist_head *head = dev_index_hash(net, ifindex);
c2806d43 23084@@ -834,6 +838,16 @@ struct net_device *dev_get_by_index_rcu(
4bf69007
AM
23085
23086 return NULL;
23087 }
23088+EXPORT_SYMBOL(dev_get_by_index_real_rcu);
2ba6f0dd 23089+
4bf69007
AM
23090+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
23091+{
23092+ struct net_device *dev = dev_get_by_index_real_rcu(net, ifindex);
2ba6f0dd 23093+
4bf69007
AM
23094+ if (nx_dev_visible(current_nx_info(), dev))
23095+ return dev;
23096+ return NULL;
23097+}
23098 EXPORT_SYMBOL(dev_get_by_index_rcu);
23099
23100
c2806d43 23101@@ -911,7 +925,8 @@ struct net_device *dev_getbyhwaddr_rcu(s
4bf69007
AM
23102
23103 for_each_netdev_rcu(net, dev)
23104 if (dev->type == type &&
23105- !memcmp(dev->dev_addr, ha, dev->addr_len))
23106+ !memcmp(dev->dev_addr, ha, dev->addr_len) &&
23107+ nx_dev_visible(current_nx_info(), dev))
23108 return dev;
23109
23110 return NULL;
c2806d43 23111@@ -923,9 +938,11 @@ struct net_device *__dev_getfirstbyhwtyp
4bf69007
AM
23112 struct net_device *dev;
23113
23114 ASSERT_RTNL();
23115- for_each_netdev(net, dev)
23116- if (dev->type == type)
23117+ for_each_netdev(net, dev) {
23118+ if ((dev->type == type) &&
23119+ nx_dev_visible(current_nx_info(), dev))
23120 return dev;
23121+ }
23122
23123 return NULL;
23124 }
c2806d43 23125@@ -937,7 +954,8 @@ struct net_device *dev_getfirstbyhwtype(
b00e13aa
AM
23126
23127 rcu_read_lock();
23128 for_each_netdev_rcu(net, dev)
23129- if (dev->type == type) {
23130+ if ((dev->type == type) &&
23131+ nx_dev_visible(current_nx_info(), dev)) {
23132 dev_hold(dev);
23133 ret = dev;
23134 break;
c2806d43 23135@@ -967,7 +985,8 @@ struct net_device *__dev_get_by_flags(st
b00e13aa
AM
23136
23137 ret = NULL;
bb20add7 23138 for_each_netdev(net, dev) {
b00e13aa
AM
23139- if (((dev->flags ^ if_flags) & mask) == 0) {
23140+ if ((((dev->flags ^ if_flags) & mask) == 0) &&
23141+ nx_dev_visible(current_nx_info(), dev)) {
23142 ret = dev;
23143 break;
23144 }
c2806d43 23145@@ -1045,6 +1064,8 @@ static int __dev_alloc_name(struct net *
4bf69007
AM
23146 continue;
23147 if (i < 0 || i >= max_netdevices)
23148 continue;
23149+ if (!nx_dev_visible(current_nx_info(), d))
23150+ continue;
23151
23152 /* avoid cases where sscanf is not exact inverse of printf */
23153 snprintf(buf, IFNAMSIZ, name, i);
c2806d43
AM
23154diff -urNp -x '*.orig' linux-4.4/net/core/net-procfs.c linux-4.4/net/core/net-procfs.c
23155--- linux-4.4/net/core/net-procfs.c 2016-01-11 00:01:32.000000000 +0100
23156+++ linux-4.4/net/core/net-procfs.c 2021-02-24 16:56:24.619490478 +0100
8ce283e1
AM
23157@@ -1,6 +1,7 @@
23158 #include <linux/netdevice.h>
23159 #include <linux/proc_fs.h>
23160 #include <linux/seq_file.h>
23161+#include <linux/vs_inet.h>
23162 #include <net/wext.h>
23163
23164 #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
23165@@ -77,8 +78,13 @@ static void dev_seq_stop(struct seq_file
23166 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
23167 {
23168 struct rtnl_link_stats64 temp;
23169- const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
23170+ const struct rtnl_link_stats64 *stats;
c2806d43 23171
8ce283e1
AM
23172+ /* device visible inside network context? */
23173+ if (!nx_dev_visible(current_nx_info(), dev))
23174+ return;
c2806d43 23175+
8ce283e1
AM
23176+ stats = dev_get_stats(dev, &temp);
23177 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
23178 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
23179 dev->name, stats->rx_bytes, stats->rx_packets,
c2806d43
AM
23180diff -urNp -x '*.orig' linux-4.4/net/core/rtnetlink.c linux-4.4/net/core/rtnetlink.c
23181--- linux-4.4/net/core/rtnetlink.c 2021-02-24 16:56:12.059094916 +0100
23182+++ linux-4.4/net/core/rtnetlink.c 2021-02-24 16:56:24.619490478 +0100
23183@@ -1457,6 +1457,8 @@ static int rtnl_dump_ifinfo(struct sk_bu
927ca606 23184 hlist_for_each_entry(dev, head, index_hlist) {
4bf69007
AM
23185 if (idx < s_idx)
23186 goto cont;
23187+ if (!nx_dev_visible(skb->sk->sk_nx_info, dev))
23188+ continue;
7ed51edd
JR
23189 err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
23190 NETLINK_CB(cb->skb).portid,
23191 cb->nlh->nlmsg_seq, 0,
c2806d43 23192@@ -2585,6 +2587,9 @@ void rtmsg_ifinfo(int type, struct net_d
927ca606
AM
23193 {
23194 struct sk_buff *skb;
4bf69007
AM
23195
23196+ if (!nx_dev_visible(current_nx_info(), dev))
23197+ return;
2ba6f0dd 23198+
927ca606
AM
23199 if (dev->reg_state != NETREG_REGISTERED)
23200 return;
23201
c2806d43
AM
23202diff -urNp -x '*.orig' linux-4.4/net/core/sock.c linux-4.4/net/core/sock.c
23203--- linux-4.4/net/core/sock.c 2021-02-24 16:56:12.059094916 +0100
23204+++ linux-4.4/net/core/sock.c 2021-02-24 16:56:24.619490478 +0100
927ca606
AM
23205@@ -134,6 +134,10 @@
23206 #include <linux/sock_diag.h>
4bf69007
AM
23207
23208 #include <linux/filter.h>
23209+#include <linux/vs_socket.h>
23210+#include <linux/vs_limit.h>
23211+#include <linux/vs_context.h>
23212+#include <linux/vs_network.h>
23213
23214 #include <trace/events/sock.h>
23215
c2806d43 23216@@ -1365,6 +1369,8 @@ static struct sock *sk_prot_alloc(struct
4bf69007
AM
23217 goto out_free_sec;
23218 sk_tx_queue_clear(sk);
23219 }
23220+ sock_vx_init(sk);
23221+ sock_nx_init(sk);
23222
23223 return sk;
23224
c2806d43 23225@@ -1472,6 +1478,11 @@ void sk_destruct(struct sock *sk)
9117a00a 23226
927ca606
AM
23227 if (likely(sk->sk_net_refcnt))
23228 put_net(sock_net(sk));
4bf69007
AM
23229+ vx_sock_dec(sk);
23230+ clr_vx_info(&sk->sk_vx_info);
23231+ sk->sk_xid = -1;
23232+ clr_nx_info(&sk->sk_nx_info);
23233+ sk->sk_nid = -1;
23234 sk_prot_free(sk->sk_prot_creator, sk);
23235 }
23236
c2806d43 23237@@ -1524,6 +1535,8 @@ struct sock *sk_clone_lock(const struct
4bf69007 23238 /* SANITY */
927ca606
AM
23239 if (likely(newsk->sk_net_refcnt))
23240 get_net(sock_net(newsk));
4bf69007
AM
23241+ sock_vx_init(newsk);
23242+ sock_nx_init(newsk);
23243 sk_node_init(&newsk->sk_node);
23244 sock_lock_init(newsk);
23245 bh_lock_sock(newsk);
c2806d43 23246@@ -1589,6 +1602,12 @@ struct sock *sk_clone_lock(const struct
4bf69007
AM
23247 smp_wmb();
23248 atomic_set(&newsk->sk_refcnt, 2);
23249
23250+ set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info);
23251+ newsk->sk_xid = sk->sk_xid;
23252+ vx_sock_inc(newsk);
23253+ set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info);
23254+ newsk->sk_nid = sk->sk_nid;
2ba6f0dd 23255+
4bf69007
AM
23256 /*
23257 * Increment the counter in the same struct proto as the master
23258 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
c2806d43 23259@@ -2452,6 +2471,12 @@ void sock_init_data(struct socket *sock,
3cc86a71
AM
23260 seqlock_init(&sk->sk_stamp_seq);
23261 #endif
4bf69007
AM
23262
23263+ set_vx_info(&sk->sk_vx_info, current_vx_info());
23264+ sk->sk_xid = vx_current_xid();
23265+ vx_sock_inc(sk);
23266+ set_nx_info(&sk->sk_nx_info, current_nx_info());
23267+ sk->sk_nid = nx_current_nid();
2ba6f0dd 23268+
c2e5f7c8
JR
23269 #ifdef CONFIG_NET_RX_BUSY_POLL
23270 sk->sk_napi_id = 0;
23271 sk->sk_ll_usec = sysctl_net_busy_read;
c2806d43
AM
23272diff -urNp -x '*.orig' linux-4.4/net/ipv4/af_inet.c linux-4.4/net/ipv4/af_inet.c
23273--- linux-4.4/net/ipv4/af_inet.c 2021-02-24 16:56:12.069095231 +0100
23274+++ linux-4.4/net/ipv4/af_inet.c 2021-02-24 16:56:24.622823916 +0100
927ca606 23275@@ -308,10 +308,15 @@ lookup_protocol:
4bf69007
AM
23276 }
23277
23278 err = -EPERM;
23279+ if ((protocol == IPPROTO_ICMP) &&
23280+ nx_capable(CAP_NET_RAW, NXC_RAW_ICMP))
23281+ goto override;
927ca606 23282+
b00e13aa
AM
23283 if (sock->type == SOCK_RAW && !kern &&
23284 !ns_capable(net->user_ns, CAP_NET_RAW))
4bf69007 23285 goto out_rcu_unlock;
927ca606 23286
a4a22af8
AM
23287+override:
23288 sock->ops = answer->ops;
23289 answer_prot = answer->prot;
bb20add7 23290 answer_flags = answer->flags;
927ca606 23291@@ -425,6 +430,7 @@ int inet_bind(struct socket *sock, struc
4bf69007
AM
23292 struct sock *sk = sock->sk;
23293 struct inet_sock *inet = inet_sk(sk);
b00e13aa 23294 struct net *net = sock_net(sk);
927ca606 23295+ struct nx_v4_sock_addr nsa;
4bf69007
AM
23296 unsigned short snum;
23297 int chk_addr_ret;
927ca606
AM
23298 u32 tb_id = RT_TABLE_LOCAL;
23299@@ -450,7 +456,11 @@ int inet_bind(struct socket *sock, struc
4bf69007
AM
23300 }
23301
927ca606
AM
23302 tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
23303- chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
4bf69007
AM
23304+ err = v4_map_sock_addr(inet, addr, &nsa);
23305+ if (err)
23306+ goto out;
2ba6f0dd 23307+
927ca606 23308+ chk_addr_ret = inet_addr_type_table(net, nsa.saddr, tb_id);
4bf69007
AM
23309
23310 /* Not specified by any standard per-se, however it breaks too
23311 * many applications when removed. It is unfortunate since
927ca606 23312@@ -462,7 +472,7 @@ int inet_bind(struct socket *sock, struc
4bf69007 23313 err = -EADDRNOTAVAIL;
bb20add7 23314 if (!net->ipv4.sysctl_ip_nonlocal_bind &&
4bf69007
AM
23315 !(inet->freebind || inet->transparent) &&
23316- addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
23317+ nsa.saddr != htonl(INADDR_ANY) &&
23318 chk_addr_ret != RTN_LOCAL &&
23319 chk_addr_ret != RTN_MULTICAST &&
23320 chk_addr_ret != RTN_BROADCAST)
927ca606 23321@@ -488,7 +498,7 @@ int inet_bind(struct socket *sock, struc
4bf69007
AM
23322 if (sk->sk_state != TCP_CLOSE || inet->inet_num)
23323 goto out_release_sock;
23324
23325- inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
23326+ v4_set_sock_addr(inet, &nsa);
23327 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
23328 inet->inet_saddr = 0; /* Use device */
23329
927ca606 23330@@ -708,11 +718,13 @@ int inet_getname(struct socket *sock, st
4bf69007
AM
23331 peer == 1))
23332 return -ENOTCONN;
23333 sin->sin_port = inet->inet_dport;
23334- sin->sin_addr.s_addr = inet->inet_daddr;
23335+ sin->sin_addr.s_addr =
23336+ nx_map_sock_lback(sk->sk_nx_info, inet->inet_daddr);
23337 } else {
23338 __be32 addr = inet->inet_rcv_saddr;
23339 if (!addr)
23340 addr = inet->inet_saddr;
23341+ addr = nx_map_sock_lback(sk->sk_nx_info, addr);
23342 sin->sin_port = inet->inet_sport;
23343 sin->sin_addr.s_addr = addr;
23344 }
927ca606
AM
23345@@ -896,6 +908,7 @@ static int inet_compat_ioctl(struct sock
23346 return err;
23347 }
23348 #endif
23349+#include <linux/vs_limit.h>
23350
23351 const struct proto_ops inet_stream_ops = {
23352 .family = PF_INET,
c2806d43
AM
23353diff -urNp -x '*.orig' linux-4.4/net/ipv4/arp.c linux-4.4/net/ipv4/arp.c
23354--- linux-4.4/net/ipv4/arp.c 2021-02-24 16:56:12.069095231 +0100
23355+++ linux-4.4/net/ipv4/arp.c 2021-02-24 16:56:24.622823916 +0100
8931d859 23356@@ -1307,6 +1307,7 @@ static void arp_format_neigh_entry(struc
4bf69007
AM
23357 struct net_device *dev = n->dev;
23358 int hatype = dev->type;
23359
23360+ /* FIXME: check for network context */
23361 read_lock(&n->lock);
23362 /* Convert hardware address to XX:XX:XX:XX ... form. */
23363 #if IS_ENABLED(CONFIG_AX25)
8931d859 23364@@ -1338,6 +1339,7 @@ static void arp_format_pneigh_entry(stru
4bf69007
AM
23365 int hatype = dev ? dev->type : 0;
23366 char tbuf[16];
23367
23368+ /* FIXME: check for network context */
23369 sprintf(tbuf, "%pI4", n->key);
23370 seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
23371 tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00",
c2806d43
AM
23372diff -urNp -x '*.orig' linux-4.4/net/ipv4/devinet.c linux-4.4/net/ipv4/devinet.c
23373--- linux-4.4/net/ipv4/devinet.c 2021-02-24 16:56:12.069095231 +0100
23374+++ linux-4.4/net/ipv4/devinet.c 2021-02-24 16:56:24.622823916 +0100
23375@@ -547,6 +547,7 @@ struct in_device *inetdev_by_index(struc
4bf69007
AM
23376 }
23377 EXPORT_SYMBOL(inetdev_by_index);
23378
2ba6f0dd 23379+
4bf69007
AM
23380 /* Called only from RTNL semaphored context. No locks. */
23381
23382 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
c2806d43 23383@@ -1006,6 +1007,8 @@ int devinet_ioctl(struct net *net, unsig
4bf69007
AM
23384
23385 in_dev = __in_dev_get_rtnl(dev);
23386 if (in_dev) {
23387+ struct nx_info *nxi = current_nx_info();
2ba6f0dd 23388+
4bf69007
AM
23389 if (tryaddrmatch) {
23390 /* Matthias Andree */
23391 /* compare label and address (4.4BSD style) */
c2806d43 23392@@ -1014,6 +1017,8 @@ int devinet_ioctl(struct net *net, unsig
4bf69007
AM
23393 This is checked above. */
23394 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
23395 ifap = &ifa->ifa_next) {
23396+ if (!nx_v4_ifa_visible(nxi, ifa))
23397+ continue;
23398 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
23399 sin_orig.sin_addr.s_addr ==
23400 ifa->ifa_local) {
c2806d43 23401@@ -1026,9 +1031,12 @@ int devinet_ioctl(struct net *net, unsig
4bf69007
AM
23402 comparing just the label */
23403 if (!ifa) {
23404 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
23405- ifap = &ifa->ifa_next)
23406+ ifap = &ifa->ifa_next) {
23407+ if (!nx_v4_ifa_visible(nxi, ifa))
23408+ continue;
23409 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
23410 break;
23411+ }
23412 }
23413 }
23414
c2806d43 23415@@ -1182,6 +1190,8 @@ static int inet_gifconf(struct net_devic
4bf69007
AM
23416 goto out;
23417
23418 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
23419+ if (!nx_v4_ifa_visible(current_nx_info(), ifa))
23420+ continue;
23421 if (!buf) {
23422 done += sizeof(ifr);
23423 continue;
c2806d43 23424@@ -1582,6 +1592,7 @@ static int inet_dump_ifaddr(struct sk_bu
4bf69007
AM
23425 struct net_device *dev;
23426 struct in_device *in_dev;
23427 struct in_ifaddr *ifa;
23428+ struct sock *sk = skb->sk;
23429 struct hlist_head *head;
4bf69007 23430
b00e13aa 23431 s_h = cb->args[0];
c2806d43 23432@@ -1605,6 +1616,8 @@ static int inet_dump_ifaddr(struct sk_bu
4bf69007
AM
23433
23434 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
23435 ifa = ifa->ifa_next, ip_idx++) {
23436+ if (sk && !nx_v4_ifa_visible(sk->sk_nx_info, ifa))
23437+ continue;
23438 if (ip_idx < s_ip_idx)
23439 continue;
23440 if (inet_fill_ifaddr(skb, ifa,
c2806d43
AM
23441diff -urNp -x '*.orig' linux-4.4/net/ipv4/fib_trie.c linux-4.4/net/ipv4/fib_trie.c
23442--- linux-4.4/net/ipv4/fib_trie.c 2021-02-24 16:56:12.072428669 +0100
23443+++ linux-4.4/net/ipv4/fib_trie.c 2021-02-24 16:56:24.622823916 +0100
23444@@ -2606,6 +2606,7 @@ static int fib_route_seq_show(struct seq
927ca606
AM
23445
23446 seq_setwidth(seq, 127);
23447
23448+ /* FIXME: check for network context? */
23449 if (fi)
23450 seq_printf(seq,
23451 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
c2806d43
AM
23452diff -urNp -x '*.orig' linux-4.4/net/ipv4/inet_connection_sock.c linux-4.4/net/ipv4/inet_connection_sock.c
23453--- linux-4.4/net/ipv4/inet_connection_sock.c 2021-02-24 16:56:12.072428669 +0100
23454+++ linux-4.4/net/ipv4/inet_connection_sock.c 2021-02-24 16:56:24.622823916 +0100
927ca606 23455@@ -43,6 +43,37 @@ void inet_get_local_port_range(struct ne
4bf69007
AM
23456 }
23457 EXPORT_SYMBOL(inet_get_local_port_range);
23458
23459+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
23460+{
c2e5f7c8
JR
23461+ __be32 sk1_rcv_saddr = sk1->sk_rcv_saddr,
23462+ sk2_rcv_saddr = sk2->sk_rcv_saddr;
2ba6f0dd 23463+
4bf69007
AM
23464+ if (inet_v6_ipv6only(sk2))
23465+ return 0;
2ba6f0dd 23466+
4bf69007
AM
23467+ if (sk1_rcv_saddr &&
23468+ sk2_rcv_saddr &&
23469+ sk1_rcv_saddr == sk2_rcv_saddr)
23470+ return 1;
2ba6f0dd 23471+
4bf69007
AM
23472+ if (sk1_rcv_saddr &&
23473+ !sk2_rcv_saddr &&
23474+ v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, NXA_MASK_BIND))
23475+ return 1;
2ba6f0dd 23476+
4bf69007
AM
23477+ if (sk2_rcv_saddr &&
23478+ !sk1_rcv_saddr &&
23479+ v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, NXA_MASK_BIND))
23480+ return 1;
2ba6f0dd 23481+
4bf69007
AM
23482+ if (!sk1_rcv_saddr &&
23483+ !sk2_rcv_saddr &&
23484+ nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info))
23485+ return 1;
2ba6f0dd 23486+
4bf69007
AM
23487+ return 0;
23488+}
2ba6f0dd 23489+
4bf69007
AM
23490 int inet_csk_bind_conflict(const struct sock *sk,
23491 const struct inet_bind_bucket *tb, bool relax)
23492 {
927ca606 23493@@ -70,15 +101,13 @@ int inet_csk_bind_conflict(const struct
b00e13aa
AM
23494 (sk2->sk_state != TCP_TIME_WAIT &&
23495 !uid_eq(uid, sock_i_uid(sk2))))) {
c2e5f7c8
JR
23496
23497- if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
23498- sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
4bf69007
AM
23499+ if (ipv4_rcv_saddr_equal(sk, sk2))
23500 break;
23501 }
23502 if (!relax && reuse && sk2->sk_reuse &&
b00e13aa 23503 sk2->sk_state != TCP_LISTEN) {
c2e5f7c8
JR
23504
23505- if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
23506- sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
b00e13aa
AM
23507+ if (ipv4_rcv_saddr_equal(sk, sk2))
23508 break;
23509 }
23510 }
c2806d43
AM
23511diff -urNp -x '*.orig' linux-4.4/net/ipv4/inet_diag.c linux-4.4/net/ipv4/inet_diag.c
23512--- linux-4.4/net/ipv4/inet_diag.c 2021-02-24 16:56:12.072428669 +0100
23513+++ linux-4.4/net/ipv4/inet_diag.c 2021-02-24 16:56:24.622823916 +0100
4bf69007
AM
23514@@ -31,6 +31,8 @@
23515
23516 #include <linux/inet.h>
23517 #include <linux/stddef.h>
23518+#include <linux/vs_network.h>
23519+#include <linux/vs_inet.h>
23520
23521 #include <linux/inet_diag.h>
23522 #include <linux/sock_diag.h>
8931d859
AM
23523@@ -85,8 +87,8 @@ static void inet_diag_msg_common_fill(st
23524 memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
23525 memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
23526
23527- r->id.idiag_src[0] = sk->sk_rcv_saddr;
23528- r->id.idiag_dst[0] = sk->sk_daddr;
23529+ r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, sk->sk_rcv_saddr);
23530+ r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, sk->sk_daddr);
23531 }
23532 }
23533
3cc86a71 23534@@ -768,6 +770,9 @@ void inet_diag_dump_icsk(struct inet_has
4bf69007
AM
23535 if (!net_eq(sock_net(sk), net))
23536 continue;
23537
23538+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23539+ continue;
8931d859 23540+
4bf69007
AM
23541 if (num < s_num) {
23542 num++;
23543 continue;
3cc86a71 23544@@ -829,6 +834,8 @@ skip_listen_ht:
4bf69007
AM
23545
23546 if (!net_eq(sock_net(sk), net))
23547 continue;
23548+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23549+ continue;
23550 if (num < s_num)
23551 goto next_normal;
c2e5f7c8 23552 state = (sk->sk_state == TCP_TIME_WAIT) ?
c2806d43
AM
23553diff -urNp -x '*.orig' linux-4.4/net/ipv4/inet_hashtables.c linux-4.4/net/ipv4/inet_hashtables.c
23554--- linux-4.4/net/ipv4/inet_hashtables.c 2021-02-24 16:56:12.072428669 +0100
23555+++ linux-4.4/net/ipv4/inet_hashtables.c 2021-02-24 16:56:24.622823916 +0100
927ca606 23556@@ -23,6 +23,7 @@
4bf69007
AM
23557 #include <net/inet_connection_sock.h>
23558 #include <net/inet_hashtables.h>
23559 #include <net/secure_seq.h>
23560+#include <net/route.h>
23561 #include <net/ip.h>
23562
927ca606 23563 static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
c2806d43 23564@@ -184,6 +185,11 @@ static inline int compute_score(struct s
4bf69007
AM
23565 if (rcv_saddr != daddr)
23566 return -1;
b00e13aa 23567 score += 4;
4bf69007
AM
23568+ } else {
23569+ /* block non nx_info ips */
23570+ if (!v4_addr_in_nx_info(sk->sk_nx_info,
23571+ daddr, NXA_MASK_BIND))
23572+ return -1;
23573 }
23574 if (sk->sk_bound_dev_if) {
23575 if (sk->sk_bound_dev_if != dif)
c2806d43 23576@@ -203,7 +209,6 @@ static inline int compute_score(struct s
4bf69007
AM
23577 * wildcarded during the search since they can never be otherwise.
23578 */
23579
23580-
23581 struct sock *__inet_lookup_listener(struct net *net,
23582 struct inet_hashinfo *hashinfo,
b00e13aa 23583 const __be32 saddr, __be16 sport,
c2806d43 23584@@ -239,6 +244,7 @@ begin:
b00e13aa 23585 phash = next_pseudo_random32(phash);
4bf69007
AM
23586 }
23587 }
2ba6f0dd 23588+
4bf69007
AM
23589 /*
23590 * if the nulls value we got at the end of this lookup is
23591 * not the expected one, we must restart lookup.
c2806d43
AM
23592diff -urNp -x '*.orig' linux-4.4/net/ipv4/netfilter.c linux-4.4/net/ipv4/netfilter.c
23593--- linux-4.4/net/ipv4/netfilter.c 2021-02-24 16:56:12.079095546 +0100
23594+++ linux-4.4/net/ipv4/netfilter.c 2021-02-24 16:56:24.622823916 +0100
09be7631 23595@@ -11,7 +11,7 @@
4bf69007
AM
23596 #include <linux/skbuff.h>
23597 #include <linux/gfp.h>
23598 #include <linux/export.h>
23599-#include <net/route.h>
23600+// #include <net/route.h>
23601 #include <net/xfrm.h>
23602 #include <net/ip.h>
23603 #include <net/netfilter/nf_queue.h>
c2806d43
AM
23604diff -urNp -x '*.orig' linux-4.4/net/ipv4/raw.c linux-4.4/net/ipv4/raw.c
23605--- linux-4.4/net/ipv4/raw.c 2021-02-24 16:56:12.082428984 +0100
23606+++ linux-4.4/net/ipv4/raw.c 2021-02-24 16:56:24.622823916 +0100
927ca606 23607@@ -126,7 +126,7 @@ static struct sock *__raw_v4_lookup(stru
4bf69007
AM
23608
23609 if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
23610 !(inet->inet_daddr && inet->inet_daddr != raddr) &&
23611- !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
23612+ v4_sock_addr_match(sk->sk_nx_info, inet, laddr) &&
23613 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
23614 goto found; /* gotcha */
23615 }
927ca606
AM
23616@@ -416,6 +416,12 @@ static int raw_send_hdrinc(struct sock *
23617 skb_transport_header(skb))->type);
23618 }
4bf69007
AM
23619
23620+ err = -EPERM;
23621+ if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) &&
23622+ sk->sk_nx_info &&
23623+ !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND))
23624+ goto error_free;
2ba6f0dd 23625+
927ca606
AM
23626 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
23627 net, sk, skb, NULL, rt->dst.dev,
23628 dst_output);
c2806d43 23629@@ -628,6 +634,16 @@ static int raw_sendmsg(struct sock *sk,
4bf69007
AM
23630 goto done;
23631 }
23632
23633+ if (sk->sk_nx_info) {
23634+ rt = ip_v4_find_src(sock_net(sk), sk->sk_nx_info, &fl4);
23635+ if (IS_ERR(rt)) {
23636+ err = PTR_ERR(rt);
23637+ rt = NULL;
23638+ goto done;
23639+ }
23640+ ip_rt_put(rt);
23641+ }
2ba6f0dd 23642+
4bf69007 23643 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
927ca606 23644 rt = ip_route_output_flow(net, &fl4, sk);
4bf69007 23645 if (IS_ERR(rt)) {
6aa3bfb4 23646@@ -722,6 +722,7 @@ static int raw_bind(struct sock *sk, str
4bf69007
AM
23647 {
23648 struct inet_sock *inet = inet_sk(sk);
23649 struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
23650+ struct nx_v4_sock_addr nsa = { 0 };
23651 int ret = -EINVAL;
23652 int chk_addr_ret;
23653
6aa3bfb4
AM
23654@@ -728,12 +728,13 @@ static int raw_bind(struct sock *sk, str
23655 lock_sock(sk);
4bf69007
AM
23656 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
23657 goto out;
23658- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
6aa3bfb4
AM
23659+ v4_map_sock_addr(inet, addr, &nsa);
23660+ chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr);
4bf69007
AM
23661 ret = -EADDRNOTAVAIL;
23662- if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
6aa3bfb4 23663+ if (nsa.saddr && chk_addr_ret != RTN_LOCAL &&
4bf69007
AM
23664 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
23665 goto out;
23666- inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
6aa3bfb4 23667+ v4_set_sock_addr(inet, &nsa);
4bf69007
AM
23668 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
23669 inet->inet_saddr = 0; /* Use device */
23670 sk_dst_reset(sk);
c2806d43 23671@@ -765,7 +783,8 @@ static int raw_recvmsg(struct sock *sk,
4bf69007
AM
23672 /* Copy the address. */
23673 if (sin) {
23674 sin->sin_family = AF_INET;
23675- sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
23676+ sin->sin_addr.s_addr =
23677+ nx_map_sock_lback(sk->sk_nx_info, ip_hdr(skb)->saddr);
23678 sin->sin_port = 0;
23679 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
c2e5f7c8 23680 *addr_len = sizeof(*sin);
c2806d43 23681@@ -961,7 +980,8 @@ static struct sock *raw_get_first(struct
b00e13aa
AM
23682 for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
23683 ++state->bucket) {
23684 sk_for_each(sk, &state->h->ht[state->bucket])
4bf69007
AM
23685- if (sock_net(sk) == seq_file_net(seq))
23686+ if ((sock_net(sk) == seq_file_net(seq)) &&
b00e13aa 23687+ nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
4bf69007
AM
23688 goto found;
23689 }
23690 sk = NULL;
c2806d43 23691@@ -977,7 +997,8 @@ static struct sock *raw_get_next(struct
4bf69007
AM
23692 sk = sk_next(sk);
23693 try_again:
23694 ;
23695- } while (sk && sock_net(sk) != seq_file_net(seq));
23696+ } while (sk && ((sock_net(sk) != seq_file_net(seq)) ||
23697+ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
23698
23699 if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
23700 sk = sk_head(&state->h->ht[state->bucket]);
c2806d43
AM
23701diff -urNp -x '*.orig' linux-4.4/net/ipv4/route.c linux-4.4/net/ipv4/route.c
23702--- linux-4.4/net/ipv4/route.c 2021-02-24 16:56:12.082428984 +0100
23703+++ linux-4.4/net/ipv4/route.c 2021-02-24 16:56:24.626157354 +0100
23704@@ -2282,7 +2282,7 @@ struct rtable *__ip_route_output_key_has
4bf69007
AM
23705
23706
23707 if (fl4->flowi4_oif) {
23708- dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
23709+ dev_out = dev_get_by_index_real_rcu(net, fl4->flowi4_oif);
23710 rth = ERR_PTR(-ENODEV);
927ca606 23711 if (!dev_out)
4bf69007 23712 goto out;
c2806d43
AM
23713diff -urNp -x '*.orig' linux-4.4/net/ipv4/tcp.c linux-4.4/net/ipv4/tcp.c
23714--- linux-4.4/net/ipv4/tcp.c 2021-02-24 16:56:12.082428984 +0100
23715+++ linux-4.4/net/ipv4/tcp.c 2021-02-24 16:56:24.626157354 +0100
927ca606 23716@@ -269,6 +269,7 @@
4bf69007
AM
23717 #include <linux/crypto.h>
23718 #include <linux/time.h>
23719 #include <linux/slab.h>
23720+#include <linux/in.h>
23721
23722 #include <net/icmp.h>
23723 #include <net/inet_common.h>
c2806d43
AM
23724diff -urNp -x '*.orig' linux-4.4/net/ipv4/tcp_ipv4.c linux-4.4/net/ipv4/tcp_ipv4.c
23725--- linux-4.4/net/ipv4/tcp_ipv4.c 2021-02-24 16:56:12.085762422 +0100
23726+++ linux-4.4/net/ipv4/tcp_ipv4.c 2021-02-24 16:56:24.626157354 +0100
23727@@ -1900,6 +1900,10 @@ static void *listening_get_next(struct s
927ca606 23728 sk = sk_nulls_next(sk);
4bf69007
AM
23729 get_sk:
23730 sk_nulls_for_each_from(sk, node) {
23731+ vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
23732+ sk, sk->sk_nid, nx_current_nid());
23733+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23734+ continue;
23735 if (!net_eq(sock_net(sk), net))
23736 continue;
23737 if (sk->sk_family == st->family) {
c2806d43 23738@@ -1964,6 +1968,11 @@ static void *established_get_first(struc
4bf69007
AM
23739
23740 spin_lock_bh(lock);
23741 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
23742+ vxdprintk(VXD_CBIT(net, 6),
23743+ "sk,egf: %p [#%d] (from %d)",
23744+ sk, sk->sk_nid, nx_current_nid());
23745+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23746+ continue;
23747 if (sk->sk_family != st->family ||
23748 !net_eq(sock_net(sk), net)) {
23749 continue;
c2806d43 23750@@ -1990,6 +1999,11 @@ static void *established_get_next(struct
c2e5f7c8 23751 sk = sk_nulls_next(sk);
4bf69007
AM
23752
23753 sk_nulls_for_each_from(sk, node) {
23754+ vxdprintk(VXD_CBIT(net, 6),
23755+ "sk,egn: %p [#%d] (from %d)",
23756+ sk, sk->sk_nid, nx_current_nid());
23757+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23758+ continue;
23759 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
c2e5f7c8 23760 return sk;
4bf69007 23761 }
c2806d43 23762@@ -2181,9 +2195,9 @@ static void get_openreq4(const struct re
4bf69007 23763 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
c2e5f7c8 23764 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
4bf69007 23765 i,
c2e5f7c8
JR
23766- ireq->ir_loc_addr,
23767+ nx_map_sock_lback(current_nx_info(), ireq->ir_loc_addr),
927ca606 23768 ireq->ir_num,
c2e5f7c8
JR
23769- ireq->ir_rmt_addr,
23770+ nx_map_sock_lback(current_nx_info(), ireq->ir_rmt_addr),
23771 ntohs(ireq->ir_rmt_port),
4bf69007
AM
23772 TCP_SYN_RECV,
23773 0, 0, /* could print option size, but that is af dependent. */
c2806d43 23774@@ -2206,8 +2220,8 @@ static void get_tcp4_sock(struct sock *s
4bf69007
AM
23775 const struct inet_connection_sock *icsk = inet_csk(sk);
23776 const struct inet_sock *inet = inet_sk(sk);
927ca606 23777 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
4bf69007
AM
23778- __be32 dest = inet->inet_daddr;
23779- __be32 src = inet->inet_rcv_saddr;
23780+ __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr);
23781+ __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr);
23782 __u16 destp = ntohs(inet->inet_dport);
23783 __u16 srcp = ntohs(inet->inet_sport);
23784 int rx_queue;
c2806d43 23785@@ -2266,8 +2280,8 @@ static void get_timewait4_sock(const str
927ca606 23786 __be32 dest, src;
4bf69007 23787 __u16 destp, srcp;
4bf69007
AM
23788
23789- dest = tw->tw_daddr;
23790- src = tw->tw_rcv_saddr;
23791+ dest = nx_map_sock_lback(current_nx_info(), tw->tw_daddr);
23792+ src = nx_map_sock_lback(current_nx_info(), tw->tw_rcv_saddr);
23793 destp = ntohs(tw->tw_dport);
23794 srcp = ntohs(tw->tw_sport);
23795
c2806d43
AM
23796diff -urNp -x '*.orig' linux-4.4/net/ipv4/tcp_minisocks.c linux-4.4/net/ipv4/tcp_minisocks.c
23797--- linux-4.4/net/ipv4/tcp_minisocks.c 2021-02-24 16:56:12.089095860 +0100
23798+++ linux-4.4/net/ipv4/tcp_minisocks.c 2021-02-24 16:56:24.626157354 +0100
4bf69007
AM
23799@@ -23,6 +23,9 @@
23800 #include <linux/slab.h>
23801 #include <linux/sysctl.h>
23802 #include <linux/workqueue.h>
23803+#include <linux/vs_limit.h>
23804+#include <linux/vs_socket.h>
23805+#include <linux/vs_context.h>
23806 #include <net/tcp.h>
23807 #include <net/inet_common.h>
23808 #include <net/xfrm.h>
8931d859 23809@@ -292,6 +295,11 @@ void tcp_time_wait(struct sock *sk, int
b00e13aa 23810 tcptw->tw_ts_offset = tp->tsoffset;
927ca606 23811 tcptw->tw_last_oow_ack_time = 0;
4bf69007
AM
23812
23813+ tw->tw_xid = sk->sk_xid;
23814+ tw->tw_vx_info = NULL;
23815+ tw->tw_nid = sk->sk_nid;
23816+ tw->tw_nx_info = NULL;
2ba6f0dd 23817+
4bf69007
AM
23818 #if IS_ENABLED(CONFIG_IPV6)
23819 if (tw->tw_family == PF_INET6) {
23820 struct ipv6_pinfo *np = inet6_sk(sk);
c2806d43
AM
23821diff -urNp -x '*.orig' linux-4.4/net/ipv4/udp.c linux-4.4/net/ipv4/udp.c
23822--- linux-4.4/net/ipv4/udp.c 2021-02-24 16:56:12.089095860 +0100
23823+++ linux-4.4/net/ipv4/udp.c 2021-02-24 16:56:24.626157354 +0100
bb20add7 23824@@ -309,14 +309,7 @@ fail:
4bf69007
AM
23825 }
23826 EXPORT_SYMBOL(udp_lib_get_port);
23827
23828-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
23829-{
23830- struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
23831-
23832- return (!ipv6_only_sock(sk2) &&
23833- (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
23834- inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
23835-}
23836+extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
23837
927ca606
AM
23838 static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
23839 unsigned int port)
23840@@ -355,6 +348,11 @@ static inline int compute_score(struct s
23841 if (inet->inet_rcv_saddr != daddr)
23842 return -1;
23843 score += 4;
4bf69007
AM
23844+ } else {
23845+ /* block non nx_info ips */
23846+ if (!v4_addr_in_nx_info(sk->sk_nx_info,
23847+ daddr, NXA_MASK_BIND))
23848+ return -1;
927ca606
AM
23849 }
23850
23851 if (inet->inet_daddr) {
23852@@ -489,6 +487,7 @@ begin:
4bf69007
AM
23853 return result;
23854 }
23855
2ba6f0dd 23856+
4bf69007
AM
23857 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
23858 * harder than this. -DaveM
23859 */
927ca606 23860@@ -535,6 +534,11 @@ begin:
4bf69007
AM
23861 sk_nulls_for_each_rcu(sk, node, &hslot->head) {
23862 score = compute_score(sk, net, saddr, hnum, sport,
23863 daddr, dport, dif);
23864+ /* FIXME: disabled?
23865+ if (score == 9) {
23866+ result = sk;
23867+ break;
23868+ } else */
23869 if (score > badness) {
23870 result = sk;
23871 badness = score;
927ca606 23872@@ -559,6 +563,7 @@ begin:
4bf69007
AM
23873 if (get_nulls_value(node) != slot)
23874 goto begin;
23875
2ba6f0dd 23876+
4bf69007
AM
23877 if (result) {
23878 if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
23879 result = NULL;
927ca606 23880@@ -568,6 +573,7 @@ begin:
4bf69007
AM
23881 goto begin;
23882 }
23883 }
2ba6f0dd 23884+
4bf69007
AM
23885 rcu_read_unlock();
23886 return result;
23887 }
927ca606 23888@@ -602,7 +608,7 @@ static inline bool __udp_is_mcast_sock(s
c2e5f7c8
JR
23889 udp_sk(sk)->udp_port_hash != hnum ||
23890 (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
23891 (inet->inet_dport != rmt_port && inet->inet_dport) ||
23892- (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
23893+ !v4_sock_addr_match(sk->sk_nx_info, inet, loc_addr) ||
23894 ipv6_only_sock(sk) ||
23895 (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
23896 return false;
8931d859 23897@@ -1035,6 +1041,16 @@ int udp_sendmsg(struct sock *sk, struct
927ca606
AM
23898 goto out;
23899 }
4bf69007
AM
23900
23901+ if (sk->sk_nx_info) {
23902+ rt = ip_v4_find_src(net, sk->sk_nx_info, fl4);
23903+ if (IS_ERR(rt)) {
23904+ err = PTR_ERR(rt);
23905+ rt = NULL;
23906+ goto out;
23907+ }
23908+ ip_rt_put(rt);
23909+ }
2ba6f0dd 23910+
4bf69007
AM
23911 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
23912 rt = ip_route_output_flow(net, fl4, sk);
23913 if (IS_ERR(rt)) {
8931d859 23914@@ -1340,7 +1356,8 @@ try_again:
4bf69007
AM
23915 if (sin) {
23916 sin->sin_family = AF_INET;
23917 sin->sin_port = udp_hdr(skb)->source;
23918- sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
23919+ sin->sin_addr.s_addr = nx_map_sock_lback(
23920+ skb->sk->sk_nx_info, ip_hdr(skb)->saddr);
23921 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
c2e5f7c8 23922 *addr_len = sizeof(*sin);
4bf69007 23923 }
c2806d43 23924@@ -2329,6 +2346,8 @@ static struct sock *udp_get_first(struct
4bf69007
AM
23925 sk_nulls_for_each(sk, node, &hslot->head) {
23926 if (!net_eq(sock_net(sk), net))
23927 continue;
23928+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23929+ continue;
23930 if (sk->sk_family == state->family)
23931 goto found;
23932 }
c2806d43 23933@@ -2346,7 +2365,9 @@ static struct sock *udp_get_next(struct
4bf69007
AM
23934
23935 do {
23936 sk = sk_nulls_next(sk);
23937- } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
23938+ } while (sk && (!net_eq(sock_net(sk), net) ||
23939+ sk->sk_family != state->family ||
23940+ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
23941
23942 if (!sk) {
23943 if (state->bucket <= state->udp_table->mask)
c2806d43 23944@@ -2442,8 +2463,8 @@ static void udp4_format_sock(struct sock
c2e5f7c8 23945 int bucket)
4bf69007
AM
23946 {
23947 struct inet_sock *inet = inet_sk(sp);
23948- __be32 dest = inet->inet_daddr;
23949- __be32 src = inet->inet_rcv_saddr;
23950+ __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr);
23951+ __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr);
23952 __u16 destp = ntohs(inet->inet_dport);
23953 __u16 srcp = ntohs(inet->inet_sport);
23954
c2806d43
AM
23955diff -urNp -x '*.orig' linux-4.4/net/ipv4/udp_diag.c linux-4.4/net/ipv4/udp_diag.c
23956--- linux-4.4/net/ipv4/udp_diag.c 2016-01-11 00:01:32.000000000 +0100
23957+++ linux-4.4/net/ipv4/udp_diag.c 2021-02-24 16:56:24.626157354 +0100
8931d859
AM
23958@@ -118,6 +118,8 @@ static void udp_dump(struct udp_table *t
23959
23960 if (!net_eq(sock_net(sk), net))
23961 continue;
23962+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23963+ continue;
23964 if (num < s_num)
23965 goto next;
23966 if (!(r->idiag_states & (1 << sk->sk_state)))
c2806d43
AM
23967diff -urNp -x '*.orig' linux-4.4/net/ipv6/addrconf.c linux-4.4/net/ipv6/addrconf.c
23968--- linux-4.4/net/ipv6/addrconf.c 2021-02-24 16:56:12.092429299 +0100
23969+++ linux-4.4/net/ipv6/addrconf.c 2021-02-24 16:56:24.626157354 +0100
927ca606 23970@@ -92,6 +92,8 @@
4bf69007
AM
23971 #include <linux/proc_fs.h>
23972 #include <linux/seq_file.h>
23973 #include <linux/export.h>
23974+#include <linux/vs_network.h>
23975+#include <linux/vs_inet6.h>
23976
23977 /* Set to 3 to get tracing... */
23978 #define ACONF_DEBUG 2
c2806d43 23979@@ -1453,7 +1455,8 @@ static int __ipv6_dev_get_saddr(struct n
927ca606
AM
23980 struct ipv6_saddr_dst *dst,
23981 struct inet6_dev *idev,
23982 struct ipv6_saddr_score *scores,
23983- int hiscore_idx)
23984+ int hiscore_idx,
23985+ struct nx_info *nxi)
23986 {
23987 struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
23988
c2806d43 23989@@ -1483,6 +1486,8 @@ static int __ipv6_dev_get_saddr(struct n
927ca606
AM
23990 idev->dev->name);
23991 continue;
23992 }
23993+ if (!v6_addr_in_nx_info(nxi, &score->ifa->addr, -1))
23994+ continue;
23995
23996 score->rule = -1;
23997 bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
c2806d43 23998@@ -1530,7 +1535,7 @@ out:
4bf69007
AM
23999
24000 int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
24001 const struct in6_addr *daddr, unsigned int prefs,
24002- struct in6_addr *saddr)
24003+ struct in6_addr *saddr, struct nx_info *nxi)
24004 {
927ca606
AM
24005 struct ipv6_saddr_score scores[2], *hiscore;
24006 struct ipv6_saddr_dst dst;
c2806d43 24007@@ -1579,13 +1584,15 @@ int ipv6_dev_get_saddr(struct net *net,
927ca606
AM
24008
24009 if (use_oif_addr) {
24010 if (idev)
24011- hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
24012+ hiscore_idx = __ipv6_dev_get_saddr(net, &dst,
24013+ idev, scores, hiscore_idx, nxi);
24014 } else {
24015 for_each_netdev_rcu(net, dev) {
24016 idev = __in6_dev_get(dev);
24017 if (!idev)
4bf69007 24018 continue;
927ca606
AM
24019- hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
24020+ hiscore_idx = __ipv6_dev_get_saddr(net, &dst,
24021+ idev, scores, hiscore_idx, nxi);
24022 }
24023 }
24024 rcu_read_unlock();
c2806d43 24025@@ -3866,7 +3873,10 @@ static void if6_seq_stop(struct seq_file
4bf69007
AM
24026 static int if6_seq_show(struct seq_file *seq, void *v)
24027 {
24028 struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
24029- seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
2ba6f0dd 24030+
4bf69007
AM
24031+ if (nx_check(0, VS_ADMIN|VS_WATCH) ||
24032+ v6_addr_in_nx_info(current_nx_info(), &ifp->addr, -1))
24033+ seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
24034 &ifp->addr,
24035 ifp->idev->dev->ifindex,
24036 ifp->prefix_len,
c2806d43 24037@@ -4450,6 +4460,11 @@ static int in6_dump_addrs(struct inet6_d
4bf69007
AM
24038 struct ifacaddr6 *ifaca;
24039 int err = 1;
24040 int ip_idx = *p_ip_idx;
24041+ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
2ba6f0dd 24042+
4bf69007
AM
24043+ /* disable ipv6 on non v6 guests */
24044+ if (nxi && !nx_info_has_v6(nxi))
24045+ return skb->len;
24046
24047 read_lock_bh(&idev->lock);
24048 switch (type) {
c2806d43 24049@@ -4460,6 +4475,8 @@ static int in6_dump_addrs(struct inet6_d
4bf69007 24050 list_for_each_entry(ifa, &idev->addr_list, if_list) {
1a30d8a3
JR
24051 if (ip_idx < s_ip_idx)
24052 goto next;
927ca606 24053+ if (!v6_addr_in_nx_info(nxi, &ifa->addr, -1))
1a30d8a3 24054+ goto next;
4bf69007
AM
24055 err = inet6_fill_ifaddr(skb, ifa,
24056 NETLINK_CB(cb->skb).portid,
24057 cb->nlh->nlmsg_seq,
c2806d43 24058@@ -4479,6 +4496,8 @@ next:
4bf69007
AM
24059 ifmca = ifmca->next, ip_idx++) {
24060 if (ip_idx < s_ip_idx)
24061 continue;
927ca606
AM
24062+ if (!v6_addr_in_nx_info(nxi, &ifmca->mca_addr, -1))
24063+ continue;
4bf69007
AM
24064 err = inet6_fill_ifmcaddr(skb, ifmca,
24065 NETLINK_CB(cb->skb).portid,
24066 cb->nlh->nlmsg_seq,
c2806d43 24067@@ -4494,6 +4513,8 @@ next:
4bf69007
AM
24068 ifaca = ifaca->aca_next, ip_idx++) {
24069 if (ip_idx < s_ip_idx)
24070 continue;
927ca606
AM
24071+ if (!v6_addr_in_nx_info(nxi, &ifaca->aca_addr, -1))
24072+ continue;
4bf69007
AM
24073 err = inet6_fill_ifacaddr(skb, ifaca,
24074 NETLINK_CB(cb->skb).portid,
24075 cb->nlh->nlmsg_seq,
c2806d43 24076@@ -4522,6 +4543,10 @@ static int inet6_dump_addr(struct sk_buf
4bf69007
AM
24077 struct inet6_dev *idev;
24078 struct hlist_head *head;
b00e13aa 24079
4bf69007
AM
24080+ /* FIXME: maybe disable ipv6 on non v6 guests?
24081+ if (skb->sk && skb->sk->sk_vx_info)
24082+ return skb->len; */
b00e13aa
AM
24083+
24084 s_h = cb->args[0];
24085 s_idx = idx = cb->args[1];
24086 s_ip_idx = ip_idx = cb->args[2];
c2806d43 24087@@ -5030,6 +5055,7 @@ static int inet6_dump_ifinfo(struct sk_b
b00e13aa
AM
24088 struct net_device *dev;
24089 struct inet6_dev *idev;
24090 struct hlist_head *head;
24091+ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
4bf69007
AM
24092
24093 s_h = cb->args[0];
24094 s_idx = cb->args[1];
c2806d43 24095@@ -5041,6 +5067,8 @@ static int inet6_dump_ifinfo(struct sk_b
b00e13aa 24096 hlist_for_each_entry_rcu(dev, head, index_hlist) {
4bf69007
AM
24097 if (idx < s_idx)
24098 goto cont;
24099+ if (!v6_dev_in_nx_info(dev, nxi))
24100+ goto cont;
24101 idev = __in6_dev_get(dev);
24102 if (!idev)
24103 goto cont;
c2806d43
AM
24104diff -urNp -x '*.orig' linux-4.4/net/ipv6/af_inet6.c linux-4.4/net/ipv6/af_inet6.c
24105--- linux-4.4/net/ipv6/af_inet6.c 2021-02-24 16:56:12.092429299 +0100
24106+++ linux-4.4/net/ipv6/af_inet6.c 2021-02-24 16:56:24.626157354 +0100
4bf69007
AM
24107@@ -43,6 +43,8 @@
24108 #include <linux/netdevice.h>
24109 #include <linux/icmpv6.h>
24110 #include <linux/netfilter_ipv6.h>
24111+#include <linux/vs_inet.h>
24112+#include <linux/vs_inet6.h>
24113
24114 #include <net/ip.h>
24115 #include <net/ipv6.h>
927ca606 24116@@ -158,10 +160,13 @@ lookup_protocol:
4bf69007
AM
24117 }
24118
24119 err = -EPERM;
24120+ if ((protocol == IPPROTO_ICMPV6) &&
24121+ nx_capable(CAP_NET_RAW, NXC_RAW_ICMP))
24122+ goto override;
b00e13aa
AM
24123 if (sock->type == SOCK_RAW && !kern &&
24124 !ns_capable(net->user_ns, CAP_NET_RAW))
4bf69007
AM
24125 goto out_rcu_unlock;
24126-
24127+override:
24128 sock->ops = answer->ops;
24129 answer_prot = answer->prot;
bb20add7 24130 answer_flags = answer->flags;
927ca606 24131@@ -259,6 +264,7 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24132 struct inet_sock *inet = inet_sk(sk);
24133 struct ipv6_pinfo *np = inet6_sk(sk);
24134 struct net *net = sock_net(sk);
24135+ struct nx_v6_sock_addr nsa;
24136 __be32 v4addr = 0;
24137 unsigned short snum;
24138 int addr_type = 0;
927ca606 24139@@ -274,6 +280,10 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24140 if (addr->sin6_family != AF_INET6)
24141 return -EAFNOSUPPORT;
24142
24143+ err = v6_map_sock_addr(inet, addr, &nsa);
24144+ if (err)
24145+ return err;
2ba6f0dd 24146+
4bf69007
AM
24147 addr_type = ipv6_addr_type(&addr->sin6_addr);
24148 if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
24149 return -EINVAL;
3cc86a71 24150@@ -326,6 +336,10 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24151 err = -EADDRNOTAVAIL;
24152 goto out;
24153 }
24154+ if (!v4_addr_in_nx_info(sk->sk_nx_info, v4addr, NXA_MASK_BIND)) {
24155+ err = -EADDRNOTAVAIL;
24156+ goto out;
24157+ }
24158 } else {
24159 if (addr_type != IPV6_ADDR_ANY) {
24160 struct net_device *dev = NULL;
3cc86a71 24161@@ -355,6 +369,11 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24162 }
24163 }
24164
24165+ if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
24166+ err = -EADDRNOTAVAIL;
24167+ goto out_unlock;
24168+ }
2ba6f0dd 24169+
4bf69007
AM
24170 /* ipv4 addr of the socket is invalid. Only the
24171 * unspecified and mapped address have a v4 equivalent.
24172 */
3cc86a71 24173@@ -372,6 +391,9 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24174 }
24175 }
24176
24177+ /* what's that for? */
24178+ v6_set_sock_addr(inet, &nsa);
2ba6f0dd 24179+
4bf69007
AM
24180 inet->inet_rcv_saddr = v4addr;
24181 inet->inet_saddr = v4addr;
24182
3cc86a71 24183@@ -476,9 +498,11 @@ int inet6_getname(struct socket *sock, s
4bf69007
AM
24184 return -ENOTCONN;
24185 sin->sin6_port = inet->inet_dport;
c2e5f7c8 24186 sin->sin6_addr = sk->sk_v6_daddr;
4bf69007
AM
24187+ /* FIXME: remap lback? */
24188 if (np->sndflow)
24189 sin->sin6_flowinfo = np->flow_label;
24190 } else {
24191+ /* FIXME: remap lback? */
c2e5f7c8 24192 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
4bf69007
AM
24193 sin->sin6_addr = np->saddr;
24194 else
c2806d43
AM
24195diff -urNp -x '*.orig' linux-4.4/net/ipv6/datagram.c linux-4.4/net/ipv6/datagram.c
24196--- linux-4.4/net/ipv6/datagram.c 2021-02-24 16:56:12.092429299 +0100
24197+++ linux-4.4/net/ipv6/datagram.c 2021-02-24 16:56:24.629490793 +0100
24198@@ -737,7 +737,7 @@ int ip6_datagram_send_ctl(struct net *ne
4bf69007
AM
24199
24200 rcu_read_lock();
24201 if (fl6->flowi6_oif) {
24202- dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
24203+ dev = dev_get_by_index_real_rcu(net, fl6->flowi6_oif);
24204 if (!dev) {
24205 rcu_read_unlock();
24206 return -ENODEV;
c2806d43
AM
24207diff -urNp -x '*.orig' linux-4.4/net/ipv6/fib6_rules.c linux-4.4/net/ipv6/fib6_rules.c
24208--- linux-4.4/net/ipv6/fib6_rules.c 2021-02-24 16:56:12.092429299 +0100
24209+++ linux-4.4/net/ipv6/fib6_rules.c 2021-02-24 16:56:24.629490793 +0100
c2e5f7c8 24210@@ -97,7 +97,7 @@ static int fib6_rule_action(struct fib_r
4bf69007
AM
24211 ip6_dst_idev(&rt->dst)->dev,
24212 &flp6->daddr,
24213 rt6_flags2srcprefs(flags),
24214- &saddr))
24215+ &saddr, NULL))
24216 goto again;
24217 if (!ipv6_prefix_equal(&saddr, &r->src.addr,
24218 r->src.plen))
c2806d43
AM
24219diff -urNp -x '*.orig' linux-4.4/net/ipv6/inet6_hashtables.c linux-4.4/net/ipv6/inet6_hashtables.c
24220--- linux-4.4/net/ipv6/inet6_hashtables.c 2016-01-11 00:01:32.000000000 +0100
24221+++ linux-4.4/net/ipv6/inet6_hashtables.c 2021-02-24 16:56:24.629490793 +0100
4bf69007
AM
24222@@ -16,6 +16,7 @@
24223
24224 #include <linux/module.h>
24225 #include <linux/random.h>
24226+#include <linux/vs_inet6.h>
24227
24228 #include <net/inet_connection_sock.h>
24229 #include <net/inet_hashtables.h>
927ca606 24230@@ -66,7 +67,6 @@ struct sock *__inet6_lookup_established(
4bf69007
AM
24231 unsigned int slot = hash & hashinfo->ehash_mask;
24232 struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
24233
24234-
24235 rcu_read_lock();
24236 begin:
24237 sk_nulls_for_each_rcu(sk, node, &head->chain) {
927ca606 24238@@ -108,6 +108,9 @@ static inline int compute_score(struct s
c2e5f7c8 24239 if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
4bf69007
AM
24240 return -1;
24241 score++;
24242+ } else {
24243+ if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
24244+ return -1;
24245 }
24246 if (sk->sk_bound_dev_if) {
24247 if (sk->sk_bound_dev_if != dif)
c2806d43
AM
24248diff -urNp -x '*.orig' linux-4.4/net/ipv6/ip6_fib.c linux-4.4/net/ipv6/ip6_fib.c
24249--- linux-4.4/net/ipv6/ip6_fib.c 2021-02-24 16:56:12.095762737 +0100
24250+++ linux-4.4/net/ipv6/ip6_fib.c 2021-02-24 16:56:24.629490793 +0100
48cb6a3c 24251@@ -1969,6 +1969,7 @@ static int ipv6_route_seq_show(struct se
c2e5f7c8
JR
24252 struct rt6_info *rt = v;
24253 struct ipv6_route_iter *iter = seq->private;
24254
24255+ /* FIXME: check for network context? */
24256 seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
24257
24258 #ifdef CONFIG_IPV6_SUBTREES
c2806d43
AM
24259diff -urNp -x '*.orig' linux-4.4/net/ipv6/ip6_output.c linux-4.4/net/ipv6/ip6_output.c
24260--- linux-4.4/net/ipv6/ip6_output.c 2021-02-24 16:56:12.549110347 +0100
24261+++ linux-4.4/net/ipv6/ip6_output.c 2021-02-24 16:56:24.629490793 +0100
24262@@ -949,7 +949,8 @@ static int ip6_dst_lookup_tail(struct ne
927ca606 24263 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
4bf69007
AM
24264 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
24265 sk ? inet6_sk(sk)->srcprefs : 0,
24266- &fl6->saddr);
24267+ &fl6->saddr,
24268+ sk ? sk->sk_nx_info : NULL);
24269 if (err)
24270 goto out_err_release;
927ca606 24271
c2806d43
AM
24272diff -urNp -x '*.orig' linux-4.4/net/ipv6/ndisc.c linux-4.4/net/ipv6/ndisc.c
24273--- linux-4.4/net/ipv6/ndisc.c 2021-02-24 16:56:12.099096175 +0100
24274+++ linux-4.4/net/ipv6/ndisc.c 2021-02-24 16:56:24.629490793 +0100
927ca606 24275@@ -501,7 +501,7 @@ void ndisc_send_na(struct net_device *de
4bf69007
AM
24276 } else {
24277 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
24278 inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
24279- &tmpaddr))
24280+ &tmpaddr, NULL))
24281 return;
24282 src_addr = &tmpaddr;
24283 }
c2806d43
AM
24284diff -urNp -x '*.orig' linux-4.4/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c linux-4.4/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
24285--- linux-4.4/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 2016-01-11 00:01:32.000000000 +0100
24286+++ linux-4.4/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 2021-02-24 16:56:24.629490793 +0100
bb20add7 24287@@ -35,7 +35,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *s
4bf69007
AM
24288 ctinfo == IP_CT_RELATED_REPLY));
24289
927ca606 24290 if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
4bf69007
AM
24291- &ipv6_hdr(skb)->daddr, 0, &src) < 0)
24292+ &ipv6_hdr(skb)->daddr, 0, &src, NULL) < 0)
24293 return NF_DROP;
24294
bb20add7 24295 nfct_nat(ct)->masq_index = out->ifindex;
c2806d43
AM
24296diff -urNp -x '*.orig' linux-4.4/net/ipv6/raw.c linux-4.4/net/ipv6/raw.c
24297--- linux-4.4/net/ipv6/raw.c 2021-02-24 16:56:12.102429614 +0100
24298+++ linux-4.4/net/ipv6/raw.c 2021-02-24 16:56:24.629490793 +0100
4bf69007
AM
24299@@ -30,6 +30,7 @@
24300 #include <linux/icmpv6.h>
24301 #include <linux/netfilter.h>
24302 #include <linux/netfilter_ipv6.h>
24303+#include <linux/vs_inet6.h>
24304 #include <linux/skbuff.h>
24305 #include <linux/compat.h>
927ca606 24306 #include <linux/uaccess.h>
3cc86a71 24307@@ -293,6 +294,13 @@ static int rawv6_bind(struct sock *sk, s
4bf69007
AM
24308 goto out_unlock;
24309 }
24310
24311+ if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
24312+ err = -EADDRNOTAVAIL;
24313+ if (dev)
24314+ dev_put(dev);
24315+ goto out;
24316+ }
2ba6f0dd 24317+
4bf69007
AM
24318 /* ipv4 addr of the socket is invalid. Only the
24319 * unspecified and mapped address have a v4 equivalent.
24320 */
c2806d43
AM
24321diff -urNp -x '*.orig' linux-4.4/net/ipv6/route.c linux-4.4/net/ipv6/route.c
24322--- linux-4.4/net/ipv6/route.c 2021-02-24 16:56:12.102429614 +0100
24323+++ linux-4.4/net/ipv6/route.c 2021-02-24 16:56:24.629490793 +0100
927ca606
AM
24324@@ -62,6 +62,7 @@
24325 #include <net/lwtunnel.h>
24326 #include <net/ip_tunnels.h>
24327 #include <net/l3mdev.h>
4bf69007
AM
24328+#include <linux/vs_inet6.h>
24329
24330 #include <asm/uaccess.h>
24331
c2806d43 24332@@ -2582,16 +2583,18 @@ int ip6_route_get_saddr(struct net *net,
4bf69007
AM
24333 struct rt6_info *rt,
24334 const struct in6_addr *daddr,
24335 unsigned int prefs,
24336- struct in6_addr *saddr)
24337+ struct in6_addr *saddr,
24338+ struct nx_info *nxi)
24339 {
927ca606
AM
24340 struct inet6_dev *idev =
24341 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
4bf69007 24342 int err = 0;
927ca606
AM
24343- if (rt && rt->rt6i_prefsrc.plen)
24344+ if (rt && rt->rt6i_prefsrc.plen && (!nxi ||
4bf69007
AM
24345+ v6_addr_in_nx_info(nxi, &rt->rt6i_prefsrc.addr, NXA_TYPE_ADDR)))
24346 *saddr = rt->rt6i_prefsrc.addr;
24347 else
24348 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
24349- daddr, prefs, saddr);
24350+ daddr, prefs, saddr, nxi);
24351 return err;
24352 }
24353
c2806d43 24354@@ -3208,7 +3211,8 @@ static int rt6_fill_node(struct net *net
4bf69007
AM
24355 goto nla_put_failure;
24356 } else if (dst) {
24357 struct in6_addr saddr_buf;
24358- if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
24359+ if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf,
24360+ (skb->sk ? skb->sk->sk_nx_info : NULL)) == 0 &&
927ca606 24361 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4bf69007
AM
24362 goto nla_put_failure;
24363 }
c2806d43
AM
24364diff -urNp -x '*.orig' linux-4.4/net/ipv6/tcp_ipv6.c linux-4.4/net/ipv6/tcp_ipv6.c
24365--- linux-4.4/net/ipv6/tcp_ipv6.c 2021-02-24 16:56:12.105763052 +0100
24366+++ linux-4.4/net/ipv6/tcp_ipv6.c 2021-02-24 16:56:24.629490793 +0100
bb20add7 24367@@ -69,6 +69,7 @@
4bf69007
AM
24368
24369 #include <linux/crypto.h>
24370 #include <linux/scatterlist.h>
24371+#include <linux/vs_inet6.h>
24372
927ca606
AM
24373 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
24374 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
24375@@ -150,11 +151,18 @@ static int tcp_v6_connect(struct sock *s
4bf69007
AM
24376 */
24377
927ca606
AM
24378 if (ipv6_addr_any(&usin->sin6_addr)) {
24379- if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
24380- ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
24381- &usin->sin6_addr);
24382- else
24383- usin->sin6_addr = in6addr_loopback;
4bf69007 24384+ struct nx_info *nxi = sk->sk_nx_info;
2ba6f0dd 24385+
4bf69007
AM
24386+ if (nxi && nx_info_has_v6(nxi))
24387+ /* FIXME: remap lback? */
24388+ usin->sin6_addr = nxi->v6.ip;
927ca606
AM
24389+ else {
24390+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
24391+ ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
24392+ &usin->sin6_addr);
24393+ else
24394+ usin->sin6_addr = in6addr_loopback;
24395+ }
24396 }
4bf69007
AM
24397
24398 addr_type = ipv6_addr_type(&usin->sin6_addr);
c2806d43
AM
24399diff -urNp -x '*.orig' linux-4.4/net/ipv6/udp.c linux-4.4/net/ipv6/udp.c
24400--- linux-4.4/net/ipv6/udp.c 2021-02-24 16:56:12.105763052 +0100
24401+++ linux-4.4/net/ipv6/udp.c 2021-02-24 16:56:24.629490793 +0100
c2e5f7c8 24402@@ -47,6 +47,7 @@
4bf69007 24403 #include <net/xfrm.h>
b00e13aa 24404 #include <net/inet6_hashtables.h>
c2e5f7c8 24405 #include <net/busy_poll.h>
4bf69007
AM
24406+#include <linux/vs_inet6.h>
24407
24408 #include <linux/proc_fs.h>
24409 #include <linux/seq_file.h>
927ca606
AM
24410@@ -76,32 +77,60 @@ static u32 udp6_ehashfn(const struct net
24411 udp_ipv6_hash_secret + net_hash_mix(net));
24412 }
24413
24414-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
24415+int ipv6_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
24416 {
24417+ const struct in6_addr *sk1_rcv_saddr6 = inet6_rcv_saddr(sk1);
24418 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
24419+ __be32 sk1_rcv_saddr = sk1->sk_rcv_saddr;
24420+ __be32 sk2_rcv_saddr = sk2->sk_rcv_saddr;
24421 int sk2_ipv6only = inet_v6_ipv6only(sk2);
24422- int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
24423+ int addr_type1 = ipv6_addr_type(sk1_rcv_saddr6);
24424 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
24425
24426 /* if both are mapped, treat as IPv4 */
24427- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
24428- return (!sk2_ipv6only &&
24429- (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
24430- sk->sk_rcv_saddr == sk2->sk_rcv_saddr));
24431+ if (addr_type1 == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
24432+ if (!sk2_ipv6only &&
24433+ (!sk1->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
24434+ sk1->sk_rcv_saddr == sk2->sk_rcv_saddr))
24435+ goto vs_v4;
24436+ else
24437+ return 0;
24438+ }
24439
24440 if (addr_type2 == IPV6_ADDR_ANY &&
24441- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
24442- return 1;
24443+ !(sk2_ipv6only && addr_type1 == IPV6_ADDR_MAPPED))
24444+ goto vs;
24445
24446- if (addr_type == IPV6_ADDR_ANY &&
24447- !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
24448- return 1;
24449+ if (addr_type1 == IPV6_ADDR_ANY &&
24450+ !(ipv6_only_sock(sk1) && addr_type2 == IPV6_ADDR_MAPPED))
24451+ goto vs;
24452
24453 if (sk2_rcv_saddr6 &&
24454- ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
24455- return 1;
24456+ ipv6_addr_equal(&sk1->sk_v6_rcv_saddr, sk2_rcv_saddr6))
24457+ goto vs;
24458
24459 return 0;
24460+
24461+vs_v4:
24462+ if (!sk1_rcv_saddr && !sk2_rcv_saddr)
24463+ return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
24464+ if (!sk2_rcv_saddr)
24465+ return v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, -1);
24466+ if (!sk1_rcv_saddr)
24467+ return v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, -1);
24468+ return 1;
24469+vs:
24470+ if (addr_type2 == IPV6_ADDR_ANY && addr_type1 == IPV6_ADDR_ANY)
24471+ return nx_v6_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
24472+ else if (addr_type2 == IPV6_ADDR_ANY)
24473+ return v6_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr6, -1);
24474+ else if (addr_type1 == IPV6_ADDR_ANY) {
24475+ if (addr_type2 == IPV6_ADDR_MAPPED)
24476+ return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
24477+ else
24478+ return v6_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr6, -1);
24479+ }
24480+ return 1;
24481 }
24482
24483 static u32 udp6_portaddr_hash(const struct net *net,
24484@@ -162,6 +191,10 @@ static inline int compute_score(struct s
24485 if (inet->inet_dport != sport)
24486 return -1;
24487 score++;
4bf69007
AM
24488+ } else {
24489+ /* block non nx_info ips */
24490+ if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
24491+ return -1;
927ca606
AM
24492 }
24493
24494 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
c2806d43
AM
24495diff -urNp -x '*.orig' linux-4.4/net/ipv6/xfrm6_policy.c linux-4.4/net/ipv6/xfrm6_policy.c
24496--- linux-4.4/net/ipv6/xfrm6_policy.c 2021-02-24 16:56:12.105763052 +0100
24497+++ linux-4.4/net/ipv6/xfrm6_policy.c 2021-02-24 16:56:24.632824231 +0100
927ca606
AM
24498@@ -64,7 +64,8 @@ static int xfrm6_get_saddr(struct net *n
24499 return -EHOSTUNREACH;
24500
4bf69007 24501 dev = ip6_dst_idev(dst)->dev;
927ca606
AM
24502- ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6);
24503+ ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6,
24504+ 0, &saddr->in6, NULL);
4bf69007
AM
24505 dst_release(dst);
24506 return 0;
24507 }
c2806d43
AM
24508diff -urNp -x '*.orig' linux-4.4/net/netfilter/ipvs/ip_vs_xmit.c linux-4.4/net/netfilter/ipvs/ip_vs_xmit.c
24509--- linux-4.4/net/netfilter/ipvs/ip_vs_xmit.c 2016-01-11 00:01:32.000000000 +0100
24510+++ linux-4.4/net/netfilter/ipvs/ip_vs_xmit.c 2021-02-24 16:56:24.632824231 +0100
927ca606 24511@@ -381,7 +381,7 @@ __ip_vs_route_output_v6(struct net *net,
4bf69007
AM
24512 return dst;
24513 if (ipv6_addr_any(&fl6.saddr) &&
24514 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
24515- &fl6.daddr, 0, &fl6.saddr) < 0)
24516+ &fl6.daddr, 0, &fl6.saddr, NULL) < 0)
24517 goto out_err;
24518 if (do_xfrm) {
24519 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
c2806d43
AM
24520diff -urNp -x '*.orig' linux-4.4/net/netlink/af_netlink.c linux-4.4/net/netlink/af_netlink.c
24521--- linux-4.4/net/netlink/af_netlink.c 2021-02-24 16:56:12.139097435 +0100
24522+++ linux-4.4/net/netlink/af_netlink.c 2021-02-24 16:56:24.632824231 +0100
8931d859 24523@@ -63,6 +63,8 @@
bb20add7 24524 #include <linux/hash.h>
927ca606 24525 #include <linux/genetlink.h>
7b1099ae 24526 #include <linux/nospec.h>
4bf69007
AM
24527+#include <linux/vs_context.h>
24528+#include <linux/vs_network.h>
4bf69007
AM
24529
24530 #include <net/net_namespace.h>
bb20add7 24531 #include <net/sock.h>
48cb6a3c 24532@@ -2473,7 +2475,8 @@ static void *__netlink_seq_next(struct s
927ca606
AM
24533 if (err)
24534 return ERR_PTR(err);
24535 }
24536- } while (sock_net(&nlk->sk) != seq_file_net(seq));
24537+ } while ((sock_net(&nlk->sk) != seq_file_net(seq)) ||
24538+ !nx_check(nlk->sk.sk_nid, VS_WATCH_P | VS_IDENT));
bb20add7 24539
927ca606
AM
24540 return nlk;
24541 }
c2806d43
AM
24542diff -urNp -x '*.orig' linux-4.4/net/packet/diag.c linux-4.4/net/packet/diag.c
24543--- linux-4.4/net/packet/diag.c 2016-01-11 00:01:32.000000000 +0100
24544+++ linux-4.4/net/packet/diag.c 2021-02-24 16:56:24.632824231 +0100
8931d859
AM
24545@@ -4,6 +4,7 @@
24546 #include <linux/netdevice.h>
24547 #include <linux/packet_diag.h>
24548 #include <linux/percpu.h>
24549+#include <linux/vs_network.h>
24550 #include <net/net_namespace.h>
24551 #include <net/sock.h>
24552
24553@@ -201,6 +202,8 @@ static int packet_diag_dump(struct sk_bu
24554 sk_for_each(sk, &net->packet.sklist) {
24555 if (!net_eq(sock_net(sk), net))
24556 continue;
24557+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24558+ continue;
24559 if (num < s_num)
24560 goto next;
24561
c2806d43
AM
24562diff -urNp -x '*.orig' linux-4.4/net/socket.c linux-4.4/net/socket.c
24563--- linux-4.4/net/socket.c 2021-02-24 16:56:12.169098380 +0100
24564+++ linux-4.4/net/socket.c 2021-02-24 16:56:24.632824231 +0100
8931d859 24565@@ -100,10 +100,12 @@
4bf69007
AM
24566
24567 #include <net/sock.h>
24568 #include <linux/netfilter.h>
4bf69007
AM
24569+#include <linux/vs_socket.h>
24570+#include <linux/vs_inet.h>
24571+#include <linux/vs_inet6.h>
24572
24573 #include <linux/if_tun.h>
24574 #include <linux/ipv6_route.h>
927ca606
AM
24575-#include <linux/route.h>
24576 #include <linux/sockios.h>
24577 #include <linux/atalk.h>
24578 #include <net/busy_poll.h>
3cc86a71 24579@@ -597,8 +599,24 @@ EXPORT_SYMBOL(__sock_tx_timestamp);
4bf69007 24580
927ca606
AM
24581 static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
24582 {
24583- int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
24584- BUG_ON(ret == -EIOCBQUEUED);
24585+ size_t size = msg_data_left(msg);
24586+ int ret = sock->ops->sendmsg(sock, msg, size);
24587+#if 0
4bf69007 24588+ if (sock->sk) {
927ca606 24589+ if (!ret)
4bf69007 24590+ vx_sock_fail(sock->sk, size);
927ca606
AM
24591+ else
24592+ vx_sock_send(sock->sk, size);
4bf69007 24593+ }
927ca606 24594+#endif
4bf69007 24595+ vxdprintk(VXD_CBIT(net, 7),
927ca606 24596+ "sock_sendmsg_nosec: %p[%p,%p,%p;%d/%d]:%zu/%zu",
4bf69007
AM
24597+ sock, sock->sk,
24598+ (sock->sk)?sock->sk->sk_nx_info:0,
24599+ (sock->sk)?sock->sk->sk_vx_info:0,
24600+ (sock->sk)?sock->sk->sk_xid:0,
24601+ (sock->sk)?sock->sk->sk_nid:0,
927ca606
AM
24602+ size, msg_data_left(msg));
24603 return ret;
4bf69007
AM
24604 }
24605
3cc86a71 24606@@ -1089,6 +1107,13 @@ int __sock_create(struct net *net, int f
4bf69007
AM
24607 if (type < 0 || type >= SOCK_MAX)
24608 return -EINVAL;
24609
24610+ if (!nx_check(0, VS_ADMIN)) {
24611+ if (family == PF_INET && !current_nx_info_has_v4())
24612+ return -EAFNOSUPPORT;
24613+ if (family == PF_INET6 && !current_nx_info_has_v6())
24614+ return -EAFNOSUPPORT;
24615+ }
2ba6f0dd 24616+
4bf69007
AM
24617 /* Compatibility.
24618
24619 This uglymoron is moved from INET layer to here to avoid
3cc86a71 24620@@ -1223,6 +1248,7 @@ SYSCALL_DEFINE3(socket, int, family, int
4bf69007
AM
24621 if (retval < 0)
24622 goto out;
24623
24624+ set_bit(SOCK_USER_SOCKET, &sock->flags);
24625 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
24626 if (retval < 0)
24627 goto out_release;
3cc86a71 24628@@ -1264,10 +1290,12 @@ SYSCALL_DEFINE4(socketpair, int, family,
4bf69007
AM
24629 err = sock_create(family, type, protocol, &sock1);
24630 if (err < 0)
24631 goto out;
24632+ set_bit(SOCK_USER_SOCKET, &sock1->flags);
24633
24634 err = sock_create(family, type, protocol, &sock2);
24635 if (err < 0)
24636 goto out_release_1;
24637+ set_bit(SOCK_USER_SOCKET, &sock2->flags);
24638
24639 err = sock1->ops->socketpair(sock1, sock2);
24640 if (err < 0)
c2806d43
AM
24641diff -urNp -x '*.orig' linux-4.4/net/sunrpc/auth.c linux-4.4/net/sunrpc/auth.c
24642--- linux-4.4/net/sunrpc/auth.c 2016-01-11 00:01:32.000000000 +0100
24643+++ linux-4.4/net/sunrpc/auth.c 2021-02-24 16:56:24.632824231 +0100
4bf69007
AM
24644@@ -15,6 +15,7 @@
24645 #include <linux/sunrpc/clnt.h>
24646 #include <linux/sunrpc/gss_api.h>
24647 #include <linux/spinlock.h>
24648+#include <linux/vs_tag.h>
24649
927ca606 24650 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
4bf69007 24651 # define RPCDBG_FACILITY RPCDBG_AUTH
bb20add7 24652@@ -630,6 +631,7 @@ rpcauth_lookupcred(struct rpc_auth *auth
4bf69007
AM
24653 memset(&acred, 0, sizeof(acred));
24654 acred.uid = cred->fsuid;
24655 acred.gid = cred->fsgid;
a4a22af8 24656+ acred.tag = make_ktag(&init_user_ns, dx_current_tag());
bb20add7 24657 acred.group_info = cred->group_info;
4bf69007 24658 ret = auth->au_ops->lookup_cred(auth, &acred, flags);
bb20add7
AM
24659 return ret;
24660@@ -669,6 +671,7 @@ rpcauth_bind_root_cred(struct rpc_task *
4bf69007 24661 struct auth_cred acred = {
b00e13aa
AM
24662 .uid = GLOBAL_ROOT_UID,
24663 .gid = GLOBAL_ROOT_GID,
a4a22af8 24664+ .tag = KTAGT_INIT(dx_current_tag()),
4bf69007
AM
24665 };
24666
24667 dprintk("RPC: %5u looking up %s cred\n",
c2806d43
AM
24668diff -urNp -x '*.orig' linux-4.4/net/sunrpc/auth_unix.c linux-4.4/net/sunrpc/auth_unix.c
24669--- linux-4.4/net/sunrpc/auth_unix.c 2016-01-11 00:01:32.000000000 +0100
24670+++ linux-4.4/net/sunrpc/auth_unix.c 2021-02-24 16:56:24.632824231 +0100
4bf69007
AM
24671@@ -13,11 +13,13 @@
24672 #include <linux/sunrpc/clnt.h>
24673 #include <linux/sunrpc/auth.h>
24674 #include <linux/user_namespace.h>
24675+#include <linux/vs_tag.h>
24676
24677 #define NFS_NGROUPS 16
24678
24679 struct unx_cred {
24680 struct rpc_cred uc_base;
b00e13aa
AM
24681+ ktag_t uc_tag;
24682 kgid_t uc_gid;
24683 kgid_t uc_gids[NFS_NGROUPS];
4bf69007 24684 };
b00e13aa 24685@@ -80,6 +82,7 @@ unx_create_cred(struct rpc_auth *auth, s
4bf69007
AM
24686 groups = NFS_NGROUPS;
24687
24688 cred->uc_gid = acred->gid;
24689+ cred->uc_tag = acred->tag;
b00e13aa
AM
24690 for (i = 0; i < groups; i++)
24691 cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
24692 if (i < NFS_NGROUPS)
24693@@ -121,7 +124,9 @@ unx_match(struct auth_cred *acred, struc
4bf69007
AM
24694 unsigned int i;
24695
24696
b00e13aa
AM
24697- if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid))
24698+ if (!uid_eq(cred->uc_uid, acred->uid) ||
24699+ !gid_eq(cred->uc_gid, acred->gid) ||
24700+ !tag_eq(cred->uc_tag, acred->tag))
4bf69007
AM
24701 return 0;
24702
24703 if (acred->group_info != NULL)
b00e13aa 24704@@ -146,7 +151,7 @@ unx_marshal(struct rpc_task *task, __be3
4bf69007
AM
24705 struct rpc_clnt *clnt = task->tk_client;
24706 struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
24707 __be32 *base, *hold;
24708- int i;
24709+ int i, tag;
24710
24711 *p++ = htonl(RPC_AUTH_UNIX);
24712 base = p++;
a4a22af8 24713@@ -157,8 +162,11 @@ unx_marshal(struct rpc_task *task, __be3
4bf69007
AM
24714 */
24715 p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
4bf69007 24716
b00e13aa
AM
24717- *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
24718- *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
24719+ tag = task->tk_client->cl_tag;
a4a22af8
AM
24720+ *p++ = htonl((u32) from_kuid(&init_user_ns,
24721+ TAGINO_KUID(tag, cred->uc_uid, cred->uc_tag)));
24722+ *p++ = htonl((u32) from_kgid(&init_user_ns,
24723+ TAGINO_KGID(tag, cred->uc_gid, cred->uc_tag)));
4bf69007 24724 hold = p++;
b00e13aa
AM
24725 for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++)
24726 *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
c2806d43
AM
24727diff -urNp -x '*.orig' linux-4.4/net/sunrpc/clnt.c linux-4.4/net/sunrpc/clnt.c
24728--- linux-4.4/net/sunrpc/clnt.c 2021-02-24 16:56:12.172431818 +0100
24729+++ linux-4.4/net/sunrpc/clnt.c 2021-02-24 16:56:24.632824231 +0100
4bf69007 24730@@ -31,6 +31,7 @@
c2e5f7c8 24731 #include <linux/in.h>
4bf69007
AM
24732 #include <linux/in6.h>
24733 #include <linux/un.h>
4bf69007
AM
24734+#include <linux/vs_cvirt.h>
24735
24736 #include <linux/sunrpc/clnt.h>
b00e13aa 24737 #include <linux/sunrpc/addr.h>
927ca606 24738@@ -477,6 +478,9 @@ static struct rpc_clnt *rpc_create_xprt(
4bf69007
AM
24739 if (!(args->flags & RPC_CLNT_CREATE_QUIET))
24740 clnt->cl_chatty = 1;
24741
24742+ /* TODO: handle RPC_CLNT_CREATE_TAGGED
24743+ if (args->flags & RPC_CLNT_CREATE_TAGGED)
24744+ clnt->cl_tag = 1; */
24745 return clnt;
24746 }
927ca606 24747
c2806d43
AM
24748diff -urNp -x '*.orig' linux-4.4/net/unix/af_unix.c linux-4.4/net/unix/af_unix.c
24749--- linux-4.4/net/unix/af_unix.c 2021-02-24 16:56:12.179098695 +0100
24750+++ linux-4.4/net/unix/af_unix.c 2021-02-24 16:56:24.632824231 +0100
bb20add7 24751@@ -117,6 +117,8 @@
4bf69007
AM
24752 #include <net/checksum.h>
24753 #include <linux/security.h>
c2e5f7c8 24754 #include <linux/freezer.h>
4bf69007
AM
24755+#include <linux/vs_context.h>
24756+#include <linux/vs_limit.h>
24757
868ca535
JR
24758 #include "scm.h"
24759
c2806d43 24760@@ -290,6 +292,8 @@ static struct sock *__unix_find_socket_b
4bf69007
AM
24761 if (!net_eq(sock_net(s), net))
24762 continue;
24763
24764+ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
24765+ continue;
24766 if (u->addr->len == len &&
24767 !memcmp(u->addr->name, sunname, len))
24768 goto found;
c2806d43 24769@@ -2757,6 +2761,8 @@ static struct sock *unix_from_bucket(str
4bf69007
AM
24770 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
24771 if (sock_net(sk) != seq_file_net(seq))
24772 continue;
24773+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24774+ continue;
24775 if (++count == offset)
24776 break;
24777 }
c2806d43 24778@@ -2774,6 +2780,8 @@ static struct sock *unix_next_socket(str
4bf69007
AM
24779 sk = sk_next(sk);
24780 if (!sk)
24781 goto next_bucket;
24782+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24783+ continue;
24784 if (sock_net(sk) == seq_file_net(seq))
24785 return sk;
24786 }
c2806d43
AM
24787diff -urNp -x '*.orig' linux-4.4/net/unix/diag.c linux-4.4/net/unix/diag.c
24788--- linux-4.4/net/unix/diag.c 2021-02-24 16:56:12.179098695 +0100
24789+++ linux-4.4/net/unix/diag.c 2021-02-24 16:56:24.632824231 +0100
8931d859
AM
24790@@ -4,6 +4,7 @@
24791 #include <linux/unix_diag.h>
24792 #include <linux/skbuff.h>
24793 #include <linux/module.h>
24794+#include <linux/vs_network.h>
24795 #include <net/netlink.h>
24796 #include <net/af_unix.h>
24797 #include <net/tcp_states.h>
3cc86a71 24798@@ -200,6 +201,8 @@ static int unix_diag_dump(struct sk_buff
8931d859
AM
24799 sk_for_each(sk, &unix_socket_table[slot]) {
24800 if (!net_eq(sock_net(sk), net))
24801 continue;
24802+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24803+ continue;
24804 if (num < s_num)
24805 goto next;
24806 if (!(req->udiag_states & (1 << sk->sk_state)))
c2806d43
AM
24807diff -urNp -x '*.orig' linux-4.4/scripts/checksyscalls.sh linux-4.4/scripts/checksyscalls.sh
24808--- linux-4.4/scripts/checksyscalls.sh 2016-01-11 00:01:32.000000000 +0100
24809+++ linux-4.4/scripts/checksyscalls.sh 2021-02-24 16:56:24.636157669 +0100
bb20add7 24810@@ -196,7 +196,6 @@ cat << EOF
4bf69007
AM
24811 #define __IGNORE_afs_syscall
24812 #define __IGNORE_getpmsg
24813 #define __IGNORE_putpmsg
24814-#define __IGNORE_vserver
24815 EOF
24816 }
24817
c2806d43
AM
24818diff -urNp -x '*.orig' linux-4.4/security/commoncap.c linux-4.4/security/commoncap.c
24819--- linux-4.4/security/commoncap.c 2021-02-24 16:56:12.205766201 +0100
24820+++ linux-4.4/security/commoncap.c 2021-02-24 16:56:24.636157669 +0100
927ca606 24821@@ -71,6 +71,7 @@ static void warn_setuid_and_fcaps_mixed(
4bf69007
AM
24822 int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
24823 int cap, int audit)
24824 {
24825+ struct vx_info *vxi = current_vx_info(); /* FIXME: get vxi from cred? */
b00e13aa 24826 struct user_namespace *ns = targ_ns;
4bf69007 24827
b00e13aa 24828 /* See if cred has the capability in the target user namespace
927ca606 24829@@ -79,8 +80,12 @@ int cap_capable(const struct cred *cred,
b00e13aa
AM
24830 */
24831 for (;;) {
4bf69007 24832 /* Do we have the necessary capabilities? */
b00e13aa 24833- if (ns == cred->user_ns)
4bf69007 24834- return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
b00e13aa 24835+ if (ns == cred->user_ns) {
4bf69007
AM
24836+ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0) &&
24837+ cap_raised(cred->cap_effective, cap))
24838+ return 0;
24839+ return vx_cap_raised(vxi, cred->cap_effective, cap) ? 0 : -EPERM;
24840+ }
24841
24842 /* Have we tried all of the parent namespaces? */
b00e13aa 24843 if (ns == &init_user_ns)
c2806d43 24844@@ -665,7 +670,7 @@ int cap_inode_setxattr(struct dentry *de
4bf69007
AM
24845
24846 if (!strncmp(name, XATTR_SECURITY_PREFIX,
24847 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
24848- !capable(CAP_SYS_ADMIN))
24849+ !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
24850 return -EPERM;
24851 return 0;
24852 }
c2806d43 24853@@ -691,7 +696,7 @@ int cap_inode_removexattr(struct dentry
4bf69007
AM
24854
24855 if (!strncmp(name, XATTR_SECURITY_PREFIX,
24856 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
24857- !capable(CAP_SYS_ADMIN))
24858+ !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
24859 return -EPERM;
24860 return 0;
24861 }
c2806d43
AM
24862diff -urNp -x '*.orig' linux-4.4/security/selinux/hooks.c linux-4.4/security/selinux/hooks.c
24863--- linux-4.4/security/selinux/hooks.c 2021-02-24 16:56:12.212433078 +0100
24864+++ linux-4.4/security/selinux/hooks.c 2021-02-24 16:56:24.636157669 +0100
927ca606 24865@@ -67,7 +67,6 @@
4bf69007
AM
24866 #include <linux/dccp.h>
24867 #include <linux/quota.h>
24868 #include <linux/un.h> /* for Unix socket types */
24869-#include <net/af_unix.h> /* for Unix socket types */
24870 #include <linux/parser.h>
24871 #include <linux/nfs_mount.h>
24872 #include <net/ipv6.h>
This page took 6.664423 seconds and 4 git commands to generate.