]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-vserver-2.3.patch
- up to 4.4.192
[packages/kernel.git] / kernel-vserver-2.3.patch
CommitLineData
8931d859
AM
1diff -NurpP --minimal linux-4.4.161/Documentation/vserver/debug.txt linux-4.4.161-vs2.3.9.8/Documentation/vserver/debug.txt
2--- linux-4.4.161/Documentation/vserver/debug.txt 1970-01-01 00:00:00.000000000 +0000
3+++ linux-4.4.161-vs2.3.9.8/Documentation/vserver/debug.txt 2018-10-20 04:57:21.000000000 +0000
d33d7b00
AM
4@@ -0,0 +1,154 @@
5+
6+debug_cvirt:
7+
8+ 2 4 "vx_map_tgid: %p/%llx: %d -> %d"
9+ "vx_rmap_tgid: %p/%llx: %d -> %d"
10+
11+debug_dlim:
12+
13+ 0 1 "ALLOC (%p,#%d)%c inode (%d)"
14+ "FREE (%p,#%d)%c inode"
15+ 1 2 "ALLOC (%p,#%d)%c %lld bytes (%d)"
16+ "FREE (%p,#%d)%c %lld bytes"
17+ 2 4 "ADJUST: %lld,%lld on %ld,%ld [mult=%d]"
18+ 3 8 "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d"
19+ "ext3_has_free_blocks(%p): free=%lu, root=%lu"
20+ "rcu_free_dl_info(%p)"
21+ 4 10 "alloc_dl_info(%p,%d) = %p"
22+ "dealloc_dl_info(%p)"
23+ "get_dl_info(%p[#%d.%d])"
24+ "put_dl_info(%p[#%d.%d])"
25+ 5 20 "alloc_dl_info(%p,%d)*"
26+ 6 40 "__hash_dl_info: %p[#%d]"
27+ "__unhash_dl_info: %p[#%d]"
28+ 7 80 "locate_dl_info(%p,#%d) = %p"
29+
30+debug_misc:
31+
32+ 0 1 "destroy_dqhash: %p [#0x%08x] c=%d"
33+ "new_dqhash: %p [#0x%08x]"
34+ "vroot[%d]_clr_dev: dev=%p[%lu,%d:%d]"
35+ "vroot[%d]_get_real_bdev: dev=%p[%lu,%d:%d]"
36+ "vroot[%d]_set_dev: dev=%p[%lu,%d:%d]"
37+ "vroot_get_real_bdev not set"
d6221c00
AM
38+ 1 2 "cow_break_link(?%s?)"
39+ "temp copy ?%s?"
d33d7b00
AM
40+ 2 4 "dentry_open(new): %p"
41+ "dentry_open(old): %p"
42+ "lookup_create(new): %p"
d6221c00 43+ "old path ?%s?"
d33d7b00
AM
44+ "path_lookup(old): %d"
45+ "vfs_create(new): %d"
46+ "vfs_rename: %d"
47+ "vfs_sendfile: %d"
48+ 3 8 "fput(new_file=%p[#%d])"
49+ "fput(old_file=%p[#%d])"
50+ 4 10 "vx_info_kill(%p[#%d],%d,%d) = %d"
51+ "vx_info_kill(%p[#%d],%d,%d)*"
52+ 5 20 "vs_reboot(%p[#%d],%d)"
53+ 6 40 "dropping task %p[#%u,%u] for %p[#%u,%u]"
54+
55+debug_net:
56+
57+ 2 4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d"
58+ 3 8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d"
59+ "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d"
60+ 4 10 "ip_route_connect(%p) %p,%p;%lx"
61+ 5 20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx"
62+ 6 40 "sk,egf: %p [#%d] (from %d)"
63+ "sk,egn: %p [#%d] (from %d)"
64+ "sk,req: %p [#%d] (from %d)"
65+ "sk: %p [#%d] (from %d)"
66+ "tw: %p [#%d] (from %d)"
67+ 7 80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d"
68+ "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d"
69+
70+debug_nid:
71+
72+ 0 1 "__lookup_nx_info(#%u): %p[#%u]"
73+ "alloc_nx_info(%d) = %p"
74+ "create_nx_info(%d) (dynamic rejected)"
75+ "create_nx_info(%d) = %p (already there)"
76+ "create_nx_info(%d) = %p (new)"
77+ "dealloc_nx_info(%p)"
78+ 1 2 "alloc_nx_info(%d)*"
79+ "create_nx_info(%d)*"
80+ 2 4 "get_nx_info(%p[#%d.%d])"
81+ "put_nx_info(%p[#%d.%d])"
82+ 3 8 "claim_nx_info(%p[#%d.%d.%d]) %p"
83+ "clr_nx_info(%p[#%d.%d])"
84+ "init_nx_info(%p[#%d.%d])"
85+ "release_nx_info(%p[#%d.%d.%d]) %p"
86+ "set_nx_info(%p[#%d.%d])"
87+ 4 10 "__hash_nx_info: %p[#%d]"
88+ "__nx_dynamic_id: [#%d]"
89+ "__unhash_nx_info: %p[#%d.%d.%d]"
90+ 5 20 "moved task %p into nxi:%p[#%d]"
91+ "nx_migrate_task(%p,%p[#%d.%d.%d])"
92+ "task_get_nx_info(%p)"
93+ 6 40 "nx_clear_persistent(%p[#%d])"
94+
95+debug_quota:
96+
97+ 0 1 "quota_sync_dqh(%p,%d) discard inode %p"
98+ 1 2 "quota_sync_dqh(%p,%d)"
99+ "sync_dquots(%p,%d)"
100+ "sync_dquots_dqh(%p,%d)"
101+ 3 8 "do_quotactl(%p,%d,cmd=%d,id=%d,%p)"
102+
103+debug_switch:
104+
105+ 0 1 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]"
106+ 1 2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]"
107+ 4 10 "%s: (%s %s) returned %s with %d"
108+
109+debug_tag:
110+
d6221c00 111+ 7 80 "dx_parse_tag(?%s?): %d:#%d"
d33d7b00
AM
112+ "dx_propagate_tag(%p[#%lu.%d]): %d,%d"
113+
114+debug_xid:
115+
116+ 0 1 "__lookup_vx_info(#%u): %p[#%u]"
117+ "alloc_vx_info(%d) = %p"
118+ "alloc_vx_info(%d)*"
119+ "create_vx_info(%d) (dynamic rejected)"
120+ "create_vx_info(%d) = %p (already there)"
121+ "create_vx_info(%d) = %p (new)"
122+ "dealloc_vx_info(%p)"
123+ "loc_vx_info(%d) = %p (found)"
124+ "loc_vx_info(%d) = %p (new)"
125+ "loc_vx_info(%d) = %p (not available)"
126+ 1 2 "create_vx_info(%d)*"
127+ "loc_vx_info(%d)*"
128+ 2 4 "get_vx_info(%p[#%d.%d])"
129+ "put_vx_info(%p[#%d.%d])"
130+ 3 8 "claim_vx_info(%p[#%d.%d.%d]) %p"
131+ "clr_vx_info(%p[#%d.%d])"
132+ "init_vx_info(%p[#%d.%d])"
133+ "release_vx_info(%p[#%d.%d.%d]) %p"
134+ "set_vx_info(%p[#%d.%d])"
135+ 4 10 "__hash_vx_info: %p[#%d]"
136+ "__unhash_vx_info: %p[#%d.%d.%d]"
137+ "__vx_dynamic_id: [#%d]"
138+ 5 20 "enter_vx_info(%p[#%d],%p) %p[#%d,%p]"
139+ "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]"
140+ "moved task %p into vxi:%p[#%d]"
141+ "task_get_vx_info(%p)"
142+ "vx_migrate_task(%p,%p[#%d.%d])"
143+ 6 40 "vx_clear_persistent(%p[#%d])"
144+ "vx_exit_init(%p[#%d],%p[#%d,%d,%d])"
145+ "vx_set_init(%p[#%d],%p[#%d,%d,%d])"
146+ "vx_set_persistent(%p[#%d])"
147+ "vx_set_reaper(%p[#%d],%p[#%d,%d])"
148+ 7 80 "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]"
149+
150+
151+debug_limit:
152+
153+ n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s"
154+ "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
155+
156+ m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s"
157+ "vx_acc_pages[%5d,%s,%2d]: %5d += %5d"
158+ "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
8931d859
AM
159diff -NurpP --minimal linux-4.4.161/arch/alpha/Kconfig linux-4.4.161-vs2.3.9.8/arch/alpha/Kconfig
160--- linux-4.4.161/arch/alpha/Kconfig 2016-01-10 23:01:32.000000000 +0000
161+++ linux-4.4.161-vs2.3.9.8/arch/alpha/Kconfig 2018-10-20 04:57:21.000000000 +0000
927ca606 162@@ -745,6 +745,8 @@ config DUMMY_CONSOLE
2380c486
JR
163 depends on VGA_HOSE
164 default y
d337f35e
JR
165
166+source "kernel/vserver/Kconfig"
167+
168 source "security/Kconfig"
169
170 source "crypto/Kconfig"
8931d859
AM
171diff -NurpP --minimal linux-4.4.161/arch/alpha/kernel/systbls.S linux-4.4.161-vs2.3.9.8/arch/alpha/kernel/systbls.S
172--- linux-4.4.161/arch/alpha/kernel/systbls.S 2016-01-10 23:01:32.000000000 +0000
173+++ linux-4.4.161-vs2.3.9.8/arch/alpha/kernel/systbls.S 2018-10-20 04:57:21.000000000 +0000
d337f35e
JR
174@@ -446,7 +446,7 @@ sys_call_table:
175 .quad sys_stat64 /* 425 */
176 .quad sys_lstat64
177 .quad sys_fstat64
178- .quad sys_ni_syscall /* sys_vserver */
179+ .quad sys_vserver /* sys_vserver */
180 .quad sys_ni_syscall /* sys_mbind */
181 .quad sys_ni_syscall /* sys_get_mempolicy */
182 .quad sys_ni_syscall /* sys_set_mempolicy */
8931d859
AM
183diff -NurpP --minimal linux-4.4.161/arch/alpha/kernel/traps.c linux-4.4.161-vs2.3.9.8/arch/alpha/kernel/traps.c
184--- linux-4.4.161/arch/alpha/kernel/traps.c 2016-01-10 23:01:32.000000000 +0000
185+++ linux-4.4.161-vs2.3.9.8/arch/alpha/kernel/traps.c 2018-10-20 04:57:21.000000000 +0000
927ca606 186@@ -174,7 +174,8 @@ die_if_kernel(char * str, struct pt_regs
d337f35e
JR
187 #ifdef CONFIG_SMP
188 printk("CPU %d ", hard_smp_processor_id());
189 #endif
2380c486 190- printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
61333608 191+ printk("%s(%d:#%u): %s %ld\n", current->comm,
2380c486 192+ task_pid_nr(current), current->xid, str, err);
d337f35e 193 dik_show_regs(regs, r9_15);
b00e13aa 194 add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
d337f35e 195 dik_show_trace((unsigned long *)(regs+1));
8931d859
AM
196diff -NurpP --minimal linux-4.4.161/arch/arm/Kconfig linux-4.4.161-vs2.3.9.8/arch/arm/Kconfig
197--- linux-4.4.161/arch/arm/Kconfig 2016-01-10 23:01:32.000000000 +0000
198+++ linux-4.4.161-vs2.3.9.8/arch/arm/Kconfig 2018-10-20 04:57:21.000000000 +0000
927ca606 199@@ -2159,6 +2159,8 @@ source "fs/Kconfig"
d337f35e
JR
200
201 source "arch/arm/Kconfig.debug"
202
203+source "kernel/vserver/Kconfig"
204+
205 source "security/Kconfig"
206
207 source "crypto/Kconfig"
8931d859
AM
208diff -NurpP --minimal linux-4.4.161/arch/arm/kernel/calls.S linux-4.4.161-vs2.3.9.8/arch/arm/kernel/calls.S
209--- linux-4.4.161/arch/arm/kernel/calls.S 2016-01-10 23:01:32.000000000 +0000
210+++ linux-4.4.161-vs2.3.9.8/arch/arm/kernel/calls.S 2018-10-20 04:57:21.000000000 +0000
d337f35e
JR
211@@ -322,7 +322,7 @@
212 /* 310 */ CALL(sys_request_key)
213 CALL(sys_keyctl)
214 CALL(ABI(sys_semtimedop, sys_oabi_semtimedop))
215-/* vserver */ CALL(sys_ni_syscall)
216+ CALL(sys_vserver)
217 CALL(sys_ioprio_set)
218 /* 315 */ CALL(sys_ioprio_get)
219 CALL(sys_inotify_init)
8931d859
AM
220diff -NurpP --minimal linux-4.4.161/arch/arm/kernel/traps.c linux-4.4.161-vs2.3.9.8/arch/arm/kernel/traps.c
221--- linux-4.4.161/arch/arm/kernel/traps.c 2018-10-20 02:34:24.000000000 +0000
222+++ linux-4.4.161-vs2.3.9.8/arch/arm/kernel/traps.c 2018-10-20 04:57:21.000000000 +0000
223@@ -259,8 +259,8 @@ static int __die(const char *str, int er
78865d5b 224
d337f35e
JR
225 print_modules();
226 __show_regs(regs);
927ca606
AM
227- pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
228- TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk));
229+ pr_emerg("Process %.*s (pid: %d:%u, stack limit = 0x%p)\n",
230+ TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), tsk->xid, end_of_stack(tsk));
d337f35e
JR
231
232 if (!user_mode(regs) || in_interrupt()) {
7e46296a 233 dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
8931d859
AM
234diff -NurpP --minimal linux-4.4.161/arch/cris/Kconfig linux-4.4.161-vs2.3.9.8/arch/cris/Kconfig
235--- linux-4.4.161/arch/cris/Kconfig 2016-01-10 23:01:32.000000000 +0000
236+++ linux-4.4.161-vs2.3.9.8/arch/cris/Kconfig 2018-10-20 04:57:21.000000000 +0000
927ca606 237@@ -581,6 +581,8 @@ source "fs/Kconfig"
d337f35e
JR
238
239 source "arch/cris/Kconfig.debug"
240
241+source "kernel/vserver/Kconfig"
242+
243 source "security/Kconfig"
244
245 source "crypto/Kconfig"
8931d859
AM
246diff -NurpP --minimal linux-4.4.161/arch/ia64/Kconfig linux-4.4.161-vs2.3.9.8/arch/ia64/Kconfig
247--- linux-4.4.161/arch/ia64/Kconfig 2016-01-10 23:01:32.000000000 +0000
248+++ linux-4.4.161-vs2.3.9.8/arch/ia64/Kconfig 2018-10-20 04:57:21.000000000 +0000
927ca606 249@@ -606,6 +606,8 @@ source "fs/Kconfig"
2380c486
JR
250
251 source "arch/ia64/Kconfig.debug"
d337f35e
JR
252
253+source "kernel/vserver/Kconfig"
254+
255 source "security/Kconfig"
256
257 source "crypto/Kconfig"
8931d859
AM
258diff -NurpP --minimal linux-4.4.161/arch/ia64/kernel/entry.S linux-4.4.161-vs2.3.9.8/arch/ia64/kernel/entry.S
259--- linux-4.4.161/arch/ia64/kernel/entry.S 2016-01-10 23:01:32.000000000 +0000
260+++ linux-4.4.161-vs2.3.9.8/arch/ia64/kernel/entry.S 2018-10-20 04:57:21.000000000 +0000
927ca606 261@@ -1694,7 +1694,7 @@ sys_call_table:
2380c486
JR
262 data8 sys_mq_notify
263 data8 sys_mq_getsetattr
264 data8 sys_kexec_load
265- data8 sys_ni_syscall // reserved for vserver
266+ data8 sys_vserver
267 data8 sys_waitid // 1270
268 data8 sys_add_key
269 data8 sys_request_key
8931d859
AM
270diff -NurpP --minimal linux-4.4.161/arch/ia64/kernel/ptrace.c linux-4.4.161-vs2.3.9.8/arch/ia64/kernel/ptrace.c
271--- linux-4.4.161/arch/ia64/kernel/ptrace.c 2016-01-10 23:01:32.000000000 +0000
272+++ linux-4.4.161-vs2.3.9.8/arch/ia64/kernel/ptrace.c 2018-10-20 04:57:21.000000000 +0000
78865d5b 273@@ -21,6 +21,7 @@
2380c486 274 #include <linux/regset.h>
d337f35e 275 #include <linux/elf.h>
ec22aa5c 276 #include <linux/tracehook.h>
d337f35e
JR
277+#include <linux/vs_base.h>
278
279 #include <asm/pgtable.h>
280 #include <asm/processor.h>
8931d859
AM
281diff -NurpP --minimal linux-4.4.161/arch/ia64/kernel/traps.c linux-4.4.161-vs2.3.9.8/arch/ia64/kernel/traps.c
282--- linux-4.4.161/arch/ia64/kernel/traps.c 2016-01-10 23:01:32.000000000 +0000
283+++ linux-4.4.161-vs2.3.9.8/arch/ia64/kernel/traps.c 2018-10-20 04:57:21.000000000 +0000
1e8b8f9b 284@@ -60,8 +60,9 @@ die (const char *str, struct pt_regs *re
d337f35e
JR
285 put_cpu();
286
287 if (++die.lock_owner_depth < 3) {
288- printk("%s[%d]: %s %ld [%d]\n",
2380c486 289- current->comm, task_pid_nr(current), str, err, ++die_counter);
61333608 290+ printk("%s[%d:#%u]: %s %ld [%d]\n",
2380c486 291+ current->comm, task_pid_nr(current), current->xid,
d337f35e 292+ str, err, ++die_counter);
2380c486
JR
293 if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV)
294 != NOTIFY_STOP)
295 show_regs(regs);
1e8b8f9b 296@@ -324,8 +325,9 @@ handle_fpu_swa (int fp_fault, struct pt_
2380c486
JR
297 if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) {
298 last.time = current_jiffies + 5 * HZ;
299 printk(KERN_WARNING
300- "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
301- current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
61333608 302+ "%s(%d:#%u): floating-point assist fault at ip %016lx, isr %016lx\n",
2380c486
JR
303+ current->comm, task_pid_nr(current), current->xid,
304+ regs->cr_iip + ia64_psr(regs)->ri, isr);
305 }
306 }
d337f35e 307 }
8931d859
AM
308diff -NurpP --minimal linux-4.4.161/arch/m32r/kernel/traps.c linux-4.4.161-vs2.3.9.8/arch/m32r/kernel/traps.c
309--- linux-4.4.161/arch/m32r/kernel/traps.c 2016-01-10 23:01:32.000000000 +0000
310+++ linux-4.4.161-vs2.3.9.8/arch/m32r/kernel/traps.c 2018-10-20 04:57:21.000000000 +0000
09be7631 311@@ -184,8 +184,9 @@ static void show_registers(struct pt_reg
d337f35e
JR
312 } else {
313 printk("SPI: %08lx\n", sp);
314 }
315- printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
2380c486 316- current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current);
61333608 317+ printk("Process %s (pid: %d:#%u, process nr: %d, stackpage=%08lx)",
2380c486 318+ current->comm, task_pid_nr(current), current->xid,
d337f35e
JR
319+ 0xffff & i, 4096+(unsigned long)current);
320
321 /*
322 * When in-kernel, we also print out the stack and code at the
8931d859
AM
323diff -NurpP --minimal linux-4.4.161/arch/m68k/Kconfig linux-4.4.161-vs2.3.9.8/arch/m68k/Kconfig
324--- linux-4.4.161/arch/m68k/Kconfig 2016-01-10 23:01:32.000000000 +0000
325+++ linux-4.4.161-vs2.3.9.8/arch/m68k/Kconfig 2018-10-20 04:57:21.000000000 +0000
927ca606 326@@ -164,6 +164,8 @@ source "fs/Kconfig"
d337f35e
JR
327
328 source "arch/m68k/Kconfig.debug"
329
330+source "kernel/vserver/Kconfig"
331+
332 source "security/Kconfig"
333
334 source "crypto/Kconfig"
8931d859
AM
335diff -NurpP --minimal linux-4.4.161/arch/mips/Kconfig linux-4.4.161-vs2.3.9.8/arch/mips/Kconfig
336--- linux-4.4.161/arch/mips/Kconfig 2018-10-20 02:34:25.000000000 +0000
337+++ linux-4.4.161-vs2.3.9.8/arch/mips/Kconfig 2018-10-20 04:57:21.000000000 +0000
927ca606 338@@ -3026,6 +3026,8 @@ source "fs/Kconfig"
d337f35e
JR
339
340 source "arch/mips/Kconfig.debug"
341
342+source "kernel/vserver/Kconfig"
343+
344 source "security/Kconfig"
345
346 source "crypto/Kconfig"
8931d859
AM
347diff -NurpP --minimal linux-4.4.161/arch/mips/kernel/ptrace.c linux-4.4.161-vs2.3.9.8/arch/mips/kernel/ptrace.c
348--- linux-4.4.161/arch/mips/kernel/ptrace.c 2018-10-20 02:34:25.000000000 +0000
349+++ linux-4.4.161-vs2.3.9.8/arch/mips/kernel/ptrace.c 2018-10-20 05:50:17.000000000 +0000
927ca606 350@@ -30,6 +30,7 @@
2380c486
JR
351 #include <linux/audit.h>
352 #include <linux/seccomp.h>
c2e5f7c8 353 #include <linux/ftrace.h>
d337f35e
JR
354+#include <linux/vs_base.h>
355
356 #include <asm/byteorder.h>
357 #include <asm/cpu.h>
8931d859 358@@ -801,6 +802,9 @@ long arch_ptrace(struct task_struct *chi
ab30d09f
AM
359 void __user *datavp = (void __user *) data;
360 unsigned long __user *datalp = (void __user *) data;
d337f35e 361
2380c486 362+ if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT))
d337f35e
JR
363+ goto out;
364+
365 switch (request) {
366 /* when I and D space are separate, these will need to be fixed. */
367 case PTRACE_PEEKTEXT: /* read word at location addr. */
8931d859
AM
368diff -NurpP --minimal linux-4.4.161/arch/mips/kernel/scall32-o32.S linux-4.4.161-vs2.3.9.8/arch/mips/kernel/scall32-o32.S
369--- linux-4.4.161/arch/mips/kernel/scall32-o32.S 2018-10-20 02:34:25.000000000 +0000
370+++ linux-4.4.161-vs2.3.9.8/arch/mips/kernel/scall32-o32.S 2018-10-20 04:57:21.000000000 +0000
927ca606 371@@ -512,7 +512,7 @@ EXPORT(sys_call_table)
c2e5f7c8
JR
372 PTR sys_mq_timedreceive
373 PTR sys_mq_notify /* 4275 */
374 PTR sys_mq_getsetattr
375- PTR sys_ni_syscall /* sys_vserver */
376+ PTR sys_vserver
377 PTR sys_waitid
378 PTR sys_ni_syscall /* available, was setaltroot */
379 PTR sys_add_key /* 4280 */
8931d859
AM
380diff -NurpP --minimal linux-4.4.161/arch/mips/kernel/scall64-64.S linux-4.4.161-vs2.3.9.8/arch/mips/kernel/scall64-64.S
381--- linux-4.4.161/arch/mips/kernel/scall64-64.S 2018-10-20 02:34:25.000000000 +0000
382+++ linux-4.4.161-vs2.3.9.8/arch/mips/kernel/scall64-64.S 2018-10-20 04:57:21.000000000 +0000
927ca606 383@@ -349,7 +349,7 @@ EXPORT(sys_call_table)
d337f35e
JR
384 PTR sys_mq_timedreceive
385 PTR sys_mq_notify
386 PTR sys_mq_getsetattr /* 5235 */
387- PTR sys_ni_syscall /* sys_vserver */
388+ PTR sys_vserver
389 PTR sys_waitid
390 PTR sys_ni_syscall /* available, was setaltroot */
391 PTR sys_add_key
8931d859
AM
392diff -NurpP --minimal linux-4.4.161/arch/mips/kernel/scall64-n32.S linux-4.4.161-vs2.3.9.8/arch/mips/kernel/scall64-n32.S
393--- linux-4.4.161/arch/mips/kernel/scall64-n32.S 2018-10-20 02:34:25.000000000 +0000
394+++ linux-4.4.161-vs2.3.9.8/arch/mips/kernel/scall64-n32.S 2018-10-20 04:57:21.000000000 +0000
927ca606 395@@ -339,7 +339,7 @@ EXPORT(sysn32_call_table)
d337f35e
JR
396 PTR compat_sys_mq_timedreceive
397 PTR compat_sys_mq_notify
398 PTR compat_sys_mq_getsetattr
399- PTR sys_ni_syscall /* 6240, sys_vserver */
400+ PTR sys32_vserver /* 6240 */
2380c486 401 PTR compat_sys_waitid
d337f35e
JR
402 PTR sys_ni_syscall /* available, was setaltroot */
403 PTR sys_add_key
8931d859
AM
404diff -NurpP --minimal linux-4.4.161/arch/mips/kernel/scall64-o32.S linux-4.4.161-vs2.3.9.8/arch/mips/kernel/scall64-o32.S
405--- linux-4.4.161/arch/mips/kernel/scall64-o32.S 2018-10-20 02:34:25.000000000 +0000
406+++ linux-4.4.161-vs2.3.9.8/arch/mips/kernel/scall64-o32.S 2018-10-20 04:57:21.000000000 +0000
927ca606 407@@ -495,7 +495,7 @@ EXPORT(sys32_call_table)
d337f35e
JR
408 PTR compat_sys_mq_timedreceive
409 PTR compat_sys_mq_notify /* 4275 */
410 PTR compat_sys_mq_getsetattr
411- PTR sys_ni_syscall /* sys_vserver */
412+ PTR sys32_vserver
b00e13aa 413 PTR compat_sys_waitid
d337f35e
JR
414 PTR sys_ni_syscall /* available, was setaltroot */
415 PTR sys_add_key /* 4280 */
8931d859
AM
416diff -NurpP --minimal linux-4.4.161/arch/mips/kernel/traps.c linux-4.4.161-vs2.3.9.8/arch/mips/kernel/traps.c
417--- linux-4.4.161/arch/mips/kernel/traps.c 2018-10-20 02:34:25.000000000 +0000
418+++ linux-4.4.161-vs2.3.9.8/arch/mips/kernel/traps.c 2018-10-20 05:50:17.000000000 +0000
419@@ -354,9 +354,10 @@ void show_registers(struct pt_regs *regs
2380c486
JR
420
421 __show_regs(regs);
d337f35e 422 print_modules();
2380c486
JR
423- printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n",
424- current->comm, current->pid, current_thread_info(), current,
425- field, current_thread_info()->tp_value);
426+ printk("Process %s (pid: %d:#%u, threadinfo=%p, task=%p, tls=%0*lx)\n",
427+ current->comm, task_pid_nr(current), current->xid,
428+ current_thread_info(), current,
429+ field, current_thread_info()->tp_value);
430 if (cpu_has_userlocal) {
431 unsigned long tls;
432
8931d859
AM
433diff -NurpP --minimal linux-4.4.161/arch/parisc/Kconfig linux-4.4.161-vs2.3.9.8/arch/parisc/Kconfig
434--- linux-4.4.161/arch/parisc/Kconfig 2018-10-20 02:34:25.000000000 +0000
435+++ linux-4.4.161-vs2.3.9.8/arch/parisc/Kconfig 2018-10-20 05:50:17.000000000 +0000
927ca606 436@@ -341,6 +341,8 @@ config SECCOMP
d337f35e 437
bb20add7 438 If unsure, say Y. Only embedded should say N here.
d337f35e
JR
439
440+source "kernel/vserver/Kconfig"
441+
442 source "security/Kconfig"
443
444 source "crypto/Kconfig"
8931d859
AM
445diff -NurpP --minimal linux-4.4.161/arch/parisc/kernel/syscall_table.S linux-4.4.161-vs2.3.9.8/arch/parisc/kernel/syscall_table.S
446--- linux-4.4.161/arch/parisc/kernel/syscall_table.S 2018-10-20 02:34:25.000000000 +0000
447+++ linux-4.4.161-vs2.3.9.8/arch/parisc/kernel/syscall_table.S 2018-10-20 04:57:21.000000000 +0000
b00e13aa 448@@ -358,7 +358,7 @@
d337f35e
JR
449 ENTRY_COMP(mbind) /* 260 */
450 ENTRY_COMP(get_mempolicy)
451 ENTRY_COMP(set_mempolicy)
452- ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */
453+ ENTRY_DIFF(vserver)
454 ENTRY_SAME(add_key)
455 ENTRY_SAME(request_key) /* 265 */
927ca606 456 ENTRY_COMP(keyctl)
8931d859
AM
457diff -NurpP --minimal linux-4.4.161/arch/parisc/kernel/traps.c linux-4.4.161-vs2.3.9.8/arch/parisc/kernel/traps.c
458--- linux-4.4.161/arch/parisc/kernel/traps.c 2018-10-20 02:34:25.000000000 +0000
459+++ linux-4.4.161-vs2.3.9.8/arch/parisc/kernel/traps.c 2018-10-20 04:57:21.000000000 +0000
927ca606 460@@ -235,8 +235,9 @@ void die_if_kernel(char *str, struct pt_
d337f35e
JR
461 return; /* STFU */
462
98968f7b
JR
463 parisc_printk_ratelimited(1, regs,
464- KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
2380c486 465- current->comm, task_pid_nr(current), str, err, regs->iaoq[0]);
98968f7b 466+ KERN_CRIT "%s (pid %d:#%u): %s (code %ld) at " RFMT "\n",
2380c486 467+ current->comm, task_pid_nr(current), current->xid,
d337f35e 468+ str, err, regs->iaoq[0]);
98968f7b
JR
469
470 return;
471 }
927ca606 472@@ -266,8 +267,8 @@ void die_if_kernel(char *str, struct pt_
d337f35e
JR
473 pdc_console_restart();
474
2380c486
JR
475 if (err)
476- printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
477- current->comm, task_pid_nr(current), str, err);
478+ printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld)\n",
479+ current->comm, task_pid_nr(current), current->xid, str, err);
480
481 /* Wot's wrong wif bein' racy? */
482 if (current->thread.flags & PARISC_KERNEL_DEATH) {
8931d859
AM
483diff -NurpP --minimal linux-4.4.161/arch/powerpc/Kconfig linux-4.4.161-vs2.3.9.8/arch/powerpc/Kconfig
484--- linux-4.4.161/arch/powerpc/Kconfig 2018-10-20 02:34:25.000000000 +0000
485+++ linux-4.4.161-vs2.3.9.8/arch/powerpc/Kconfig 2018-10-20 04:57:21.000000000 +0000
d6221c00 486@@ -1081,6 +1081,8 @@ source "lib/Kconfig"
d33d7b00
AM
487
488 source "arch/powerpc/Kconfig.debug"
489
490+source "kernel/vserver/Kconfig"
491+
492 source "security/Kconfig"
493
927ca606 494 source "crypto/Kconfig"
8931d859
AM
495diff -NurpP --minimal linux-4.4.161/arch/powerpc/include/uapi/asm/unistd.h linux-4.4.161-vs2.3.9.8/arch/powerpc/include/uapi/asm/unistd.h
496--- linux-4.4.161/arch/powerpc/include/uapi/asm/unistd.h 2016-01-10 23:01:32.000000000 +0000
497+++ linux-4.4.161-vs2.3.9.8/arch/powerpc/include/uapi/asm/unistd.h 2018-10-20 04:57:21.000000000 +0000
adc1caaa
AM
498@@ -275,7 +275,7 @@
499 #endif
500 #define __NR_rtas 255
501 #define __NR_sys_debug_setcontext 256
502-/* Number 257 is reserved for vserver */
503+#define __NR_vserver 257
504 #define __NR_migrate_pages 258
505 #define __NR_mbind 259
506 #define __NR_get_mempolicy 260
8931d859
AM
507diff -NurpP --minimal linux-4.4.161/arch/powerpc/kernel/traps.c linux-4.4.161-vs2.3.9.8/arch/powerpc/kernel/traps.c
508--- linux-4.4.161/arch/powerpc/kernel/traps.c 2018-10-20 02:34:25.000000000 +0000
509+++ linux-4.4.161-vs2.3.9.8/arch/powerpc/kernel/traps.c 2018-10-20 04:57:21.000000000 +0000
927ca606 510@@ -1315,8 +1315,9 @@ void nonrecoverable_exception(struct pt_
d337f35e
JR
511
512 void trace_syscall(struct pt_regs *regs)
513 {
514- printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n",
2380c486 515- current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0],
61333608 516+ printk("Task: %p(%d:#%u), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n",
2380c486 517+ current, task_pid_nr(current), current->xid,
d337f35e
JR
518+ regs->nip, regs->link, regs->gpr[0],
519 regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
520 }
521
8931d859
AM
522diff -NurpP --minimal linux-4.4.161/arch/s390/Kconfig linux-4.4.161-vs2.3.9.8/arch/s390/Kconfig
523--- linux-4.4.161/arch/s390/Kconfig 2018-10-20 02:34:25.000000000 +0000
524+++ linux-4.4.161-vs2.3.9.8/arch/s390/Kconfig 2018-10-20 04:57:21.000000000 +0000
525@@ -776,6 +776,8 @@ source "fs/Kconfig"
d33d7b00
AM
526
527 source "arch/s390/Kconfig.debug"
528
529+source "kernel/vserver/Kconfig"
530+
531 source "security/Kconfig"
0411181d 532
d33d7b00 533 source "crypto/Kconfig"
8931d859
AM
534diff -NurpP --minimal linux-4.4.161/arch/s390/include/asm/tlb.h linux-4.4.161-vs2.3.9.8/arch/s390/include/asm/tlb.h
535--- linux-4.4.161/arch/s390/include/asm/tlb.h 2016-01-10 23:01:32.000000000 +0000
536+++ linux-4.4.161-vs2.3.9.8/arch/s390/include/asm/tlb.h 2018-10-20 04:57:21.000000000 +0000
dd5f3080 537@@ -24,6 +24,7 @@
0411181d 538 #include <linux/mm.h>
d33d7b00 539 #include <linux/pagemap.h>
0411181d 540 #include <linux/swap.h>
0411181d
AM
541+
542 #include <asm/processor.h>
543 #include <asm/pgalloc.h>
763640ca 544 #include <asm/tlbflush.h>
8931d859
AM
545diff -NurpP --minimal linux-4.4.161/arch/s390/include/uapi/asm/unistd.h linux-4.4.161-vs2.3.9.8/arch/s390/include/uapi/asm/unistd.h
546--- linux-4.4.161/arch/s390/include/uapi/asm/unistd.h 2016-01-10 23:01:32.000000000 +0000
547+++ linux-4.4.161-vs2.3.9.8/arch/s390/include/uapi/asm/unistd.h 2018-10-20 04:57:21.000000000 +0000
92598135 548@@ -200,7 +200,7 @@
927ca606
AM
549 #define __NR_clock_gettime 260
550 #define __NR_clock_getres 261
551 #define __NR_clock_nanosleep 262
0411181d
AM
552-/* Number 263 is reserved for vserver */
553+#define __NR_vserver 263
554 #define __NR_statfs64 265
555 #define __NR_fstatfs64 266
556 #define __NR_remap_file_pages 267
8931d859
AM
557diff -NurpP --minimal linux-4.4.161/arch/s390/kernel/ptrace.c linux-4.4.161-vs2.3.9.8/arch/s390/kernel/ptrace.c
558--- linux-4.4.161/arch/s390/kernel/ptrace.c 2018-10-20 02:34:25.000000000 +0000
559+++ linux-4.4.161-vs2.3.9.8/arch/s390/kernel/ptrace.c 2018-10-20 04:57:21.000000000 +0000
db55b927 560@@ -21,6 +21,7 @@
ec22aa5c
AM
561 #include <linux/tracehook.h>
562 #include <linux/seccomp.h>
969f5c41 563 #include <linux/compat.h>
db55b927 564+#include <linux/vs_base.h>
ec22aa5c 565 #include <trace/syscall.h>
d337f35e 566 #include <asm/segment.h>
db55b927 567 #include <asm/page.h>
8931d859
AM
568diff -NurpP --minimal linux-4.4.161/arch/s390/kernel/syscalls.S linux-4.4.161-vs2.3.9.8/arch/s390/kernel/syscalls.S
569--- linux-4.4.161/arch/s390/kernel/syscalls.S 2018-10-20 02:34:25.000000000 +0000
570+++ linux-4.4.161-vs2.3.9.8/arch/s390/kernel/syscalls.S 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
571@@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,compat_sys_clo
572 SYSCALL(sys_clock_gettime,compat_sys_clock_gettime) /* 260 */
573 SYSCALL(sys_clock_getres,compat_sys_clock_getres)
574 SYSCALL(sys_clock_nanosleep,compat_sys_clock_nanosleep)
575-NI_SYSCALL /* reserved for vserver */
d337f35e 576+SYSCALL(sys_vserver,sys32_vserver)
927ca606
AM
577 SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
578 SYSCALL(sys_statfs64,compat_sys_statfs64)
579 SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
8931d859
AM
580diff -NurpP --minimal linux-4.4.161/arch/sh/Kconfig linux-4.4.161-vs2.3.9.8/arch/sh/Kconfig
581--- linux-4.4.161/arch/sh/Kconfig 2016-01-10 23:01:32.000000000 +0000
582+++ linux-4.4.161-vs2.3.9.8/arch/sh/Kconfig 2018-10-20 04:57:21.000000000 +0000
927ca606 583@@ -883,6 +883,8 @@ source "fs/Kconfig"
d337f35e
JR
584
585 source "arch/sh/Kconfig.debug"
586
587+source "kernel/vserver/Kconfig"
588+
589 source "security/Kconfig"
590
591 source "crypto/Kconfig"
8931d859
AM
592diff -NurpP --minimal linux-4.4.161/arch/sh/kernel/irq.c linux-4.4.161-vs2.3.9.8/arch/sh/kernel/irq.c
593--- linux-4.4.161/arch/sh/kernel/irq.c 2016-01-10 23:01:32.000000000 +0000
594+++ linux-4.4.161-vs2.3.9.8/arch/sh/kernel/irq.c 2018-10-20 04:57:21.000000000 +0000
f86f0b53 595@@ -14,6 +14,7 @@
7e46296a 596 #include <linux/ftrace.h>
76514441 597 #include <linux/delay.h>
763640ca 598 #include <linux/ratelimit.h>
f86f0b53 599+// #include <linux/vs_context.h>
d337f35e 600 #include <asm/processor.h>
2380c486 601 #include <asm/machvec.h>
f86f0b53 602 #include <asm/uaccess.h>
8931d859
AM
603diff -NurpP --minimal linux-4.4.161/arch/sparc/Kconfig linux-4.4.161-vs2.3.9.8/arch/sparc/Kconfig
604--- linux-4.4.161/arch/sparc/Kconfig 2018-10-20 02:34:25.000000000 +0000
605+++ linux-4.4.161-vs2.3.9.8/arch/sparc/Kconfig 2018-10-20 04:57:21.000000000 +0000
927ca606 606@@ -561,6 +561,8 @@ source "fs/Kconfig"
d33d7b00
AM
607
608 source "arch/sparc/Kconfig.debug"
609
610+source "kernel/vserver/Kconfig"
611+
612 source "security/Kconfig"
613
614 source "crypto/Kconfig"
8931d859
AM
615diff -NurpP --minimal linux-4.4.161/arch/sparc/include/uapi/asm/unistd.h linux-4.4.161-vs2.3.9.8/arch/sparc/include/uapi/asm/unistd.h
616--- linux-4.4.161/arch/sparc/include/uapi/asm/unistd.h 2016-01-10 23:01:32.000000000 +0000
617+++ linux-4.4.161-vs2.3.9.8/arch/sparc/include/uapi/asm/unistd.h 2018-10-20 04:57:21.000000000 +0000
537831f9 618@@ -332,7 +332,7 @@
ec22aa5c
AM
619 #define __NR_timer_getoverrun 264
620 #define __NR_timer_delete 265
621 #define __NR_timer_create 266
622-/* #define __NR_vserver 267 Reserved for VSERVER */
623+#define __NR_vserver 267
624 #define __NR_io_setup 268
625 #define __NR_io_destroy 269
626 #define __NR_io_submit 270
8931d859
AM
627diff -NurpP --minimal linux-4.4.161/arch/sparc/kernel/systbls_32.S linux-4.4.161-vs2.3.9.8/arch/sparc/kernel/systbls_32.S
628--- linux-4.4.161/arch/sparc/kernel/systbls_32.S 2016-01-10 23:01:32.000000000 +0000
629+++ linux-4.4.161-vs2.3.9.8/arch/sparc/kernel/systbls_32.S 2018-10-20 04:57:21.000000000 +0000
50e68740 630@@ -70,7 +70,7 @@ sys_call_table:
a168f21d 631 /*250*/ .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall
50e68740
JR
632 /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
633 /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
634-/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
635+/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
636 /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
637 /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
638 /*280*/ .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
8931d859
AM
639diff -NurpP --minimal linux-4.4.161/arch/sparc/kernel/systbls_64.S linux-4.4.161-vs2.3.9.8/arch/sparc/kernel/systbls_64.S
640--- linux-4.4.161/arch/sparc/kernel/systbls_64.S 2016-01-10 23:01:32.000000000 +0000
641+++ linux-4.4.161-vs2.3.9.8/arch/sparc/kernel/systbls_64.S 2018-10-20 04:57:21.000000000 +0000
50e68740 642@@ -71,7 +71,7 @@ sys_call_table32:
b00e13aa 643 /*250*/ .word sys_mremap, compat_sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
50e68740
JR
644 .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
645 /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
646- .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
647+ .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy
648 /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
649 .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
b00e13aa 650 /*280*/ .word sys_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat
927ca606 651@@ -152,7 +152,7 @@ sys_call_table:
a168f21d 652 /*250*/ .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
50e68740
JR
653 .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
654 /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
655- .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
656+ .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
657 /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
658 .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
659 /*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
8931d859
AM
660diff -NurpP --minimal linux-4.4.161/arch/um/Kconfig.rest linux-4.4.161-vs2.3.9.8/arch/um/Kconfig.rest
661--- linux-4.4.161/arch/um/Kconfig.rest 2016-01-10 23:01:32.000000000 +0000
662+++ linux-4.4.161-vs2.3.9.8/arch/um/Kconfig.rest 2018-10-20 04:57:21.000000000 +0000
f6c5ef8b 663@@ -12,6 +12,8 @@ source "arch/um/Kconfig.net"
d33d7b00
AM
664
665 source "fs/Kconfig"
666
667+source "kernel/vserver/Kconfig"
668+
669 source "security/Kconfig"
670
671 source "crypto/Kconfig"
8931d859
AM
672diff -NurpP --minimal linux-4.4.161/arch/x86/Kconfig linux-4.4.161-vs2.3.9.8/arch/x86/Kconfig
673--- linux-4.4.161/arch/x86/Kconfig 2018-10-20 02:34:25.000000000 +0000
674+++ linux-4.4.161-vs2.3.9.8/arch/x86/Kconfig 2018-10-20 04:57:21.000000000 +0000
675@@ -2693,6 +2693,8 @@ source "fs/Kconfig"
e03b8c3c 676
d33d7b00 677 source "arch/x86/Kconfig.debug"
e03b8c3c
AM
678
679+source "kernel/vserver/Kconfig"
680+
681 source "security/Kconfig"
682
683 source "crypto/Kconfig"
8931d859
AM
684diff -NurpP --minimal linux-4.4.161/arch/x86/entry/syscalls/syscall_32.tbl linux-4.4.161-vs2.3.9.8/arch/x86/entry/syscalls/syscall_32.tbl
685--- linux-4.4.161/arch/x86/entry/syscalls/syscall_32.tbl 2018-10-20 02:34:25.000000000 +0000
686+++ linux-4.4.161-vs2.3.9.8/arch/x86/entry/syscalls/syscall_32.tbl 2018-10-20 04:57:21.000000000 +0000
db55b927
AM
687@@ -279,7 +279,7 @@
688 270 i386 tgkill sys_tgkill
689 271 i386 utimes sys_utimes compat_sys_utimes
690 272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
691-273 i386 vserver
692+273 i386 vserver sys_vserver sys32_vserver
693 274 i386 mbind sys_mbind
694 275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
695 276 i386 set_mempolicy sys_set_mempolicy
8931d859
AM
696diff -NurpP --minimal linux-4.4.161/arch/x86/entry/syscalls/syscall_64.tbl linux-4.4.161-vs2.3.9.8/arch/x86/entry/syscalls/syscall_64.tbl
697--- linux-4.4.161/arch/x86/entry/syscalls/syscall_64.tbl 2016-01-10 23:01:32.000000000 +0000
698+++ linux-4.4.161-vs2.3.9.8/arch/x86/entry/syscalls/syscall_64.tbl 2018-10-20 04:57:21.000000000 +0000
db55b927 699@@ -242,7 +242,7 @@
1e8b8f9b
AM
700 233 common epoll_ctl sys_epoll_ctl
701 234 common tgkill sys_tgkill
702 235 common utimes sys_utimes
db55b927
AM
703-236 64 vserver
704+236 64 vserver sys_vserver
1e8b8f9b
AM
705 237 common mbind sys_mbind
706 238 common set_mempolicy sys_set_mempolicy
707 239 common get_mempolicy sys_get_mempolicy
8931d859
AM
708diff -NurpP --minimal linux-4.4.161/block/ioprio.c linux-4.4.161-vs2.3.9.8/block/ioprio.c
709--- linux-4.4.161/block/ioprio.c 2018-10-20 02:34:26.000000000 +0000
710+++ linux-4.4.161-vs2.3.9.8/block/ioprio.c 2018-10-20 04:57:21.000000000 +0000
bb20add7
AM
711@@ -28,6 +28,7 @@
712 #include <linux/syscalls.h>
713 #include <linux/security.h>
714 #include <linux/pid_namespace.h>
715+#include <linux/vs_base.h>
716
717 int set_task_ioprio(struct task_struct *task, int ioprio)
718 {
719@@ -105,6 +106,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which,
720 else
721 pgrp = find_vpid(who);
722 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
723+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
724+ continue;
725 ret = set_task_ioprio(p, ioprio);
726 if (ret)
727 break;
927ca606 728@@ -203,6 +206,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which,
bb20add7
AM
729 else
730 pgrp = find_vpid(who);
731 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
732+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
733+ continue;
734 tmpio = get_task_ioprio(p);
735 if (tmpio < 0)
736 continue;
8931d859
AM
737diff -NurpP --minimal linux-4.4.161/drivers/block/Kconfig linux-4.4.161-vs2.3.9.8/drivers/block/Kconfig
738--- linux-4.4.161/drivers/block/Kconfig 2016-01-10 23:01:32.000000000 +0000
739+++ linux-4.4.161-vs2.3.9.8/drivers/block/Kconfig 2018-10-20 04:57:21.000000000 +0000
bb20add7 740@@ -283,6 +283,13 @@ config BLK_DEV_CRYPTOLOOP
2bf5ad28
AM
741
742 source "drivers/block/drbd/Kconfig"
d337f35e
JR
743
744+config BLK_DEV_VROOT
745+ tristate "Virtual Root device support"
746+ depends on QUOTACTL
747+ ---help---
748+ Saying Y here will allow you to use quota/fs ioctls on a shared
749+ partition within a virtual server without compromising security.
750+
751 config BLK_DEV_NBD
752 tristate "Network block device support"
753 depends on NET
8931d859
AM
754diff -NurpP --minimal linux-4.4.161/drivers/block/Makefile linux-4.4.161-vs2.3.9.8/drivers/block/Makefile
755--- linux-4.4.161/drivers/block/Makefile 2016-01-10 23:01:32.000000000 +0000
756+++ linux-4.4.161-vs2.3.9.8/drivers/block/Makefile 2018-10-20 04:57:21.000000000 +0000
927ca606 757@@ -32,6 +32,7 @@ obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
bb20add7 758
d33d7b00 759 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
d33d7b00
AM
760 obj-$(CONFIG_BLK_DEV_HD) += hd.o
761+obj-$(CONFIG_BLK_DEV_VROOT) += vroot.o
762
763 obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
763640ca 764 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
8931d859
AM
765diff -NurpP --minimal linux-4.4.161/drivers/block/loop.c linux-4.4.161-vs2.3.9.8/drivers/block/loop.c
766--- linux-4.4.161/drivers/block/loop.c 2018-10-20 02:34:26.000000000 +0000
767+++ linux-4.4.161-vs2.3.9.8/drivers/block/loop.c 2018-10-20 04:57:21.000000000 +0000
927ca606 768@@ -76,6 +76,7 @@
a168f21d 769 #include <linux/miscdevice.h>
f6c5ef8b 770 #include <linux/falloc.h>
927ca606 771 #include <linux/uio.h>
d337f35e 772+#include <linux/vs_context.h>
c2e5f7c8 773 #include "loop.h"
f6c5ef8b 774
d337f35e 775 #include <asm/uaccess.h>
8931d859 776@@ -947,6 +948,7 @@ static int loop_set_fd(struct loop_devic
d337f35e
JR
777 lo->lo_blocksize = lo_blocksize;
778 lo->lo_device = bdev;
779 lo->lo_flags = lo_flags;
780+ lo->lo_xid = vx_current_xid();
781 lo->lo_backing_file = file;
927ca606 782 lo->transfer = NULL;
d337f35e 783 lo->ioctl = NULL;
8931d859 784@@ -1067,6 +1069,7 @@ static int loop_clr_fd(struct loop_devic
927ca606 785 lo->lo_offset = 0;
f6c5ef8b 786 lo->lo_sizelimit = 0;
2380c486 787 lo->lo_encrypt_key_size = 0;
2380c486
JR
788+ lo->lo_xid = 0;
789 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
790 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
791 memset(lo->lo_file_name, 0, LO_NAME_SIZE);
8931d859 792@@ -1113,7 +1116,7 @@ loop_set_status(struct loop_device *lo,
2380c486 793
ec22aa5c 794 if (lo->lo_encrypt_key_size &&
537831f9 795 !uid_eq(lo->lo_key_owner, uid) &&
d337f35e
JR
796- !capable(CAP_SYS_ADMIN))
797+ !vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP))
798 return -EPERM;
799 if (lo->lo_state != Lo_bound)
800 return -ENXIO;
8931d859 801@@ -1218,7 +1221,8 @@ loop_get_status(struct loop_device *lo,
d337f35e
JR
802 memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
803 info->lo_encrypt_type =
804 lo->lo_encryption ? lo->lo_encryption->number : 0;
805- if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
806+ if (lo->lo_encrypt_key_size &&
807+ vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) {
808 info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
809 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
810 lo->lo_encrypt_key_size);
8931d859 811@@ -1579,6 +1583,11 @@ static int lo_open(struct block_device *
a168f21d
AM
812 goto out;
813 }
d337f35e 814
dd5f3080 815+ if (!vx_check(lo->lo_xid, VS_IDENT|VS_HOSTID|VS_ADMIN_P)) {
816+ err = -EACCES;
817+ goto out;
818+ }
d337f35e 819+
927ca606
AM
820 atomic_inc(&lo->lo_refcnt);
821 out:
822 mutex_unlock(&loop_index_mutex);
8931d859
AM
823diff -NurpP --minimal linux-4.4.161/drivers/block/loop.h linux-4.4.161-vs2.3.9.8/drivers/block/loop.h
824--- linux-4.4.161/drivers/block/loop.h 2018-10-20 02:34:26.000000000 +0000
825+++ linux-4.4.161-vs2.3.9.8/drivers/block/loop.h 2018-10-20 04:57:21.000000000 +0000
927ca606 826@@ -43,6 +43,7 @@ struct loop_device {
c2e5f7c8
JR
827 struct loop_func_table *lo_encryption;
828 __u32 lo_init[2];
829 kuid_t lo_key_owner; /* Who set the key */
830+ vxid_t lo_xid;
831 int (*ioctl)(struct loop_device *, int cmd,
832 unsigned long arg);
833
8931d859
AM
834diff -NurpP --minimal linux-4.4.161/drivers/block/vroot.c linux-4.4.161-vs2.3.9.8/drivers/block/vroot.c
835--- linux-4.4.161/drivers/block/vroot.c 1970-01-01 00:00:00.000000000 +0000
836+++ linux-4.4.161-vs2.3.9.8/drivers/block/vroot.c 2018-10-20 04:57:21.000000000 +0000
f19bd705 837@@ -0,0 +1,291 @@
d337f35e
JR
838+/*
839+ * linux/drivers/block/vroot.c
840+ *
d6221c00
AM
841+ * written by Herbert Pötzl, 9/11/2002
842+ * ported to 2.6.10 by Herbert Pötzl, 30/12/2004
d337f35e
JR
843+ *
844+ * based on the loop.c code by Theodore Ts'o.
845+ *
d6221c00 846+ * Copyright (C) 2002-2007 by Herbert Pötzl.
d337f35e
JR
847+ * Redistribution of this file is permitted under the
848+ * GNU General Public License.
849+ *
850+ */
851+
852+#include <linux/module.h>
853+#include <linux/moduleparam.h>
854+#include <linux/file.h>
855+#include <linux/major.h>
856+#include <linux/blkdev.h>
76514441 857+#include <linux/slab.h>
d337f35e
JR
858+
859+#include <linux/vroot.h>
860+#include <linux/vs_context.h>
861+
862+
863+static int max_vroot = 8;
864+
865+static struct vroot_device *vroot_dev;
866+static struct gendisk **disks;
867+
868+
869+static int vroot_set_dev(
870+ struct vroot_device *vr,
d337f35e
JR
871+ struct block_device *bdev,
872+ unsigned int arg)
873+{
874+ struct block_device *real_bdev;
875+ struct file *file;
876+ struct inode *inode;
877+ int error;
878+
879+ error = -EBUSY;
880+ if (vr->vr_state != Vr_unbound)
881+ goto out;
882+
883+ error = -EBADF;
884+ file = fget(arg);
885+ if (!file)
886+ goto out;
887+
888+ error = -EINVAL;
927ca606 889+ inode = file->f_path.dentry->d_inode;
d337f35e
JR
890+
891+
892+ if (S_ISBLK(inode->i_mode)) {
893+ real_bdev = inode->i_bdev;
894+ vr->vr_device = real_bdev;
895+ __iget(real_bdev->bd_inode);
896+ } else
897+ goto out_fput;
898+
899+ vxdprintk(VXD_CBIT(misc, 0),
900+ "vroot[%d]_set_dev: dev=" VXF_DEV,
901+ vr->vr_number, VXD_DEV(real_bdev));
902+
903+ vr->vr_state = Vr_bound;
904+ error = 0;
905+
906+ out_fput:
907+ fput(file);
908+ out:
909+ return error;
910+}
911+
912+static int vroot_clr_dev(
913+ struct vroot_device *vr,
d337f35e
JR
914+ struct block_device *bdev)
915+{
916+ struct block_device *real_bdev;
917+
918+ if (vr->vr_state != Vr_bound)
919+ return -ENXIO;
920+ if (vr->vr_refcnt > 1) /* we needed one fd for the ioctl */
921+ return -EBUSY;
922+
923+ real_bdev = vr->vr_device;
924+
925+ vxdprintk(VXD_CBIT(misc, 0),
926+ "vroot[%d]_clr_dev: dev=" VXF_DEV,
927+ vr->vr_number, VXD_DEV(real_bdev));
928+
929+ bdput(real_bdev);
930+ vr->vr_state = Vr_unbound;
931+ vr->vr_device = NULL;
932+ return 0;
933+}
934+
935+
ec22aa5c 936+static int vr_ioctl(struct block_device *bdev, fmode_t mode,
d337f35e
JR
937+ unsigned int cmd, unsigned long arg)
938+{
ec22aa5c 939+ struct vroot_device *vr = bdev->bd_disk->private_data;
d337f35e
JR
940+ int err;
941+
942+ down(&vr->vr_ctl_mutex);
943+ switch (cmd) {
944+ case VROOT_SET_DEV:
ec22aa5c 945+ err = vroot_set_dev(vr, bdev, arg);
d337f35e
JR
946+ break;
947+ case VROOT_CLR_DEV:
ec22aa5c 948+ err = vroot_clr_dev(vr, bdev);
d337f35e
JR
949+ break;
950+ default:
951+ err = -EINVAL;
952+ break;
953+ }
954+ up(&vr->vr_ctl_mutex);
955+ return err;
956+}
957+
ec22aa5c 958+static int vr_open(struct block_device *bdev, fmode_t mode)
d337f35e 959+{
ec22aa5c 960+ struct vroot_device *vr = bdev->bd_disk->private_data;
d337f35e
JR
961+
962+ down(&vr->vr_ctl_mutex);
963+ vr->vr_refcnt++;
964+ up(&vr->vr_ctl_mutex);
965+ return 0;
966+}
967+
09be7631 968+static void vr_release(struct gendisk *disk, fmode_t mode)
d337f35e 969+{
ec22aa5c 970+ struct vroot_device *vr = disk->private_data;
d337f35e
JR
971+
972+ down(&vr->vr_ctl_mutex);
973+ --vr->vr_refcnt;
974+ up(&vr->vr_ctl_mutex);
d337f35e
JR
975+}
976+
977+static struct block_device_operations vr_fops = {
978+ .owner = THIS_MODULE,
979+ .open = vr_open,
980+ .release = vr_release,
981+ .ioctl = vr_ioctl,
982+};
983+
f19bd705 984+static blk_qc_t vroot_make_request(struct request_queue *q, struct bio *bio)
b3b0d4fd
AM
985+{
986+ printk("vroot_make_request %p, %p\n", q, bio);
987+ bio_io_error(bio);
f19bd705 988+ return BLK_QC_T_NONE;
b3b0d4fd
AM
989+}
990+
d337f35e
JR
991+struct block_device *__vroot_get_real_bdev(struct block_device *bdev)
992+{
993+ struct inode *inode = bdev->bd_inode;
994+ struct vroot_device *vr;
995+ struct block_device *real_bdev;
996+ int minor = iminor(inode);
997+
998+ vr = &vroot_dev[minor];
999+ real_bdev = vr->vr_device;
1000+
1001+ vxdprintk(VXD_CBIT(misc, 0),
1002+ "vroot[%d]_get_real_bdev: dev=" VXF_DEV,
1003+ vr->vr_number, VXD_DEV(real_bdev));
1004+
1005+ if (vr->vr_state != Vr_bound)
1006+ return ERR_PTR(-ENXIO);
1007+
1008+ __iget(real_bdev->bd_inode);
1009+ return real_bdev;
1010+}
1011+
b3b0d4fd
AM
1012+
1013+
d337f35e
JR
1014+/*
1015+ * And now the modules code and kernel interface.
1016+ */
1017+
1018+module_param(max_vroot, int, 0);
1019+
1020+MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)");
1021+MODULE_LICENSE("GPL");
1022+MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR);
1023+
d6221c00 1024+MODULE_AUTHOR ("Herbert P?tzl");
d337f35e
JR
1025+MODULE_DESCRIPTION ("Virtual Root Device Mapper");
1026+
1027+
1028+int __init vroot_init(void)
1029+{
1030+ int err, i;
1031+
1032+ if (max_vroot < 1 || max_vroot > 256) {
1033+ max_vroot = MAX_VROOT_DEFAULT;
1034+ printk(KERN_WARNING "vroot: invalid max_vroot "
1035+ "(must be between 1 and 256), "
1036+ "using default (%d)\n", max_vroot);
1037+ }
1038+
1039+ if (register_blkdev(VROOT_MAJOR, "vroot"))
1040+ return -EIO;
1041+
1042+ err = -ENOMEM;
1043+ vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL);
1044+ if (!vroot_dev)
1045+ goto out_mem1;
1046+ memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device));
1047+
1048+ disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL);
1049+ if (!disks)
1050+ goto out_mem2;
1051+
1052+ for (i = 0; i < max_vroot; i++) {
1053+ disks[i] = alloc_disk(1);
1054+ if (!disks[i])
1055+ goto out_mem3;
2380c486
JR
1056+ disks[i]->queue = blk_alloc_queue(GFP_KERNEL);
1057+ if (!disks[i]->queue)
1058+ goto out_mem3;
b3b0d4fd 1059+ blk_queue_make_request(disks[i]->queue, vroot_make_request);
d337f35e
JR
1060+ }
1061+
1062+ for (i = 0; i < max_vroot; i++) {
1063+ struct vroot_device *vr = &vroot_dev[i];
1064+ struct gendisk *disk = disks[i];
1065+
1066+ memset(vr, 0, sizeof(*vr));
5a9fc8e8 1067+ sema_init(&vr->vr_ctl_mutex, 1);
d337f35e
JR
1068+ vr->vr_number = i;
1069+ disk->major = VROOT_MAJOR;
1070+ disk->first_minor = i;
1071+ disk->fops = &vr_fops;
1072+ sprintf(disk->disk_name, "vroot%d", i);
1073+ disk->private_data = vr;
1074+ }
1075+
1076+ err = register_vroot_grb(&__vroot_get_real_bdev);
1077+ if (err)
1078+ goto out_mem3;
1079+
1080+ for (i = 0; i < max_vroot; i++)
1081+ add_disk(disks[i]);
1082+ printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot);
1083+ return 0;
1084+
1085+out_mem3:
1086+ while (i--)
1087+ put_disk(disks[i]);
1088+ kfree(disks);
1089+out_mem2:
1090+ kfree(vroot_dev);
1091+out_mem1:
1092+ unregister_blkdev(VROOT_MAJOR, "vroot");
1093+ printk(KERN_ERR "vroot: ran out of memory\n");
1094+ return err;
1095+}
1096+
1097+void vroot_exit(void)
1098+{
1099+ int i;
1100+
1101+ if (unregister_vroot_grb(&__vroot_get_real_bdev))
1102+ printk(KERN_WARNING "vroot: cannot unregister grb\n");
1103+
1104+ for (i = 0; i < max_vroot; i++) {
1105+ del_gendisk(disks[i]);
1106+ put_disk(disks[i]);
1107+ }
2380c486 1108+ unregister_blkdev(VROOT_MAJOR, "vroot");
d337f35e
JR
1109+
1110+ kfree(disks);
1111+ kfree(vroot_dev);
1112+}
1113+
1114+module_init(vroot_init);
1115+module_exit(vroot_exit);
1116+
1117+#ifndef MODULE
1118+
1119+static int __init max_vroot_setup(char *str)
1120+{
1121+ max_vroot = simple_strtol(str, NULL, 0);
1122+ return 1;
1123+}
1124+
1125+__setup("max_vroot=", max_vroot_setup);
1126+
1127+#endif
1128+
8931d859
AM
1129diff -NurpP --minimal linux-4.4.161/drivers/infiniband/core/addr.c linux-4.4.161-vs2.3.9.8/drivers/infiniband/core/addr.c
1130--- linux-4.4.161/drivers/infiniband/core/addr.c 2018-10-20 02:34:27.000000000 +0000
1131+++ linux-4.4.161-vs2.3.9.8/drivers/infiniband/core/addr.c 2018-10-20 04:57:21.000000000 +0000
1132@@ -299,7 +299,7 @@ static int addr6_resolve(struct sockaddr
5dd10c98 1133
763640ca 1134 if (ipv6_addr_any(&fl6.saddr)) {
927ca606 1135 ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
763640ca
JR
1136- &fl6.daddr, 0, &fl6.saddr);
1137+ &fl6.daddr, 0, &fl6.saddr, NULL);
5dd10c98
AM
1138 if (ret)
1139 goto put;
1140
8931d859
AM
1141diff -NurpP --minimal linux-4.4.161/drivers/md/dm-ioctl.c linux-4.4.161-vs2.3.9.8/drivers/md/dm-ioctl.c
1142--- linux-4.4.161/drivers/md/dm-ioctl.c 2018-10-20 02:34:27.000000000 +0000
1143+++ linux-4.4.161-vs2.3.9.8/drivers/md/dm-ioctl.c 2018-10-20 04:57:21.000000000 +0000
3bac966d
AM
1144@@ -16,6 +16,7 @@
1145 #include <linux/dm-ioctl.h>
1146 #include <linux/hdreg.h>
1147 #include <linux/compat.h>
1148+#include <linux/vs_context.h>
1149
1150 #include <asm/uaccess.h>
1151
c2e5f7c8 1152@@ -114,7 +115,8 @@ static struct hash_cell *__get_name_cell
3bac966d
AM
1153 unsigned int h = hash_str(str);
1154
1155 list_for_each_entry (hc, _name_buckets + h, name_list)
1156- if (!strcmp(hc->name, str)) {
1157+ if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1158+ !strcmp(hc->name, str)) {
1159 dm_get(hc->md);
1160 return hc;
1161 }
c2e5f7c8 1162@@ -128,7 +130,8 @@ static struct hash_cell *__get_uuid_cell
3bac966d
AM
1163 unsigned int h = hash_str(str);
1164
1165 list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
1166- if (!strcmp(hc->uuid, str)) {
1167+ if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1168+ !strcmp(hc->uuid, str)) {
1169 dm_get(hc->md);
1170 return hc;
1171 }
c2e5f7c8 1172@@ -139,13 +142,15 @@ static struct hash_cell *__get_uuid_cell
a168f21d
AM
1173 static struct hash_cell *__get_dev_cell(uint64_t dev)
1174 {
1175 struct mapped_device *md;
1176- struct hash_cell *hc;
1177+ struct hash_cell *hc = NULL;
1178
1179 md = dm_get_md(huge_decode_dev(dev));
1180 if (!md)
1181 return NULL;
1182
1183- hc = dm_get_mdptr(md);
1184+ if (vx_check(dm_get_xid(md), VS_WATCH_P | VS_IDENT))
1185+ hc = dm_get_mdptr(md);
1186+
1187 if (!hc) {
1188 dm_put(md);
1189 return NULL;
c2e5f7c8 1190@@ -467,6 +472,9 @@ typedef int (*ioctl_fn)(struct dm_ioctl
3bac966d
AM
1191
1192 static int remove_all(struct dm_ioctl *param, size_t param_size)
1193 {
1194+ if (!vx_check(0, VS_ADMIN))
1195+ return -EPERM;
1196+
c2e5f7c8 1197 dm_hash_remove_all(true, !!(param->flags & DM_DEFERRED_REMOVE), false);
3bac966d
AM
1198 param->data_size = 0;
1199 return 0;
c2e5f7c8 1200@@ -514,6 +522,8 @@ static int list_devices(struct dm_ioctl
3bac966d
AM
1201 */
1202 for (i = 0; i < NUM_BUCKETS; i++) {
1203 list_for_each_entry (hc, _name_buckets + i, name_list) {
1204+ if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1205+ continue;
1206 needed += sizeof(struct dm_name_list);
1207 needed += strlen(hc->name) + 1;
1208 needed += ALIGN_MASK;
c2e5f7c8 1209@@ -537,6 +547,8 @@ static int list_devices(struct dm_ioctl
3bac966d
AM
1210 */
1211 for (i = 0; i < NUM_BUCKETS; i++) {
1212 list_for_each_entry (hc, _name_buckets + i, name_list) {
1213+ if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1214+ continue;
1215 if (old_nl)
1216 old_nl->next = (uint32_t) ((void *) nl -
1217 (void *) old_nl);
927ca606 1218@@ -1801,8 +1813,8 @@ static int ctl_ioctl(uint command, struc
763640ca 1219 size_t input_param_size;
b00e13aa 1220 struct dm_ioctl param_kernel;
3bac966d
AM
1221
1222- /* only root can play with this */
1223- if (!capable(CAP_SYS_ADMIN))
1224+ /* only root and certain contexts can play with this */
1225+ if (!vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_MAPPER))
1226 return -EACCES;
1227
1228 if (_IOC_TYPE(command) != DM_IOCTL)
8931d859
AM
1229diff -NurpP --minimal linux-4.4.161/drivers/md/dm.c linux-4.4.161-vs2.3.9.8/drivers/md/dm.c
1230--- linux-4.4.161/drivers/md/dm.c 2018-10-20 02:34:27.000000000 +0000
1231+++ linux-4.4.161-vs2.3.9.8/drivers/md/dm.c 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
1232@@ -25,6 +25,7 @@
1233 #include <linux/elevator.h> /* for rq_end_sector() */
1234 #include <linux/blk-mq.h>
1235 #include <linux/pr.h>
d33d7b00
AM
1236+#include <linux/vs_base.h>
1237
1238 #include <trace/events/block.h>
1239
927ca606 1240@@ -144,6 +145,7 @@ struct mapped_device {
c2e5f7c8 1241 struct mutex suspend_lock;
d33d7b00
AM
1242 atomic_t holders;
1243 atomic_t open_count;
61333608 1244+ vxid_t xid;
d33d7b00 1245
c2e5f7c8
JR
1246 /*
1247 * The current mapping.
927ca606 1248@@ -445,6 +447,7 @@ int dm_deleting_md(struct mapped_device
d33d7b00
AM
1249 static int dm_blk_open(struct block_device *bdev, fmode_t mode)
1250 {
1251 struct mapped_device *md;
1252+ int ret = -ENXIO;
1253
1254 spin_lock(&_minor_lock);
1255
927ca606 1256@@ -453,17 +456,19 @@ static int dm_blk_open(struct block_devi
d33d7b00
AM
1257 goto out;
1258
1259 if (test_bit(DMF_FREEING, &md->flags) ||
1260- dm_deleting_md(md)) {
1261- md = NULL;
1262+ dm_deleting_md(md))
1263+ goto out;
1264+
1265+ ret = -EACCES;
1266+ if (!vx_check(md->xid, VS_IDENT|VS_HOSTID))
1267 goto out;
1268- }
1269
1270 dm_get(md);
1271 atomic_inc(&md->open_count);
d33d7b00
AM
1272+ ret = 0;
1273 out:
1274 spin_unlock(&_minor_lock);
1275-
1276- return md ? 0 : -ENXIO;
1277+ return ret;
1278 }
1279
09be7631 1280 static void dm_blk_close(struct gendisk *disk, fmode_t mode)
927ca606 1281@@ -909,6 +914,14 @@ int dm_set_geometry(struct mapped_device
d33d7b00
AM
1282 return 0;
1283 }
1284
1285+/*
1286+ * Get the xid associated with a dm device
1287+ */
61333608 1288+vxid_t dm_get_xid(struct mapped_device *md)
d33d7b00
AM
1289+{
1290+ return md->xid;
1291+}
1292+
1293 /*-----------------------------------------------------------------
1294 * CRUD START:
1295 * A more elegant soln is in the works that uses the queue
8931d859 1296@@ -2381,6 +2394,7 @@ static struct mapped_device *alloc_dev(i
bb20add7 1297 INIT_LIST_HEAD(&md->table_devices);
d33d7b00
AM
1298 spin_lock_init(&md->uevent_lock);
1299
1300+ md->xid = vx_current_xid();
1301 md->queue = blk_alloc_queue(GFP_KERNEL);
1302 if (!md->queue)
927ca606 1303 goto bad;
8931d859
AM
1304diff -NurpP --minimal linux-4.4.161/drivers/md/dm.h linux-4.4.161-vs2.3.9.8/drivers/md/dm.h
1305--- linux-4.4.161/drivers/md/dm.h 2016-01-10 23:01:32.000000000 +0000
1306+++ linux-4.4.161-vs2.3.9.8/drivers/md/dm.h 2018-10-20 04:57:21.000000000 +0000
927ca606 1307@@ -52,6 +52,8 @@ struct dm_dev_internal {
d33d7b00
AM
1308 struct dm_table;
1309 struct dm_md_mempools;
1310
61333608 1311+vxid_t dm_get_xid(struct mapped_device *md);
d33d7b00
AM
1312+
1313 /*-----------------------------------------------------------------
1314 * Internal table functions.
1315 *---------------------------------------------------------------*/
8931d859
AM
1316diff -NurpP --minimal linux-4.4.161/drivers/net/tun.c linux-4.4.161-vs2.3.9.8/drivers/net/tun.c
1317--- linux-4.4.161/drivers/net/tun.c 2018-10-20 02:34:28.000000000 +0000
1318+++ linux-4.4.161-vs2.3.9.8/drivers/net/tun.c 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8 1319@@ -65,6 +65,7 @@
d33d7b00
AM
1320 #include <linux/nsproxy.h>
1321 #include <linux/virtio_net.h>
1322 #include <linux/rcupdate.h>
1323+#include <linux/vs_network.h>
1324 #include <net/net_namespace.h>
1325 #include <net/netns/generic.h>
927ca606
AM
1326 #include <net/rtnetlink.h>
1327@@ -181,6 +182,7 @@ struct tun_struct {
d33d7b00 1328 unsigned int flags;
537831f9
AM
1329 kuid_t owner;
1330 kgid_t group;
61333608 1331+ vnid_t nid;
d33d7b00
AM
1332
1333 struct net_device *dev;
db55b927 1334 netdev_features_t set_features;
927ca606 1335@@ -475,6 +477,7 @@ static inline bool tun_not_capable(struc
b00e13aa
AM
1336 return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
1337 (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
1338 !ns_capable(net->user_ns, CAP_NET_ADMIN);
1339+ /* !cap_raised(current_cap(), CAP_NET_ADMIN) */
1340 }
1341
1342 static void tun_set_real_num_queues(struct tun_struct *tun)
927ca606 1343@@ -1463,6 +1466,7 @@ static void tun_setup(struct net_device
2380c486 1344
537831f9
AM
1345 tun->owner = INVALID_UID;
1346 tun->group = INVALID_GID;
1347+ tun->nid = nx_current_nid();
2380c486 1348
ec22aa5c
AM
1349 dev->ethtool_ops = &tun_ethtool_ops;
1350 dev->destructor = tun_free_netdev;
927ca606 1351@@ -1657,7 +1661,7 @@ static int tun_set_iff(struct net *net,
b00e13aa
AM
1352 int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ?
1353 MAX_TAP_QUEUES : 1;
1354
1355- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
c2e5f7c8 1356+ if (!nx_ns_capable(net->user_ns, CAP_NET_ADMIN, NXC_TUN_CREATE))
b00e13aa
AM
1357 return -EPERM;
1358 err = security_tun_dev_create();
1359 if (err < 0)
927ca606 1360@@ -2010,6 +2014,16 @@ static long __tun_chr_ioctl(struct file
537831f9 1361 from_kgid(&init_user_ns, tun->group));
2380c486 1362 break;
d337f35e 1363
2380c486
JR
1364+ case TUNSETNID:
1365+ if (!capable(CAP_CONTEXT))
1366+ return -EPERM;
d337f35e 1367+
2380c486 1368+ /* Set nid owner of the device */
61333608 1369+ tun->nid = (vnid_t) arg;
d337f35e 1370+
763640ca 1371+ tun_debug(KERN_INFO, tun, "nid owner set to %u\n", tun->nid);
2380c486 1372+ break;
d337f35e 1373+
2380c486
JR
1374 case TUNSETLINK:
1375 /* Only allow setting the type when the interface is down */
ec22aa5c 1376 if (tun->dev->flags & IFF_UP) {
8931d859
AM
1377diff -NurpP --minimal linux-4.4.161/drivers/scsi/cxgbi/libcxgbi.c linux-4.4.161-vs2.3.9.8/drivers/scsi/cxgbi/libcxgbi.c
1378--- linux-4.4.161/drivers/scsi/cxgbi/libcxgbi.c 2016-01-10 23:01:32.000000000 +0000
1379+++ linux-4.4.161-vs2.3.9.8/drivers/scsi/cxgbi/libcxgbi.c 2018-10-20 04:57:21.000000000 +0000
927ca606 1380@@ -768,7 +768,8 @@ static struct cxgbi_sock *cxgbi_check_ro
bb20add7
AM
1381 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
1382
1383 err = ipv6_dev_get_saddr(&init_net, idev ? idev->dev : NULL,
1384- &daddr6->sin6_addr, 0, &pref_saddr);
1385+ &daddr6->sin6_addr, 0, &pref_saddr,
1386+ NULL);
1387 if (err) {
1388 pr_info("failed to get source address to reach %pI6\n",
1389 &daddr6->sin6_addr);
8931d859
AM
1390diff -NurpP --minimal linux-4.4.161/drivers/tty/sysrq.c linux-4.4.161-vs2.3.9.8/drivers/tty/sysrq.c
1391--- linux-4.4.161/drivers/tty/sysrq.c 2018-10-20 02:34:29.000000000 +0000
1392+++ linux-4.4.161-vs2.3.9.8/drivers/tty/sysrq.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 1393@@ -47,6 +47,7 @@
c2e5f7c8
JR
1394 #include <linux/syscalls.h>
1395 #include <linux/of.h>
bb20add7 1396 #include <linux/rcupdate.h>
ab30d09f
AM
1397+#include <linux/vserver/debug.h>
1398
1399 #include <asm/ptrace.h>
1400 #include <asm/irq_regs.h>
1d9ad342 1401@@ -427,6 +428,21 @@ static struct sysrq_key_op sysrq_unrt_op
ab30d09f
AM
1402 .enable_mask = SYSRQ_ENABLE_RTNICE,
1403 };
1404
1405+
1406+#ifdef CONFIG_VSERVER_DEBUG
1407+static void sysrq_handle_vxinfo(int key)
1408+{
1409+ dump_vx_info_inactive((key == 'x') ? 0 : 1);
1410+}
1411+
1412+static struct sysrq_key_op sysrq_showvxinfo_op = {
1413+ .handler = sysrq_handle_vxinfo,
1414+ .help_msg = "conteXt",
1415+ .action_msg = "Show Context Info",
1416+ .enable_mask = SYSRQ_ENABLE_DUMP,
1417+};
1418+#endif
1419+
1420 /* Key Operations table and lock */
1421 static DEFINE_SPINLOCK(sysrq_key_table_lock);
1422
1d9ad342 1423@@ -483,7 +499,11 @@ static struct sysrq_key_op *sysrq_key_ta
927ca606 1424 /* x: May be registered on mips for TLB dump */
ab30d09f 1425 /* x: May be registered on ppc/powerpc for xmon */
537831f9 1426 /* x: May be registered on sparc64 for global PMU dump */
ab30d09f
AM
1427+#ifdef CONFIG_VSERVER_DEBUG
1428+ &sysrq_showvxinfo_op, /* x */
1429+#else
4bf69007 1430 NULL, /* x */
ab30d09f
AM
1431+#endif
1432 /* y: May be registered on sparc64 for global register dump */
1433 NULL, /* y */
1434 &sysrq_ftrace_dump_op, /* z */
1d9ad342 1435@@ -498,6 +518,8 @@ static int sysrq_key_table_key2index(int
ab30d09f
AM
1436 retval = key - '0';
1437 else if ((key >= 'a') && (key <= 'z'))
1438 retval = key + 10 - 'a';
1439+ else if ((key >= 'A') && (key <= 'Z'))
1440+ retval = key + 10 - 'A';
1441 else
1442 retval = -1;
1443 return retval;
8931d859
AM
1444diff -NurpP --minimal linux-4.4.161/drivers/tty/tty_io.c linux-4.4.161-vs2.3.9.8/drivers/tty/tty_io.c
1445--- linux-4.4.161/drivers/tty/tty_io.c 2018-10-20 02:34:29.000000000 +0000
1446+++ linux-4.4.161-vs2.3.9.8/drivers/tty/tty_io.c 2018-10-20 04:57:21.000000000 +0000
1e8b8f9b 1447@@ -104,6 +104,7 @@
ab30d09f
AM
1448
1449 #include <linux/kmod.h>
1450 #include <linux/nsproxy.h>
1451+#include <linux/vs_pid.h>
1452
1453 #undef TTY_DEBUG_HANGUP
927ca606 1454 #ifdef TTY_DEBUG_HANGUP
8931d859 1455@@ -2291,7 +2292,8 @@ static int tiocsti(struct tty_struct *tt
ab30d09f
AM
1456 char ch, mbz = 0;
1457 struct tty_ldisc *ld;
1458
1459- if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN))
1460+ if (((current->signal->tty != tty) &&
1461+ !vx_capable(CAP_SYS_ADMIN, VXC_TIOCSTI)))
1462 return -EPERM;
1463 if (get_user(ch, p))
1464 return -EFAULT;
8931d859 1465@@ -2604,6 +2606,7 @@ static int tiocspgrp(struct tty_struct *
ab30d09f
AM
1466 return -ENOTTY;
1467 if (get_user(pgrp_nr, p))
1468 return -EFAULT;
1469+ pgrp_nr = vx_rmap_pid(pgrp_nr);
1470 if (pgrp_nr < 0)
1471 return -EINVAL;
1472 rcu_read_lock();
8931d859
AM
1473diff -NurpP --minimal linux-4.4.161/fs/attr.c linux-4.4.161-vs2.3.9.8/fs/attr.c
1474--- linux-4.4.161/fs/attr.c 2018-10-20 02:34:30.000000000 +0000
1475+++ linux-4.4.161-vs2.3.9.8/fs/attr.c 2018-10-20 04:57:21.000000000 +0000
537831f9 1476@@ -15,6 +15,9 @@
d337f35e 1477 #include <linux/security.h>
f6c5ef8b 1478 #include <linux/evm.h>
537831f9 1479 #include <linux/ima.h>
d337f35e
JR
1480+#include <linux/proc_fs.h>
1481+#include <linux/devpts_fs.h>
2380c486 1482+#include <linux/vs_tag.h>
d337f35e 1483
93de0823
AM
1484 /**
1485 * inode_change_ok - check if attribute changes to an inode are allowed
b00e13aa 1486@@ -77,6 +80,10 @@ int inode_change_ok(const struct inode *
93de0823 1487 return -EPERM;
d337f35e 1488 }
93de0823
AM
1489
1490+ /* check for inode tag permission */
2380c486 1491+ if (dx_permission(inode, MAY_WRITE))
93de0823 1492+ return -EACCES;
2380c486 1493+
93de0823
AM
1494 return 0;
1495 }
1496 EXPORT_SYMBOL(inode_change_ok);
b00e13aa 1497@@ -147,6 +154,8 @@ void setattr_copy(struct inode *inode, c
d337f35e
JR
1498 inode->i_uid = attr->ia_uid;
1499 if (ia_valid & ATTR_GID)
1500 inode->i_gid = attr->ia_gid;
1501+ if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode))
1502+ inode->i_tag = attr->ia_tag;
1503 if (ia_valid & ATTR_ATIME)
1504 inode->i_atime = timespec_trunc(attr->ia_atime,
1505 inode->i_sb->s_time_gran);
c2e5f7c8 1506@@ -197,7 +206,8 @@ int notify_change(struct dentry * dentry
92598135
AM
1507
1508 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
78865d5b
AM
1509
1510- if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
1511+ if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
1512+ ATTR_TAG | ATTR_TIMES_SET)) {
1513 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1514 return -EPERM;
1515 }
8931d859
AM
1516diff -NurpP --minimal linux-4.4.161/fs/block_dev.c linux-4.4.161-vs2.3.9.8/fs/block_dev.c
1517--- linux-4.4.161/fs/block_dev.c 2018-10-20 02:34:30.000000000 +0000
1518+++ linux-4.4.161-vs2.3.9.8/fs/block_dev.c 2018-10-20 04:57:21.000000000 +0000
927ca606 1519@@ -29,6 +29,7 @@
2380c486 1520 #include <linux/log2.h>
db55b927 1521 #include <linux/cleancache.h>
927ca606 1522 #include <linux/dax.h>
2380c486
JR
1523+#include <linux/vs_device.h>
1524 #include <asm/uaccess.h>
1525 #include "internal.h"
1526
927ca606 1527@@ -645,6 +646,7 @@ struct block_device *bdget(dev_t dev)
2380c486
JR
1528 bdev->bd_invalidated = 0;
1529 inode->i_mode = S_IFBLK;
1530 inode->i_rdev = dev;
1531+ inode->i_mdev = dev;
1532 inode->i_bdev = bdev;
1533 inode->i_data.a_ops = &def_blk_aops;
1534 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
927ca606 1535@@ -691,6 +693,11 @@ EXPORT_SYMBOL(bdput);
2380c486
JR
1536 static struct block_device *bd_acquire(struct inode *inode)
1537 {
1538 struct block_device *bdev;
1539+ dev_t mdev;
1540+
1541+ if (!vs_map_blkdev(inode->i_rdev, &mdev, DATTR_OPEN))
1542+ return NULL;
1543+ inode->i_mdev = mdev;
1544
1545 spin_lock(&bdev_lock);
1546 bdev = inode->i_bdev;
927ca606 1547@@ -701,7 +708,7 @@ static struct block_device *bd_acquire(s
2380c486
JR
1548 }
1549 spin_unlock(&bdev_lock);
1550
1551- bdev = bdget(inode->i_rdev);
1552+ bdev = bdget(mdev);
1553 if (bdev) {
1554 spin_lock(&bdev_lock);
1555 if (!inode->i_bdev) {
8931d859
AM
1556diff -NurpP --minimal linux-4.4.161/fs/btrfs/ctree.h linux-4.4.161-vs2.3.9.8/fs/btrfs/ctree.h
1557--- linux-4.4.161/fs/btrfs/ctree.h 2018-10-20 02:34:30.000000000 +0000
1558+++ linux-4.4.161-vs2.3.9.8/fs/btrfs/ctree.h 2018-10-20 04:57:21.000000000 +0000
927ca606 1559@@ -731,11 +731,14 @@ struct btrfs_inode_item {
e22b5178
AM
1560 /* modification sequence number for NFS */
1561 __le64 sequence;
1562
1563+ __le16 tag;
1564 /*
1565 * a little future expansion, for more than this we can
1566 * just grow the inode item and version it
1567 */
1568- __le64 reserved[4];
1569+ __le16 reserved16;
1570+ __le32 reserved32;
1571+ __le64 reserved[3];
1572 struct btrfs_timespec atime;
1573 struct btrfs_timespec ctime;
1574 struct btrfs_timespec mtime;
927ca606 1575@@ -2189,6 +2192,8 @@ struct btrfs_ioctl_defrag_range_args {
c2e5f7c8 1576 #define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
bb20add7 1577 #define BTRFS_DEFAULT_MAX_INLINE (8192)
e22b5178
AM
1578
1579+#define BTRFS_MOUNT_TAGGED (1 << 24)
1580+
1581 #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1582 #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
b00e13aa 1583 #define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt)
927ca606 1584@@ -2528,6 +2533,7 @@ BTRFS_SETGET_FUNCS(inode_block_group, st
e22b5178
AM
1585 BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
1586 BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
1587 BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
1588+BTRFS_SETGET_FUNCS(inode_tag, struct btrfs_inode_item, tag, 16);
1589 BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
1590 BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
1591 BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
927ca606 1592@@ -2575,6 +2581,10 @@ BTRFS_SETGET_FUNCS(extent_flags, struct
78865d5b
AM
1593
1594 BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
1595
1596+#define BTRFS_INODE_IXUNLINK (1 << 24)
1597+#define BTRFS_INODE_BARRIER (1 << 25)
1598+#define BTRFS_INODE_COW (1 << 26)
1599+
1600
1601 BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
1602
927ca606 1603@@ -4022,6 +4032,7 @@ long btrfs_ioctl(struct file *file, unsi
d4263eb0
JR
1604 void btrfs_update_iflags(struct inode *inode);
1605 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
c2e5f7c8 1606 int btrfs_is_empty_uuid(u8 *uuid);
d4263eb0 1607+int btrfs_sync_flags(struct inode *inode, int, int);
763640ca
JR
1608 int btrfs_defrag_file(struct inode *inode, struct file *file,
1609 struct btrfs_ioctl_defrag_range_args *range,
1610 u64 newer_than, unsigned long max_pages);
8931d859
AM
1611diff -NurpP --minimal linux-4.4.161/fs/btrfs/disk-io.c linux-4.4.161-vs2.3.9.8/fs/btrfs/disk-io.c
1612--- linux-4.4.161/fs/btrfs/disk-io.c 2018-10-20 02:34:30.000000000 +0000
1613+++ linux-4.4.161-vs2.3.9.8/fs/btrfs/disk-io.c 2018-10-20 05:50:20.000000000 +0000
1614@@ -2737,6 +2737,9 @@ int open_ctree(struct super_block *sb,
763640ca 1615 goto fail_alloc;
e22b5178
AM
1616 }
1617
1618+ if (btrfs_test_opt(tree_root, TAGGED))
1619+ sb->s_flags |= MS_TAGGED;
1620+
1621 features = btrfs_super_incompat_flags(disk_super) &
1622 ~BTRFS_FEATURE_INCOMPAT_SUPP;
1623 if (features) {
8931d859
AM
1624diff -NurpP --minimal linux-4.4.161/fs/btrfs/inode.c linux-4.4.161-vs2.3.9.8/fs/btrfs/inode.c
1625--- linux-4.4.161/fs/btrfs/inode.c 2018-10-20 02:34:30.000000000 +0000
1626+++ linux-4.4.161-vs2.3.9.8/fs/btrfs/inode.c 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8 1627@@ -43,6 +43,7 @@
b00e13aa 1628 #include <linux/blkdev.h>
c2e5f7c8 1629 #include <linux/posix_acl_xattr.h>
927ca606 1630 #include <linux/uio.h>
e22b5178 1631+#include <linux/vs_tag.h>
e22b5178
AM
1632 #include "ctree.h"
1633 #include "disk-io.h"
c2e5f7c8 1634 #include "transaction.h"
8931d859 1635@@ -3649,6 +3650,9 @@ static void btrfs_read_locked_inode(stru
bb20add7 1636 unsigned long ptr;
e22b5178 1637 int maybe_acls;
e22b5178 1638 u32 rdev;
a4a22af8
AM
1639+ kuid_t kuid;
1640+ kgid_t kgid;
1641+ ktag_t ktag;
e22b5178 1642 int ret;
763640ca 1643 bool filled = false;
bb20add7 1644 int first_xattr_slot;
8931d859 1645@@ -3676,8 +3680,14 @@ static void btrfs_read_locked_inode(stru
a168f21d 1646 struct btrfs_inode_item);
e22b5178 1647 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
f6c5ef8b 1648 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
537831f9
AM
1649- i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
1650- i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
e22b5178 1651+
a4a22af8
AM
1652+ kuid = make_kuid(&init_user_ns, btrfs_inode_uid(leaf, inode_item));
1653+ kgid = make_kgid(&init_user_ns, btrfs_inode_gid(leaf, inode_item));
1654+ ktag = make_ktag(&init_user_ns, btrfs_inode_tag(leaf, inode_item));
1655+
1656+ inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
1657+ inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
1658+ inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, ktag);
e22b5178
AM
1659 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
1660
927ca606 1661 inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
8931d859 1662@@ -3833,11 +3843,18 @@ static void fill_inode_item(struct btrfs
e22b5178
AM
1663 struct inode *inode)
1664 {
b00e13aa 1665 struct btrfs_map_token token;
a4a22af8
AM
1666+ uid_t uid = from_kuid(&init_user_ns,
1667+ TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
1668+ gid_t gid = from_kgid(&init_user_ns,
1669+ TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
b00e13aa
AM
1670
1671 btrfs_init_map_token(&token);
1672
1673- btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
1674- btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
1675+ btrfs_set_token_inode_uid(leaf, item, uid, &token);
1676+ btrfs_set_token_inode_gid(leaf, item, gid, &token);
e22b5178 1677+#ifdef CONFIG_TAGGING_INTERN
b00e13aa 1678+ btrfs_set_token_inode_tag(leaf, item, i_tag_read(inode), &token);
e22b5178 1679+#endif
b00e13aa
AM
1680 btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
1681 &token);
1682 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
8931d859 1683@@ -10092,6 +10109,7 @@ static const struct inode_operations btr
d4263eb0
JR
1684 .listxattr = btrfs_listxattr,
1685 .removexattr = btrfs_removexattr,
d4263eb0
JR
1686 .permission = btrfs_permission,
1687+ .sync_flags = btrfs_sync_flags,
a168f21d 1688 .get_acl = btrfs_get_acl,
f15949f2 1689 .set_acl = btrfs_set_acl,
c2e5f7c8 1690 .update_time = btrfs_update_time,
8931d859 1691@@ -10100,6 +10118,7 @@ static const struct inode_operations btr
7e46296a 1692 static const struct inode_operations btrfs_dir_ro_inode_operations = {
d4263eb0 1693 .lookup = btrfs_lookup,
d4263eb0 1694 .permission = btrfs_permission,
d4263eb0 1695+ .sync_flags = btrfs_sync_flags,
a168f21d 1696 .get_acl = btrfs_get_acl,
f15949f2 1697 .set_acl = btrfs_set_acl,
c2e5f7c8 1698 .update_time = btrfs_update_time,
8931d859 1699@@ -10170,6 +10189,7 @@ static const struct inode_operations btr
c2e5f7c8
JR
1700 .removexattr = btrfs_removexattr,
1701 .permission = btrfs_permission,
1702 .fiemap = btrfs_fiemap,
1703+ .sync_flags = btrfs_sync_flags,
1704 .get_acl = btrfs_get_acl,
bb20add7 1705 .set_acl = btrfs_set_acl,
c2e5f7c8 1706 .update_time = btrfs_update_time,
8931d859
AM
1707diff -NurpP --minimal linux-4.4.161/fs/btrfs/ioctl.c linux-4.4.161-vs2.3.9.8/fs/btrfs/ioctl.c
1708--- linux-4.4.161/fs/btrfs/ioctl.c 2018-10-20 02:34:30.000000000 +0000
1709+++ linux-4.4.161-vs2.3.9.8/fs/btrfs/ioctl.c 2018-10-20 04:57:21.000000000 +0000
927ca606 1710@@ -108,10 +108,13 @@ static unsigned int btrfs_flags_to_ioctl
d4263eb0
JR
1711 {
1712 unsigned int iflags = 0;
1713
1714- if (flags & BTRFS_INODE_SYNC)
1715- iflags |= FS_SYNC_FL;
1716 if (flags & BTRFS_INODE_IMMUTABLE)
1717 iflags |= FS_IMMUTABLE_FL;
1718+ if (flags & BTRFS_INODE_IXUNLINK)
1719+ iflags |= FS_IXUNLINK_FL;
1720+
1721+ if (flags & BTRFS_INODE_SYNC)
1722+ iflags |= FS_SYNC_FL;
1723 if (flags & BTRFS_INODE_APPEND)
1724 iflags |= FS_APPEND_FL;
1725 if (flags & BTRFS_INODE_NODUMP)
927ca606 1726@@ -128,34 +131,84 @@ static unsigned int btrfs_flags_to_ioctl
763640ca
JR
1727 else if (flags & BTRFS_INODE_NOCOMPRESS)
1728 iflags |= FS_NOCOMP_FL;
d4263eb0
JR
1729
1730+ if (flags & BTRFS_INODE_BARRIER)
1731+ iflags |= FS_BARRIER_FL;
1732+ if (flags & BTRFS_INODE_COW)
1733+ iflags |= FS_COW_FL;
1734 return iflags;
1735 }
1736
1737 /*
1738- * Update inode->i_flags based on the btrfs internal flags.
1739+ * Update inode->i_(v)flags based on the btrfs internal flags.
1740 */
1741 void btrfs_update_iflags(struct inode *inode)
1742 {
1743 struct btrfs_inode *ip = BTRFS_I(inode);
bb20add7 1744 unsigned int new_fl = 0;
d4263eb0
JR
1745
1746- if (ip->flags & BTRFS_INODE_SYNC)
bb20add7 1747- new_fl |= S_SYNC;
d4263eb0 1748 if (ip->flags & BTRFS_INODE_IMMUTABLE)
bb20add7 1749 new_fl |= S_IMMUTABLE;
d4263eb0 1750+ if (ip->flags & BTRFS_INODE_IXUNLINK)
bb20add7 1751+ new_fl |= S_IXUNLINK;
d4263eb0
JR
1752+
1753+ if (ip->flags & BTRFS_INODE_SYNC)
bb20add7 1754+ new_fl |= S_SYNC;
d4263eb0 1755 if (ip->flags & BTRFS_INODE_APPEND)
bb20add7 1756 new_fl |= S_APPEND;
d4263eb0 1757 if (ip->flags & BTRFS_INODE_NOATIME)
bb20add7 1758 new_fl |= S_NOATIME;
d4263eb0 1759 if (ip->flags & BTRFS_INODE_DIRSYNC)
bb20add7
AM
1760 new_fl |= S_DIRSYNC;
1761-
1762 set_mask_bits(&inode->i_flags,
1763- S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
1764+ S_SYNC | S_APPEND | S_IMMUTABLE | S_IXUNLINK | S_NOATIME | S_DIRSYNC,
1765 new_fl);
d4263eb0 1766+
bb20add7 1767+ new_fl = 0;
d4263eb0 1768+ if (ip->flags & BTRFS_INODE_BARRIER)
bb20add7 1769+ new_fl |= V_BARRIER;
d4263eb0 1770+ if (ip->flags & BTRFS_INODE_COW)
bb20add7 1771+ new_fl |= V_COW;
d4263eb0 1772+
bb20add7
AM
1773+ set_mask_bits(&inode->i_vflags,
1774+ V_BARRIER | V_COW, new_fl);
1775 }
1776
1777 /*
d4263eb0
JR
1778+ * Update btrfs internal flags from inode->i_(v)flags.
1779+ */
1780+void btrfs_update_flags(struct inode *inode)
1781+{
1782+ struct btrfs_inode *ip = BTRFS_I(inode);
1783+
1784+ unsigned int flags = inode->i_flags;
1785+ unsigned int vflags = inode->i_vflags;
1786+
1787+ ip->flags &= ~(BTRFS_INODE_SYNC | BTRFS_INODE_APPEND |
1788+ BTRFS_INODE_IMMUTABLE | BTRFS_INODE_IXUNLINK |
1789+ BTRFS_INODE_NOATIME | BTRFS_INODE_DIRSYNC |
1790+ BTRFS_INODE_BARRIER | BTRFS_INODE_COW);
1791+
1792+ if (flags & S_IMMUTABLE)
1793+ ip->flags |= BTRFS_INODE_IMMUTABLE;
1794+ if (flags & S_IXUNLINK)
1795+ ip->flags |= BTRFS_INODE_IXUNLINK;
1796+
1797+ if (flags & S_SYNC)
1798+ ip->flags |= BTRFS_INODE_SYNC;
1799+ if (flags & S_APPEND)
1800+ ip->flags |= BTRFS_INODE_APPEND;
1801+ if (flags & S_NOATIME)
1802+ ip->flags |= BTRFS_INODE_NOATIME;
1803+ if (flags & S_DIRSYNC)
1804+ ip->flags |= BTRFS_INODE_DIRSYNC;
1805+
1806+ if (vflags & V_BARRIER)
1807+ ip->flags |= BTRFS_INODE_BARRIER;
1808+ if (vflags & V_COW)
1809+ ip->flags |= BTRFS_INODE_COW;
bb20add7
AM
1810+ }
1811+
1812+/*
1813 * Inherit flags from the parent inode.
1814 *
1815 * Currently only the compression flags and the cow flags are inherited.
927ca606 1816@@ -168,6 +221,7 @@ void btrfs_inherit_iflags(struct inode *
f6c5ef8b 1817 return;
d4263eb0 1818
f6c5ef8b
AM
1819 flags = BTRFS_I(dir)->flags;
1820+ flags &= ~BTRFS_INODE_BARRIER;
d4263eb0 1821
f6c5ef8b
AM
1822 if (flags & BTRFS_INODE_NOCOMPRESS) {
1823 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
927ca606 1824@@ -186,6 +240,30 @@ void btrfs_inherit_iflags(struct inode *
d4263eb0
JR
1825 btrfs_update_iflags(inode);
1826 }
1827
1828+int btrfs_sync_flags(struct inode *inode, int flags, int vflags)
1829+{
1830+ struct btrfs_inode *ip = BTRFS_I(inode);
1831+ struct btrfs_root *root = ip->root;
1832+ struct btrfs_trans_handle *trans;
1833+ int ret;
1834+
763640ca 1835+ trans = btrfs_join_transaction(root);
d4263eb0
JR
1836+ BUG_ON(!trans);
1837+
d4263eb0
JR
1838+ inode->i_flags = flags;
1839+ inode->i_vflags = vflags;
1840+ btrfs_update_flags(inode);
e22b5178
AM
1841+
1842+ ret = btrfs_update_inode(trans, root, inode);
1843+ BUG_ON(ret);
1844+
1845+ btrfs_update_iflags(inode);
d4263eb0
JR
1846+ inode->i_ctime = CURRENT_TIME;
1847+ btrfs_end_transaction(trans, root);
1848+
1849+ return 0;
1850+}
1851+
1852 static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
1853 {
b00e13aa 1854 struct btrfs_inode *ip = BTRFS_I(file_inode(file));
927ca606 1855@@ -248,21 +326,27 @@ static int btrfs_ioctl_setflags(struct f
d4263eb0
JR
1856
1857 flags = btrfs_mask_flags(inode->i_mode, flags);
1858 oldflags = btrfs_flags_to_ioctl(ip->flags);
1859- if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
1860+ if ((flags ^ oldflags) & (FS_APPEND_FL |
1861+ FS_IMMUTABLE_FL | FS_IXUNLINK_FL)) {
1862 if (!capable(CAP_LINUX_IMMUTABLE)) {
1863 ret = -EPERM;
1864 goto out_unlock;
92598135
AM
1865 }
1866 }
d4263eb0
JR
1867
1868- if (flags & FS_SYNC_FL)
1869- ip->flags |= BTRFS_INODE_SYNC;
1870- else
1871- ip->flags &= ~BTRFS_INODE_SYNC;
1872 if (flags & FS_IMMUTABLE_FL)
1873 ip->flags |= BTRFS_INODE_IMMUTABLE;
1874 else
1875 ip->flags &= ~BTRFS_INODE_IMMUTABLE;
1876+ if (flags & FS_IXUNLINK_FL)
1877+ ip->flags |= BTRFS_INODE_IXUNLINK;
1878+ else
1879+ ip->flags &= ~BTRFS_INODE_IXUNLINK;
1880+
1881+ if (flags & FS_SYNC_FL)
1882+ ip->flags |= BTRFS_INODE_SYNC;
1883+ else
1884+ ip->flags &= ~BTRFS_INODE_SYNC;
1885 if (flags & FS_APPEND_FL)
1886 ip->flags |= BTRFS_INODE_APPEND;
1887 else
8931d859
AM
1888diff -NurpP --minimal linux-4.4.161/fs/btrfs/super.c linux-4.4.161-vs2.3.9.8/fs/btrfs/super.c
1889--- linux-4.4.161/fs/btrfs/super.c 2018-10-20 02:34:30.000000000 +0000
1890+++ linux-4.4.161-vs2.3.9.8/fs/btrfs/super.c 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
1891@@ -306,7 +306,7 @@ enum {
1892 #ifdef CONFIG_BTRFS_DEBUG
1893 Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
1894 #endif
db55b927 1895- Opt_err,
f6c5ef8b 1896+ Opt_tag, Opt_notag, Opt_tagid, Opt_err,
e22b5178
AM
1897 };
1898
1899 static match_table_t tokens = {
927ca606
AM
1900@@ -363,6 +363,9 @@ static match_table_t tokens = {
1901 {Opt_fragment_metadata, "fragment=metadata"},
1902 {Opt_fragment_all, "fragment=all"},
1903 #endif
e22b5178
AM
1904+ {Opt_tag, "tag"},
1905+ {Opt_notag, "notag"},
1906+ {Opt_tagid, "tagid=%u"},
1907 {Opt_err, NULL},
1908 };
1909
927ca606
AM
1910@@ -745,6 +748,22 @@ int btrfs_parse_options(struct btrfs_roo
1911 btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
1e8b8f9b 1912 break;
927ca606 1913 #endif
e22b5178
AM
1914+#ifndef CONFIG_TAGGING_NONE
1915+ case Opt_tag:
1916+ printk(KERN_INFO "btrfs: use tagging\n");
1917+ btrfs_set_opt(info->mount_opt, TAGGED);
1918+ break;
1919+ case Opt_notag:
1920+ printk(KERN_INFO "btrfs: disabled tagging\n");
1921+ btrfs_clear_opt(info->mount_opt, TAGGED);
1922+ break;
1923+#endif
1924+#ifdef CONFIG_PROPAGATE
1925+ case Opt_tagid:
1926+ /* use args[0] */
1927+ btrfs_set_opt(info->mount_opt, TAGGED);
1928+ break;
1929+#endif
2bf5ad28 1930 case Opt_err:
bb20add7
AM
1931 btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
1932 ret = -EINVAL;
927ca606 1933@@ -1647,6 +1666,12 @@ static int btrfs_remount(struct super_bl
42bc425c
AM
1934 btrfs_resize_thread_pool(fs_info,
1935 fs_info->thread_pool_size, old_thread_pool_size);
e22b5178
AM
1936
1937+ if (btrfs_test_opt(root, TAGGED) && !(sb->s_flags & MS_TAGGED)) {
1938+ printk("btrfs: %s: tagging not permitted on remount.\n",
1939+ sb->s_id);
1940+ return -EINVAL;
1941+ }
1942+
1943 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
b00e13aa 1944 goto out;
e22b5178 1945
8931d859
AM
1946diff -NurpP --minimal linux-4.4.161/fs/char_dev.c linux-4.4.161-vs2.3.9.8/fs/char_dev.c
1947--- linux-4.4.161/fs/char_dev.c 2016-01-10 23:01:32.000000000 +0000
1948+++ linux-4.4.161-vs2.3.9.8/fs/char_dev.c 2018-10-20 04:57:21.000000000 +0000
4744a4b1 1949@@ -21,6 +21,8 @@
2380c486
JR
1950 #include <linux/mutex.h>
1951 #include <linux/backing-dev.h>
7942c842 1952 #include <linux/tty.h>
2380c486
JR
1953+#include <linux/vs_context.h>
1954+#include <linux/vs_device.h>
1955
ec22aa5c
AM
1956 #include "internal.h"
1957
927ca606 1958@@ -350,14 +352,21 @@ static int chrdev_open(struct inode *ino
2380c486
JR
1959 struct cdev *p;
1960 struct cdev *new = NULL;
1961 int ret = 0;
1962+ dev_t mdev;
1963+
1964+ if (!vs_map_chrdev(inode->i_rdev, &mdev, DATTR_OPEN))
1965+ return -EPERM;
1966+ inode->i_mdev = mdev;
1967
1968 spin_lock(&cdev_lock);
1969 p = inode->i_cdev;
1970 if (!p) {
1971 struct kobject *kobj;
1972 int idx;
1973+
1974 spin_unlock(&cdev_lock);
1975- kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
1976+
1977+ kobj = kobj_lookup(cdev_map, mdev, &idx);
1978 if (!kobj)
1979 return -ENXIO;
1980 new = container_of(kobj, struct cdev, kobj);
8931d859
AM
1981diff -NurpP --minimal linux-4.4.161/fs/dcache.c linux-4.4.161-vs2.3.9.8/fs/dcache.c
1982--- linux-4.4.161/fs/dcache.c 2018-10-20 02:34:30.000000000 +0000
1983+++ linux-4.4.161-vs2.3.9.8/fs/dcache.c 2018-10-20 05:50:20.000000000 +0000
927ca606 1984@@ -39,6 +39,7 @@
f6c5ef8b 1985 #include <linux/ratelimit.h>
c2e5f7c8 1986 #include <linux/list_lru.h>
927ca606 1987 #include <linux/kasan.h>
d337f35e 1988+#include <linux/vs_limit.h>
927ca606 1989
d337f35e 1990 #include "internal.h"
db55b927 1991 #include "mount.h"
8931d859 1992@@ -683,6 +684,7 @@ static inline bool fast_dput(struct dent
927ca606
AM
1993 spin_lock(&dentry->d_lock);
1994 if (dentry->d_lockref.count > 1) {
1995 dentry->d_lockref.count--;
1996+ vx_dentry_dec(dentry);
1997 spin_unlock(&dentry->d_lock);
1998 return 1;
1999 }
8931d859 2000@@ -812,6 +814,7 @@ repeat:
927ca606
AM
2001 dentry_lru_add(dentry);
2002
2003 dentry->d_lockref.count--;
2004+ vx_dentry_dec(dentry);
2005 spin_unlock(&dentry->d_lock);
2006 return;
d337f35e 2007
8931d859 2008@@ -829,6 +832,7 @@ EXPORT_SYMBOL(dput);
d33d7b00 2009 static inline void __dget_dlock(struct dentry *dentry)
2380c486 2010 {
c2e5f7c8 2011 dentry->d_lockref.count++;
2380c486 2012+ vx_dentry_inc(dentry);
d337f35e 2013 }
2380c486 2014
d33d7b00 2015 static inline void __dget(struct dentry *dentry)
8931d859 2016@@ -841,6 +845,8 @@ struct dentry *dget_parent(struct dentry
bb20add7
AM
2017 int gotref;
2018 struct dentry *ret;
2019
2020+ vx_dentry_dec(dentry);
2021+
2022 /*
2023 * Do optimistic parent lookup without any
2024 * locking.
8931d859 2025@@ -871,6 +877,7 @@ repeat:
927ca606
AM
2026 rcu_read_unlock();
2027 BUG_ON(!ret->d_lockref.count);
2028 ret->d_lockref.count++;
2029+ vx_dentry_inc(ret);
2030 spin_unlock(&ret->d_lock);
2031 return ret;
2032 }
8931d859 2033@@ -1025,6 +1032,7 @@ static void shrink_dentry_list(struct li
927ca606
AM
2034 parent = lock_parent(dentry);
2035 if (dentry->d_lockref.count != 1) {
2036 dentry->d_lockref.count--;
2037+ vx_dentry_dec(dentry);
2038 spin_unlock(&dentry->d_lock);
2039 if (parent)
2040 spin_unlock(&parent->d_lock);
8931d859 2041@@ -1587,6 +1595,9 @@ struct dentry *__d_alloc(struct super_bl
d337f35e
JR
2042 struct dentry *dentry;
2043 char *dname;
2044
2045+ if (!vx_dentry_avail(1))
2046+ return NULL;
2047+
2380c486 2048 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
d337f35e
JR
2049 if (!dentry)
2050 return NULL;
8931d859 2051@@ -1625,6 +1636,7 @@ struct dentry *__d_alloc(struct super_bl
d337f35e 2052
c2e5f7c8 2053 dentry->d_lockref.count = 1;
763640ca 2054 dentry->d_flags = 0;
ab30d09f 2055+ vx_dentry_inc(dentry);
ab30d09f 2056 spin_lock_init(&dentry->d_lock);
d33d7b00 2057 seqcount_init(&dentry->d_seq);
763640ca 2058 dentry->d_inode = NULL;
8931d859 2059@@ -2385,6 +2397,7 @@ struct dentry *__d_lookup(const struct d
d337f35e 2060 }
2380c486 2061
c2e5f7c8 2062 dentry->d_lockref.count++;
2380c486
JR
2063+ vx_dentry_inc(dentry);
2064 found = dentry;
d337f35e 2065 spin_unlock(&dentry->d_lock);
2380c486 2066 break;
8931d859 2067@@ -3401,6 +3414,7 @@ static enum d_walk_ret d_genocide_kill(v
927ca606
AM
2068 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2069 dentry->d_flags |= DCACHE_GENOCIDE;
2070 dentry->d_lockref.count--;
2071+ vx_dentry_dec(dentry);
2072 }
2073 }
2074 return D_WALK_CONTINUE;
8931d859
AM
2075diff -NurpP --minimal linux-4.4.161/fs/devpts/inode.c linux-4.4.161-vs2.3.9.8/fs/devpts/inode.c
2076--- linux-4.4.161/fs/devpts/inode.c 2018-10-20 02:34:30.000000000 +0000
2077+++ linux-4.4.161-vs2.3.9.8/fs/devpts/inode.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 2078@@ -27,6 +27,7 @@
d337f35e 2079 #include <linux/parser.h>
2380c486
JR
2080 #include <linux/fsnotify.h>
2081 #include <linux/seq_file.h>
d337f35e
JR
2082+#include <linux/vs_base.h>
2083
2380c486 2084 #define DEVPTS_DEFAULT_MODE 0600
ec22aa5c 2085 /*
bb20add7 2086@@ -38,6 +39,21 @@
ec22aa5c
AM
2087 #define DEVPTS_DEFAULT_PTMX_MODE 0000
2088 #define PTMX_MINOR 2
2380c486 2089
a168f21d 2090+static int devpts_permission(struct inode *inode, int mask)
d337f35e
JR
2091+{
2092+ int ret = -EACCES;
2093+
2094+ /* devpts is xid tagged */
61333608 2095+ if (vx_check((vxid_t)i_tag_read(inode), VS_WATCH_P | VS_IDENT))
a168f21d 2096+ ret = generic_permission(inode, mask);
d337f35e
JR
2097+ return ret;
2098+}
2099+
2100+static struct inode_operations devpts_file_inode_operations = {
2101+ .permission = devpts_permission,
2102+};
2380c486 2103+
1e8b8f9b
AM
2104+
2105 /*
2106 * sysctl support for setting limits on the number of Unix98 ptys allocated.
2107 * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly.
927ca606 2108@@ -353,6 +369,34 @@ static int devpts_show_options(struct se
d337f35e
JR
2109 return 0;
2110 }
2111
2112+static int devpts_filter(struct dentry *de)
2113+{
61333608 2114+ vxid_t xid = 0;
b3b0d4fd 2115+
d337f35e 2116+ /* devpts is xid tagged */
b3b0d4fd 2117+ if (de && de->d_inode)
61333608 2118+ xid = (vxid_t)i_tag_read(de->d_inode);
b3b0d4fd
AM
2119+#ifdef CONFIG_VSERVER_WARN_DEVPTS
2120+ else
2121+ vxwprintk_task(1, "devpts " VS_Q("%.*s") " without inode.",
2122+ de->d_name.len, de->d_name.name);
2123+#endif
2124+ return vx_check(xid, VS_WATCH_P | VS_IDENT);
d337f35e
JR
2125+}
2126+
c2e5f7c8 2127+static int devpts_readdir(struct file * filp, struct dir_context *ctx)
d337f35e 2128+{
c2e5f7c8 2129+ return dcache_readdir_filter(filp, ctx, devpts_filter);
d337f35e
JR
2130+}
2131+
2132+static struct file_operations devpts_dir_operations = {
2133+ .open = dcache_dir_open,
2134+ .release = dcache_dir_close,
2135+ .llseek = dcache_dir_lseek,
2136+ .read = generic_read_dir,
c2e5f7c8 2137+ .iterate = devpts_readdir,
d337f35e
JR
2138+};
2139+
2380c486 2140 static const struct super_operations devpts_sops = {
d337f35e
JR
2141 .statfs = simple_statfs,
2142 .remount_fs = devpts_remount,
927ca606 2143@@ -397,8 +441,10 @@ devpts_fill_super(struct super_block *s,
ec22aa5c 2144 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
d337f35e
JR
2145 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
2146 inode->i_op = &simple_dir_inode_operations;
2147- inode->i_fop = &simple_dir_operations;
2148+ inode->i_fop = &devpts_dir_operations;
f6c5ef8b 2149 set_nlink(inode, 2);
d337f35e 2150+ /* devpts is xid tagged */
61333608 2151+ i_tag_write(inode, (vtag_t)vx_current_xid());
d337f35e 2152
1e8b8f9b 2153 s->s_root = d_make_root(inode);
d337f35e 2154 if (s->s_root)
927ca606 2155@@ -630,6 +676,9 @@ struct inode *devpts_pty_new(struct pts_
ec22aa5c 2156 inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
d337f35e 2157 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
ec22aa5c 2158 init_special_inode(inode, S_IFCHR|opts->mode, device);
d337f35e 2159+ /* devpts is xid tagged */
61333608 2160+ i_tag_write(inode, (vtag_t)vx_current_xid());
d337f35e 2161+ inode->i_op = &devpts_file_inode_operations;
b00e13aa 2162 inode->i_private = priv;
d337f35e 2163
b00e13aa 2164 sprintf(s, "%d", index);
8931d859
AM
2165diff -NurpP --minimal linux-4.4.161/fs/ext2/balloc.c linux-4.4.161-vs2.3.9.8/fs/ext2/balloc.c
2166--- linux-4.4.161/fs/ext2/balloc.c 2016-01-10 23:01:32.000000000 +0000
2167+++ linux-4.4.161-vs2.3.9.8/fs/ext2/balloc.c 2018-10-20 04:57:21.000000000 +0000
b00e13aa 2168@@ -693,7 +693,6 @@ ext2_try_to_allocate(struct super_block
2380c486
JR
2169 start = 0;
2170 end = EXT2_BLOCKS_PER_GROUP(sb);
d337f35e 2171 }
2380c486
JR
2172-
2173 BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb));
2174
2175 repeat:
8931d859
AM
2176diff -NurpP --minimal linux-4.4.161/fs/ext2/ext2.h linux-4.4.161-vs2.3.9.8/fs/ext2/ext2.h
2177--- linux-4.4.161/fs/ext2/ext2.h 2016-01-10 23:01:32.000000000 +0000
2178+++ linux-4.4.161-vs2.3.9.8/fs/ext2/ext2.h 2018-10-20 04:57:21.000000000 +0000
1e8b8f9b
AM
2179@@ -244,8 +244,12 @@ struct ext2_group_desc
2180 #define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */
2181 #define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */
2182 #define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
2183+#define EXT2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */
2184 #define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
2185
2186+#define EXT2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */
2187+#define EXT2_COW_FL FS_COW_FL /* Copy on Write marker */
2188+
2189 #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
2190 #define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
2191
2192@@ -329,7 +333,8 @@ struct ext2_inode {
2193 __u16 i_pad1;
2194 __le16 l_i_uid_high; /* these 2 fields */
2195 __le16 l_i_gid_high; /* were reserved2[0] */
2196- __u32 l_i_reserved2;
2197+ __le16 l_i_tag; /* Context Tag */
2198+ __u16 l_i_reserved2;
2199 } linux2;
2200 struct {
2201 __u8 h_i_frag; /* Fragment number */
2202@@ -357,6 +362,7 @@ struct ext2_inode {
2203 #define i_gid_low i_gid
2204 #define i_uid_high osd2.linux2.l_i_uid_high
2205 #define i_gid_high osd2.linux2.l_i_gid_high
2206+#define i_raw_tag osd2.linux2.l_i_tag
2207 #define i_reserved2 osd2.linux2.l_i_reserved2
2208
2209 /*
927ca606
AM
2210@@ -389,6 +395,7 @@ struct ext2_inode {
2211 #else
2212 #define EXT2_MOUNT_DAX 0
2213 #endif
2214+#define EXT2_MOUNT_TAGGED 0x200000 /* Enable Context Tags */
1e8b8f9b
AM
2215
2216
2217 #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
927ca606 2218@@ -776,6 +783,7 @@ extern void ext2_set_inode_flags(struct
93de0823
AM
2219 extern void ext2_get_inode_flags(struct ext2_inode_info *);
2220 extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2221 u64 start, u64 len);
d4263eb0
JR
2222+extern int ext2_sync_flags(struct inode *, int, int);
2223
2224 /* ioctl.c */
2225 extern long ext2_ioctl(struct file *, unsigned int, unsigned long);
8931d859
AM
2226diff -NurpP --minimal linux-4.4.161/fs/ext2/file.c linux-4.4.161-vs2.3.9.8/fs/ext2/file.c
2227--- linux-4.4.161/fs/ext2/file.c 2016-01-10 23:01:32.000000000 +0000
2228+++ linux-4.4.161-vs2.3.9.8/fs/ext2/file.c 2018-10-20 04:57:21.000000000 +0000
927ca606 2229@@ -202,4 +202,5 @@ const struct inode_operations ext2_file_
a168f21d 2230 .get_acl = ext2_get_acl,
bb20add7 2231 .set_acl = ext2_set_acl,
ec22aa5c 2232 .fiemap = ext2_fiemap,
d337f35e
JR
2233+ .sync_flags = ext2_sync_flags,
2234 };
8931d859
AM
2235diff -NurpP --minimal linux-4.4.161/fs/ext2/ialloc.c linux-4.4.161-vs2.3.9.8/fs/ext2/ialloc.c
2236--- linux-4.4.161/fs/ext2/ialloc.c 2016-01-10 23:01:32.000000000 +0000
2237+++ linux-4.4.161-vs2.3.9.8/fs/ext2/ialloc.c 2018-10-20 04:57:21.000000000 +0000
e22b5178
AM
2238@@ -17,6 +17,7 @@
2239 #include <linux/backing-dev.h>
2240 #include <linux/buffer_head.h>
2241 #include <linux/random.h>
2242+#include <linux/vs_tag.h>
2243 #include "ext2.h"
2244 #include "xattr.h"
2245 #include "acl.h"
a4a22af8 2246@@ -546,6 +547,7 @@ got:
76514441
AM
2247 inode->i_mode = mode;
2248 inode->i_uid = current_fsuid();
2249 inode->i_gid = dir->i_gid;
a4a22af8 2250+ i_tag_write(inode, dx_current_fstag(sb));
e22b5178 2251 } else
76514441 2252 inode_init_owner(inode, dir, mode);
e22b5178 2253
8931d859
AM
2254diff -NurpP --minimal linux-4.4.161/fs/ext2/inode.c linux-4.4.161-vs2.3.9.8/fs/ext2/inode.c
2255--- linux-4.4.161/fs/ext2/inode.c 2018-10-20 02:34:30.000000000 +0000
2256+++ linux-4.4.161-vs2.3.9.8/fs/ext2/inode.c 2018-10-20 04:57:21.000000000 +0000
927ca606 2257@@ -33,6 +33,7 @@
ec22aa5c
AM
2258 #include <linux/fiemap.h>
2259 #include <linux/namei.h>
927ca606 2260 #include <linux/uio.h>
d337f35e
JR
2261+#include <linux/vs_tag.h>
2262 #include "ext2.h"
2263 #include "acl.h"
927ca606 2264 #include "xattr.h"
8931d859 2265@@ -1274,39 +1275,62 @@ void ext2_set_inode_flags(struct inode *
d337f35e
JR
2266 {
2267 unsigned int flags = EXT2_I(inode)->i_flags;
2268
927ca606
AM
2269- inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
2270- S_DIRSYNC | S_DAX);
2271+ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | S_DAX |
d337f35e
JR
2272+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2273+
2274+ if (flags & EXT2_IMMUTABLE_FL)
2275+ inode->i_flags |= S_IMMUTABLE;
2380c486
JR
2276+ if (flags & EXT2_IXUNLINK_FL)
2277+ inode->i_flags |= S_IXUNLINK;
d337f35e
JR
2278+
2279 if (flags & EXT2_SYNC_FL)
2280 inode->i_flags |= S_SYNC;
2281 if (flags & EXT2_APPEND_FL)
2282 inode->i_flags |= S_APPEND;
2283- if (flags & EXT2_IMMUTABLE_FL)
2284- inode->i_flags |= S_IMMUTABLE;
2285 if (flags & EXT2_NOATIME_FL)
2286 inode->i_flags |= S_NOATIME;
2287 if (flags & EXT2_DIRSYNC_FL)
2288 inode->i_flags |= S_DIRSYNC;
927ca606
AM
2289 if (test_opt(inode->i_sb, DAX))
2290 inode->i_flags |= S_DAX;
2380c486
JR
2291+
2292+ inode->i_vflags &= ~(V_BARRIER | V_COW);
2293+
2294+ if (flags & EXT2_BARRIER_FL)
2295+ inode->i_vflags |= V_BARRIER;
2296+ if (flags & EXT2_COW_FL)
2297+ inode->i_vflags |= V_COW;
2298 }
2299
2300 /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
2301 void ext2_get_inode_flags(struct ext2_inode_info *ei)
2302 {
2303 unsigned int flags = ei->vfs_inode.i_flags;
2304+ unsigned int vflags = ei->vfs_inode.i_vflags;
2305+
2306+ ei->i_flags &= ~(EXT2_SYNC_FL | EXT2_APPEND_FL |
2307+ EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL |
2308+ EXT2_NOATIME_FL | EXT2_DIRSYNC_FL |
2309+ EXT2_BARRIER_FL | EXT2_COW_FL);
2310+
2311+ if (flags & S_IMMUTABLE)
2312+ ei->i_flags |= EXT2_IMMUTABLE_FL;
2313+ if (flags & S_IXUNLINK)
2314+ ei->i_flags |= EXT2_IXUNLINK_FL;
2315
2316- ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|
2317- EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL);
2318 if (flags & S_SYNC)
2319 ei->i_flags |= EXT2_SYNC_FL;
2320 if (flags & S_APPEND)
2321 ei->i_flags |= EXT2_APPEND_FL;
2322- if (flags & S_IMMUTABLE)
2323- ei->i_flags |= EXT2_IMMUTABLE_FL;
2324 if (flags & S_NOATIME)
2325 ei->i_flags |= EXT2_NOATIME_FL;
2326 if (flags & S_DIRSYNC)
2327 ei->i_flags |= EXT2_DIRSYNC_FL;
2328+
2329+ if (vflags & V_BARRIER)
2330+ ei->i_flags |= EXT2_BARRIER_FL;
2331+ if (vflags & V_COW)
2332+ ei->i_flags |= EXT2_COW_FL;
d337f35e
JR
2333 }
2334
2380c486 2335 struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
8931d859 2336@@ -1342,8 +1366,10 @@ struct inode *ext2_iget (struct super_bl
42bc425c
AM
2337 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2338 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
d337f35e 2339 }
42bc425c
AM
2340- i_uid_write(inode, i_uid);
2341- i_gid_write(inode, i_gid);
2342+ i_uid_write(inode, INOTAG_UID(DX_TAG(inode), i_uid, i_gid));
2343+ i_gid_write(inode, INOTAG_GID(DX_TAG(inode), i_uid, i_gid));
537831f9
AM
2344+ i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), i_uid, i_gid,
2345+ le16_to_cpu(raw_inode->i_raw_tag)));
f6c5ef8b 2346 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
d337f35e 2347 inode->i_size = le32_to_cpu(raw_inode->i_size);
2380c486 2348 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
8931d859 2349@@ -1439,8 +1465,10 @@ static int __ext2_write_inode(struct ino
d337f35e
JR
2350 struct ext2_inode_info *ei = EXT2_I(inode);
2351 struct super_block *sb = inode->i_sb;
2352 ino_t ino = inode->i_ino;
42bc425c
AM
2353- uid_t uid = i_uid_read(inode);
2354- gid_t gid = i_gid_read(inode);
a4a22af8
AM
2355+ uid_t uid = from_kuid(&init_user_ns,
2356+ TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
2357+ gid_t gid = from_kgid(&init_user_ns,
2358+ TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
d337f35e
JR
2359 struct buffer_head * bh;
2360 struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
2361 int n;
8931d859 2362@@ -1476,6 +1504,9 @@ static int __ext2_write_inode(struct ino
d337f35e
JR
2363 raw_inode->i_uid_high = 0;
2364 raw_inode->i_gid_high = 0;
2365 }
2366+#ifdef CONFIG_TAGGING_INTERN
537831f9 2367+ raw_inode->i_raw_tag = cpu_to_le16(i_tag_read(inode));
d337f35e
JR
2368+#endif
2369 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2370 raw_inode->i_size = cpu_to_le32(inode->i_size);
2371 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
8931d859 2372@@ -1559,7 +1590,8 @@ int ext2_setattr(struct dentry *dentry,
927ca606
AM
2373 return error;
2374 }
42bc425c
AM
2375 if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
2376- (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
2377+ (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)) ||
537831f9 2378+ (iattr->ia_valid & ATTR_TAG && !tag_eq(iattr->ia_tag, inode->i_tag))) {
78865d5b 2379 error = dquot_transfer(inode, iattr);
d337f35e
JR
2380 if (error)
2381 return error;
8931d859
AM
2382diff -NurpP --minimal linux-4.4.161/fs/ext2/ioctl.c linux-4.4.161-vs2.3.9.8/fs/ext2/ioctl.c
2383--- linux-4.4.161/fs/ext2/ioctl.c 2016-01-10 23:01:32.000000000 +0000
2384+++ linux-4.4.161-vs2.3.9.8/fs/ext2/ioctl.c 2018-10-20 04:57:21.000000000 +0000
d4263eb0
JR
2385@@ -17,6 +17,16 @@
2386 #include <asm/uaccess.h>
2387
2388
2389+int ext2_sync_flags(struct inode *inode, int flags, int vflags)
2390+{
2391+ inode->i_flags = flags;
2392+ inode->i_vflags = vflags;
2393+ ext2_get_inode_flags(EXT2_I(inode));
2394+ inode->i_ctime = CURRENT_TIME_SEC;
2395+ mark_inode_dirty(inode);
2396+ return 0;
2397+}
2398+
2399 long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2400 {
b00e13aa 2401 struct inode *inode = file_inode(filp);
d4263eb0 2402@@ -51,6 +61,11 @@ long ext2_ioctl(struct file *filp, unsig
d337f35e 2403
ec22aa5c 2404 flags = ext2_mask_flags(inode->i_mode, flags);
d337f35e 2405
2380c486
JR
2406+ if (IS_BARRIER(inode)) {
2407+ vxwprintk_task(1, "messing with the barrier.");
2408+ return -EACCES;
2409+ }
2410+
2411 mutex_lock(&inode->i_mutex);
2412 /* Is it quota file? Do not allow user to mess with it */
2413 if (IS_NOQUOTA(inode)) {
d4263eb0 2414@@ -66,7 +81,9 @@ long ext2_ioctl(struct file *filp, unsig
d337f35e
JR
2415 *
2416 * This test looks nicer. Thanks to Pauline Middelink
2417 */
2418- if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
2419+ if ((oldflags & EXT2_IMMUTABLE_FL) ||
2420+ ((flags ^ oldflags) & (EXT2_APPEND_FL |
2380c486
JR
2421+ EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL))) {
2422 if (!capable(CAP_LINUX_IMMUTABLE)) {
2423 mutex_unlock(&inode->i_mutex);
2424 ret = -EPERM;
d4263eb0
JR
2425@@ -74,7 +91,7 @@ long ext2_ioctl(struct file *filp, unsig
2426 }
2427 }
2428
2429- flags = flags & EXT2_FL_USER_MODIFIABLE;
2430+ flags &= EXT2_FL_USER_MODIFIABLE;
2431 flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE;
2432 ei->i_flags = flags;
db55b927 2433
8931d859
AM
2434diff -NurpP --minimal linux-4.4.161/fs/ext2/namei.c linux-4.4.161-vs2.3.9.8/fs/ext2/namei.c
2435--- linux-4.4.161/fs/ext2/namei.c 2018-10-20 02:34:30.000000000 +0000
2436+++ linux-4.4.161-vs2.3.9.8/fs/ext2/namei.c 2018-10-20 04:57:21.000000000 +0000
78865d5b 2437@@ -32,6 +32,7 @@
d337f35e
JR
2438
2439 #include <linux/pagemap.h>
78865d5b 2440 #include <linux/quotaops.h>
d337f35e
JR
2441+#include <linux/vs_tag.h>
2442 #include "ext2.h"
2443 #include "xattr.h"
2444 #include "acl.h"
8931d859 2445@@ -71,6 +72,7 @@ static struct dentry *ext2_lookup(struct
a168f21d
AM
2446 (unsigned long) ino);
2447 return ERR_PTR(-EIO);
ec22aa5c 2448 }
a168f21d 2449+ dx_propagate_tag(nd, inode);
d337f35e 2450 }
a168f21d
AM
2451 return d_splice_alias(inode, dentry);
2452 }
8931d859 2453@@ -444,6 +446,7 @@ const struct inode_operations ext2_speci
a168f21d 2454 .removexattr = generic_removexattr,
d337f35e
JR
2455 #endif
2456 .setattr = ext2_setattr,
d337f35e 2457+ .sync_flags = ext2_sync_flags,
a168f21d 2458 .get_acl = ext2_get_acl,
bb20add7 2459 .set_acl = ext2_set_acl,
d337f35e 2460 };
8931d859
AM
2461diff -NurpP --minimal linux-4.4.161/fs/ext2/super.c linux-4.4.161-vs2.3.9.8/fs/ext2/super.c
2462--- linux-4.4.161/fs/ext2/super.c 2016-01-10 23:01:32.000000000 +0000
2463+++ linux-4.4.161-vs2.3.9.8/fs/ext2/super.c 2018-10-20 04:57:21.000000000 +0000
927ca606 2464@@ -408,7 +408,8 @@ enum {
d337f35e
JR
2465 Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
2466 Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
927ca606 2467 Opt_acl, Opt_noacl, Opt_xip, Opt_dax, Opt_ignore, Opt_err, Opt_quota,
2380c486
JR
2468- Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation
2469+ Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation,
2470+ Opt_tag, Opt_notag, Opt_tagid
d337f35e
JR
2471 };
2472
ec22aa5c 2473 static const match_table_t tokens = {
927ca606 2474@@ -436,6 +437,9 @@ static const match_table_t tokens = {
d337f35e
JR
2475 {Opt_acl, "acl"},
2476 {Opt_noacl, "noacl"},
2477 {Opt_xip, "xip"},
2478+ {Opt_tag, "tag"},
2479+ {Opt_notag, "notag"},
2480+ {Opt_tagid, "tagid=%u"},
927ca606 2481 {Opt_dax, "dax"},
d337f35e
JR
2482 {Opt_grpquota, "grpquota"},
2483 {Opt_ignore, "noquota"},
927ca606 2484@@ -520,6 +524,20 @@ static int parse_options(char *options,
d337f35e
JR
2485 case Opt_nouid32:
2486 set_opt (sbi->s_mount_opt, NO_UID32);
2487 break;
2488+#ifndef CONFIG_TAGGING_NONE
2489+ case Opt_tag:
2490+ set_opt (sbi->s_mount_opt, TAGGED);
2491+ break;
2492+ case Opt_notag:
2493+ clear_opt (sbi->s_mount_opt, TAGGED);
2494+ break;
2495+#endif
2496+#ifdef CONFIG_PROPAGATE
2497+ case Opt_tagid:
2498+ /* use args[0] */
2499+ set_opt (sbi->s_mount_opt, TAGGED);
2500+ break;
2501+#endif
2502 case Opt_nocheck:
2503 clear_opt (sbi->s_mount_opt, CHECK);
2504 break;
927ca606 2505@@ -884,6 +902,8 @@ static int ext2_fill_super(struct super_
2bf5ad28 2506 if (!parse_options((char *) data, sb))
d337f35e
JR
2507 goto failed_mount;
2508
2509+ if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED)
2510+ sb->s_flags |= MS_TAGGED;
2511 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2512 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
2513 MS_POSIXACL : 0);
927ca606 2514@@ -1294,6 +1314,14 @@ static int ext2_remount (struct super_bl
537831f9 2515 err = -EINVAL;
d337f35e
JR
2516 goto restore_opts;
2517 }
537831f9 2518+
d337f35e
JR
2519+ if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) &&
2520+ !(sb->s_flags & MS_TAGGED)) {
2521+ printk("EXT2-fs: %s: tagging not permitted on remount.\n",
2522+ sb->s_id);
d4263eb0
JR
2523+ err = -EINVAL;
2524+ goto restore_opts;
d337f35e 2525+ }
537831f9 2526
d337f35e
JR
2527 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2528 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
8931d859
AM
2529diff -NurpP --minimal linux-4.4.161/fs/ext4/ext4.h linux-4.4.161-vs2.3.9.8/fs/ext4/ext4.h
2530--- linux-4.4.161/fs/ext4/ext4.h 2018-10-20 02:34:30.000000000 +0000
2531+++ linux-4.4.161-vs2.3.9.8/fs/ext4/ext4.h 2018-10-20 04:57:21.000000000 +0000
927ca606 2532@@ -375,8 +375,11 @@ struct flex_groups {
2380c486 2533 #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
78865d5b
AM
2534 #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
2535 #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
b00e13aa 2536+#define EXT4_BARRIER_FL 0x04000000 /* Barrier for chroot() */
2380c486 2537+#define EXT4_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
b00e13aa 2538 #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
927ca606
AM
2539 #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
2540+#define EXT4_COW_FL 0x40000000 /* Copy on Write marker */
2380c486 2541 #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
d337f35e 2542
78865d5b 2543 #define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
927ca606 2544@@ -674,7 +677,7 @@ struct ext4_inode {
ec22aa5c
AM
2545 __le16 l_i_uid_high; /* these 2 fields */
2546 __le16 l_i_gid_high; /* were reserved2[0] */
42bc425c
AM
2547 __le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
2548- __le16 l_i_reserved;
ec22aa5c 2549+ __le16 l_i_tag; /* Context Tag */
ec22aa5c
AM
2550 } linux2;
2551 struct {
2552 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
927ca606 2553@@ -831,6 +834,7 @@ do { \
ec22aa5c
AM
2554 #define i_gid_low i_gid
2555 #define i_uid_high osd2.linux2.l_i_uid_high
2556 #define i_gid_high osd2.linux2.l_i_gid_high
2557+#define i_raw_tag osd2.linux2.l_i_tag
42bc425c 2558 #define i_checksum_lo osd2.linux2.l_i_checksum_lo
d337f35e 2559
ec22aa5c 2560 #elif defined(__GNU__)
927ca606 2561@@ -1068,6 +1072,7 @@ struct ext4_inode_info {
ab30d09f
AM
2562 #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
2563 #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
2564 #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
2565+#define EXT4_MOUNT_TAGGED 0x40000 /* Enable Context Tags */
2566 #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
2567 #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
2568 #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
8931d859 2569@@ -2496,6 +2501,7 @@ extern int ext4_punch_hole(struct inode
927ca606
AM
2570 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
2571 extern void ext4_set_inode_flags(struct inode *);
2572 extern void ext4_get_inode_flags(struct ext4_inode_info *);
d4263eb0 2573+extern int ext4_sync_flags(struct inode *, int, int);
927ca606
AM
2574 extern int ext4_alloc_da_blocks(struct inode *inode);
2575 extern void ext4_set_aops(struct inode *inode);
2576 extern int ext4_writepage_trans_blocks(struct inode *);
8931d859
AM
2577diff -NurpP --minimal linux-4.4.161/fs/ext4/file.c linux-4.4.161-vs2.3.9.8/fs/ext4/file.c
2578--- linux-4.4.161/fs/ext4/file.c 2018-10-20 02:34:30.000000000 +0000
2579+++ linux-4.4.161-vs2.3.9.8/fs/ext4/file.c 2018-10-20 04:57:21.000000000 +0000
927ca606 2580@@ -749,5 +749,6 @@ const struct inode_operations ext4_file_
a168f21d 2581 .get_acl = ext4_get_acl,
bb20add7 2582 .set_acl = ext4_set_acl,
ec22aa5c 2583 .fiemap = ext4_fiemap,
d337f35e
JR
2584+ .sync_flags = ext4_sync_flags,
2585 };
2586
8931d859
AM
2587diff -NurpP --minimal linux-4.4.161/fs/ext4/ialloc.c linux-4.4.161-vs2.3.9.8/fs/ext4/ialloc.c
2588--- linux-4.4.161/fs/ext4/ialloc.c 2018-10-20 02:34:30.000000000 +0000
2589+++ linux-4.4.161-vs2.3.9.8/fs/ext4/ialloc.c 2018-10-20 05:50:20.000000000 +0000
927ca606 2590@@ -21,6 +21,7 @@
e22b5178
AM
2591 #include <linux/random.h>
2592 #include <linux/bitops.h>
2593 #include <linux/blkdev.h>
2594+#include <linux/vs_tag.h>
2595 #include <asm/byteorder.h>
2596
2597 #include "ext4.h"
8931d859 2598@@ -777,6 +778,7 @@ struct inode *__ext4_new_inode(handle_t
76514441
AM
2599 inode->i_mode = mode;
2600 inode->i_uid = current_fsuid();
2601 inode->i_gid = dir->i_gid;
a4a22af8 2602+ i_tag_write(inode, dx_current_fstag(sb));
e22b5178 2603 } else
76514441 2604 inode_init_owner(inode, dir, mode);
927ca606 2605 err = dquot_initialize(inode);
8931d859
AM
2606diff -NurpP --minimal linux-4.4.161/fs/ext4/inode.c linux-4.4.161-vs2.3.9.8/fs/ext4/inode.c
2607--- linux-4.4.161/fs/ext4/inode.c 2018-10-20 02:34:30.000000000 +0000
2608+++ linux-4.4.161-vs2.3.9.8/fs/ext4/inode.c 2018-10-20 05:50:20.000000000 +0000
927ca606
AM
2609@@ -37,6 +37,7 @@
2610 #include <linux/printk.h>
2611 #include <linux/slab.h>
52afa9bd 2612 #include <linux/bitops.h>
d337f35e 2613+#include <linux/vs_tag.h>
ec22aa5c 2614
2380c486 2615 #include "ext4_jbd2.h"
d337f35e 2616 #include "xattr.h"
8931d859 2617@@ -4129,12 +4130,15 @@ void ext4_set_inode_flags(struct inode *
d337f35e 2618 unsigned int flags = EXT4_I(inode)->i_flags;
52afa9bd 2619 unsigned int new_fl = 0;
978063ce 2620
d337f35e 2621+ if (flags & EXT4_IMMUTABLE_FL)
52afa9bd 2622+ new_fl |= S_IMMUTABLE;
2380c486 2623+ if (flags & EXT4_IXUNLINK_FL)
52afa9bd 2624+ new_fl |= S_IXUNLINK;
978063ce 2625+
d337f35e 2626 if (flags & EXT4_SYNC_FL)
52afa9bd 2627 new_fl |= S_SYNC;
d337f35e 2628 if (flags & EXT4_APPEND_FL)
52afa9bd 2629 new_fl |= S_APPEND;
d337f35e 2630- if (flags & EXT4_IMMUTABLE_FL)
52afa9bd 2631- new_fl |= S_IMMUTABLE;
d337f35e 2632 if (flags & EXT4_NOATIME_FL)
52afa9bd 2633 new_fl |= S_NOATIME;
d337f35e 2634 if (flags & EXT4_DIRSYNC_FL)
8931d859 2635@@ -4142,31 +4146,52 @@ void ext4_set_inode_flags(struct inode *
927ca606
AM
2636 if (test_opt(inode->i_sb, DAX))
2637 new_fl |= S_DAX;
ca5d134c 2638 inode_set_flags(inode, new_fl,
927ca606
AM
2639- S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
2640+ S_IXUNLINK | S_IMMUTABLE | S_DAX |
ca5d134c 2641+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2380c486 2642+
978063ce 2643+ new_fl = 0;
2380c486 2644+ if (flags & EXT4_BARRIER_FL)
978063ce 2645+ new_fl |= V_BARRIER;
2380c486 2646+ if (flags & EXT4_COW_FL)
978063ce
JR
2647+ new_fl |= V_COW;
2648+
2649+ set_mask_bits(&inode->i_vflags,
2650+ V_BARRIER | V_COW, new_fl);
d337f35e
JR
2651 }
2652
2380c486
JR
2653 /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
2654 void ext4_get_inode_flags(struct ext4_inode_info *ei)
2655 {
76514441
AM
2656- unsigned int vfs_fl;
2657+ unsigned int vfs_fl, vfs_vf;
2658 unsigned long old_fl, new_fl;
2380c486 2659
76514441
AM
2660 do {
2661 vfs_fl = ei->vfs_inode.i_flags;
2662+ vfs_vf = ei->vfs_inode.i_vflags;
2663 old_fl = ei->i_flags;
2664 new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
2665 EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
2666- EXT4_DIRSYNC_FL);
2667+ EXT4_DIRSYNC_FL|EXT4_BARRIER_FL|
2668+ EXT4_COW_FL);
2669+
2670+ if (vfs_fl & S_IMMUTABLE)
2671+ new_fl |= EXT4_IMMUTABLE_FL;
2672+ if (vfs_fl & S_IXUNLINK)
2673+ new_fl |= EXT4_IXUNLINK_FL;
2674+
2675 if (vfs_fl & S_SYNC)
2676 new_fl |= EXT4_SYNC_FL;
2677 if (vfs_fl & S_APPEND)
2678 new_fl |= EXT4_APPEND_FL;
2679- if (vfs_fl & S_IMMUTABLE)
2680- new_fl |= EXT4_IMMUTABLE_FL;
2681 if (vfs_fl & S_NOATIME)
2682 new_fl |= EXT4_NOATIME_FL;
2683 if (vfs_fl & S_DIRSYNC)
2684 new_fl |= EXT4_DIRSYNC_FL;
2685+
2686+ if (vfs_vf & V_BARRIER)
2687+ new_fl |= EXT4_BARRIER_FL;
2688+ if (vfs_vf & V_COW)
2689+ new_fl |= EXT4_COW_FL;
2690 } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
ec22aa5c
AM
2691 }
2692
8931d859 2693@@ -4276,8 +4301,10 @@ struct inode *ext4_iget(struct super_blo
42bc425c
AM
2694 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2695 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
d337f35e 2696 }
42bc425c
AM
2697- i_uid_write(inode, i_uid);
2698- i_gid_write(inode, i_gid);
2699+ i_uid_write(inode, INOTAG_UID(DX_TAG(inode), i_uid, i_gid));
2700+ i_gid_write(inode, INOTAG_GID(DX_TAG(inode), i_uid, i_gid));
537831f9
AM
2701+ i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), i_uid, i_gid,
2702+ le16_to_cpu(raw_inode->i_raw_tag)));
f6c5ef8b 2703 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
2380c486 2704
d33d7b00 2705 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
8931d859 2706@@ -4591,8 +4618,10 @@ static int ext4_do_update_inode(handle_t
d337f35e 2707
2380c486 2708 ext4_get_inode_flags(ei);
d337f35e 2709 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
42bc425c
AM
2710- i_uid = i_uid_read(inode);
2711- i_gid = i_gid_read(inode);
a4a22af8
AM
2712+ i_uid = from_kuid(&init_user_ns,
2713+ TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
2714+ i_gid = from_kgid(&init_user_ns,
2715+ TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
ec22aa5c 2716 if (!(test_opt(inode->i_sb, NO_UID32))) {
42bc425c
AM
2717 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
2718 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
8931d859 2719@@ -4615,6 +4644,9 @@ static int ext4_do_update_inode(handle_t
d337f35e
JR
2720 raw_inode->i_uid_high = 0;
2721 raw_inode->i_gid_high = 0;
2722 }
2723+#ifdef CONFIG_TAGGING_INTERN
537831f9 2724+ raw_inode->i_raw_tag = cpu_to_le16(i_tag_read(inode));
d337f35e
JR
2725+#endif
2726 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2380c486
JR
2727
2728 EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
8931d859 2729@@ -4860,7 +4892,8 @@ int ext4_setattr(struct dentry *dentry,
927ca606
AM
2730 return error;
2731 }
42bc425c
AM
2732 if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
2733- (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
2734+ (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid)) ||
537831f9 2735+ (ia_valid & ATTR_TAG && !tag_eq(attr->ia_tag, inode->i_tag))) {
d337f35e
JR
2736 handle_t *handle;
2737
2738 /* (user+group)*(old+new) structure, inode write (sb,
8931d859 2739@@ -4883,6 +4916,8 @@ int ext4_setattr(struct dentry *dentry,
d337f35e
JR
2740 inode->i_uid = attr->ia_uid;
2741 if (attr->ia_valid & ATTR_GID)
2742 inode->i_gid = attr->ia_gid;
2743+ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
2744+ inode->i_tag = attr->ia_tag;
2745 error = ext4_mark_inode_dirty(handle, inode);
2746 ext4_journal_stop(handle);
2747 }
8931d859
AM
2748diff -NurpP --minimal linux-4.4.161/fs/ext4/ioctl.c linux-4.4.161-vs2.3.9.8/fs/ext4/ioctl.c
2749--- linux-4.4.161/fs/ext4/ioctl.c 2018-10-20 02:34:30.000000000 +0000
2750+++ linux-4.4.161-vs2.3.9.8/fs/ext4/ioctl.c 2018-10-20 04:57:21.000000000 +0000
09be7631 2751@@ -14,6 +14,7 @@
2380c486 2752 #include <linux/mount.h>
ec22aa5c 2753 #include <linux/file.h>
927ca606 2754 #include <linux/random.h>
d337f35e
JR
2755+#include <linux/vs_tag.h>
2756 #include <asm/uaccess.h>
2380c486
JR
2757 #include "ext4_jbd2.h"
2758 #include "ext4.h"
927ca606
AM
2759@@ -202,6 +203,33 @@ static int uuid_is_zero(__u8 u[16])
2760 return 1;
09be7631 2761 }
db55b927 2762
d4263eb0
JR
2763+int ext4_sync_flags(struct inode *inode, int flags, int vflags)
2764+{
2765+ handle_t *handle = NULL;
2766+ struct ext4_iloc iloc;
2767+ int err;
2768+
b00e13aa 2769+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
d4263eb0
JR
2770+ if (IS_ERR(handle))
2771+ return PTR_ERR(handle);
2772+
2773+ if (IS_SYNC(inode))
2774+ ext4_handle_sync(handle);
2775+ err = ext4_reserve_inode_write(handle, inode, &iloc);
2776+ if (err)
2777+ goto flags_err;
2778+
2779+ inode->i_flags = flags;
2780+ inode->i_vflags = vflags;
2781+ ext4_get_inode_flags(EXT4_I(inode));
2782+ inode->i_ctime = ext4_current_time(inode);
2783+
2784+ err = ext4_mark_iloc_dirty(handle, inode, &iloc);
2785+flags_err:
2786+ ext4_journal_stop(handle);
2787+ return err;
2788+}
2789+
2790 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2791 {
b00e13aa 2792 struct inode *inode = file_inode(filp);
927ca606 2793@@ -235,6 +263,11 @@ long ext4_ioctl(struct file *filp, unsig
ec22aa5c
AM
2794
2795 flags = ext4_mask_flags(inode->i_mode, flags);
2380c486
JR
2796
2797+ if (IS_BARRIER(inode)) {
2798+ vxwprintk_task(1, "messing with the barrier.");
2799+ return -EACCES;
2800+ }
2801+
2802 err = -EPERM;
ec22aa5c
AM
2803 mutex_lock(&inode->i_mutex);
2804 /* Is it quota file? Do not allow user to mess with it */
927ca606 2805@@ -252,7 +285,9 @@ long ext4_ioctl(struct file *filp, unsig
d337f35e
JR
2806 *
2807 * This test looks nicer. Thanks to Pauline Middelink
2808 */
2809- if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
2810+ if ((oldflags & EXT4_IMMUTABLE_FL) ||
2811+ ((flags ^ oldflags) & (EXT4_APPEND_FL |
2380c486
JR
2812+ EXT4_IMMUTABLE_FL | EXT4_IXUNLINK_FL))) {
2813 if (!capable(CAP_LINUX_IMMUTABLE))
2814 goto flags_out;
2815 }
8931d859
AM
2816diff -NurpP --minimal linux-4.4.161/fs/ext4/namei.c linux-4.4.161-vs2.3.9.8/fs/ext4/namei.c
2817--- linux-4.4.161/fs/ext4/namei.c 2018-10-20 02:34:30.000000000 +0000
2818+++ linux-4.4.161-vs2.3.9.8/fs/ext4/namei.c 2018-10-20 05:50:20.000000000 +0000
927ca606 2819@@ -33,6 +33,7 @@
2380c486 2820 #include <linux/quotaops.h>
d337f35e
JR
2821 #include <linux/buffer_head.h>
2822 #include <linux/bio.h>
d337f35e 2823+#include <linux/vs_tag.h>
2380c486
JR
2824 #include "ext4.h"
2825 #include "ext4_jbd2.h"
d337f35e 2826
8931d859 2827@@ -1445,6 +1446,7 @@ restart:
a168f21d
AM
2828 ll_rw_block(READ | REQ_META | REQ_PRIO,
2829 1, &bh);
2380c486 2830 }
d337f35e 2831+ dx_propagate_tag(nd, inode);
2380c486
JR
2832 }
2833 if ((bh = bh_use[ra_ptr++]) == NULL)
2834 goto next;
8931d859 2835@@ -3881,6 +3883,7 @@ const struct inode_operations ext4_dir_i
a168f21d 2836 .get_acl = ext4_get_acl,
bb20add7 2837 .set_acl = ext4_set_acl,
d4263eb0 2838 .fiemap = ext4_fiemap,
d337f35e
JR
2839+ .sync_flags = ext4_sync_flags,
2840 };
d4263eb0
JR
2841
2842 const struct inode_operations ext4_special_inode_operations = {
8931d859
AM
2843diff -NurpP --minimal linux-4.4.161/fs/ext4/super.c linux-4.4.161-vs2.3.9.8/fs/ext4/super.c
2844--- linux-4.4.161/fs/ext4/super.c 2018-10-20 02:34:30.000000000 +0000
2845+++ linux-4.4.161-vs2.3.9.8/fs/ext4/super.c 2018-10-20 05:50:20.000000000 +0000
2846@@ -1166,6 +1166,7 @@ enum {
78865d5b 2847 Opt_dioread_nolock, Opt_dioread_lock,
dd5f3080 2848 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
927ca606
AM
2849 Opt_max_dir_size_kb, Opt_nojournal_checksum,
2850+ Opt_tag, Opt_notag, Opt_tagid
d337f35e
JR
2851 };
2852
ec22aa5c 2853 static const match_table_t tokens = {
8931d859 2854@@ -1251,6 +1252,9 @@ static const match_table_t tokens = {
1e8b8f9b
AM
2855 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
2856 {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
2857 {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
d337f35e
JR
2858+ {Opt_tag, "tag"},
2859+ {Opt_notag, "notag"},
2860+ {Opt_tagid, "tagid=%u"},
d337f35e 2861 {Opt_err, NULL},
d337f35e 2862 };
2380c486 2863
8931d859 2864@@ -1493,6 +1497,20 @@ static int handle_mount_opt(struct super
927ca606
AM
2865 case Opt_nolazytime:
2866 sb->s_flags &= ~MS_LAZYTIME;
1e8b8f9b 2867 return 1;
d337f35e 2868+#ifndef CONFIG_TAGGING_NONE
1e8b8f9b
AM
2869+ case Opt_tag:
2870+ set_opt(sb, TAGGED);
2871+ return 1;
2872+ case Opt_notag:
2873+ clear_opt(sb, TAGGED);
2874+ return 1;
d337f35e
JR
2875+#endif
2876+#ifdef CONFIG_PROPAGATE
1e8b8f9b
AM
2877+ case Opt_tagid:
2878+ /* use args[0] */
2879+ set_opt(sb, TAGGED);
2880+ return 1;
d337f35e 2881+#endif
1e8b8f9b
AM
2882 }
2883
b00e13aa 2884 for (m = ext4_mount_opts; m->token != Opt_err; m++)
8931d859 2885@@ -3414,6 +3432,9 @@ static int ext4_fill_super(struct super_
927ca606 2886 sb->s_iflags |= SB_I_CGROUPWB;
f6c5ef8b 2887 }
d337f35e
JR
2888
2889+ if (EXT4_SB(sb)->s_mount_opt & EXT4_MOUNT_TAGGED)
2890+ sb->s_flags |= MS_TAGGED;
2891+
2892 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
78865d5b 2893 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
d337f35e 2894
8931d859 2895@@ -4792,6 +4813,14 @@ static int ext4_remount(struct super_blo
ec22aa5c 2896 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
93de0823 2897 ext4_abort(sb, "Abort forced by user");
2380c486 2898
d337f35e
JR
2899+ if ((sbi->s_mount_opt & EXT4_MOUNT_TAGGED) &&
2900+ !(sb->s_flags & MS_TAGGED)) {
2901+ printk("EXT4-fs: %s: tagging not permitted on remount.\n",
2902+ sb->s_id);
d4263eb0
JR
2903+ err = -EINVAL;
2904+ goto restore_opts;
d337f35e 2905+ }
2380c486 2906+
d337f35e 2907 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
78865d5b 2908 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
d337f35e 2909
8931d859
AM
2910diff -NurpP --minimal linux-4.4.161/fs/fcntl.c linux-4.4.161-vs2.3.9.8/fs/fcntl.c
2911--- linux-4.4.161/fs/fcntl.c 2018-10-20 02:34:30.000000000 +0000
2912+++ linux-4.4.161-vs2.3.9.8/fs/fcntl.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 2913@@ -22,6 +22,7 @@
2380c486 2914 #include <linux/pid_namespace.h>
92598135 2915 #include <linux/user_namespace.h>
bb20add7 2916 #include <linux/shmem_fs.h>
d337f35e
JR
2917+#include <linux/vs_limit.h>
2918
2919 #include <asm/poll.h>
2920 #include <asm/siginfo.h>
0e1bbc97 2921@@ -389,6 +390,8 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, f
d337f35e 2922
537831f9 2923 if (!f.file)
2380c486
JR
2924 goto out;
2925+ if (!vx_files_avail(1))
2926+ goto out;
2927
537831f9 2928 if (unlikely(f.file->f_mode & FMODE_PATH)) {
42bc425c 2929 if (!check_fcntl_cmd(cmd))
8931d859
AM
2930diff -NurpP --minimal linux-4.4.161/fs/file.c linux-4.4.161-vs2.3.9.8/fs/file.c
2931--- linux-4.4.161/fs/file.c 2016-01-10 23:01:32.000000000 +0000
2932+++ linux-4.4.161-vs2.3.9.8/fs/file.c 2018-10-20 04:57:21.000000000 +0000
537831f9 2933@@ -22,6 +22,7 @@
2380c486
JR
2934 #include <linux/spinlock.h>
2935 #include <linux/rcupdate.h>
2936 #include <linux/workqueue.h>
2937+#include <linux/vs_limit.h>
2938
09be7631
JR
2939 int sysctl_nr_open __read_mostly = 1024*1024;
2940 int sysctl_nr_open_min = BITS_PER_LONG;
927ca606 2941@@ -356,6 +357,8 @@ struct files_struct *dup_fd(struct files
2380c486
JR
2942 struct file *f = *old_fds++;
2943 if (f) {
2944 get_file(f);
2945+ /* TODO: sum it first for check and performance */
2946+ vx_openfd_inc(open_files - i);
2947 } else {
2948 /*
2949 * The fd may be claimed in the fd bitmap but not yet
927ca606 2950@@ -405,9 +408,11 @@ static struct fdtable *close_files(struc
537831f9 2951 filp_close(file, files);
bb20add7 2952 cond_resched_rcu_qs();
537831f9
AM
2953 }
2954+ vx_openfd_dec(i);
2955 }
2956 i++;
2957 set >>= 1;
2958+ cond_resched();
2959 }
2960 }
bb20add7 2961
927ca606 2962@@ -538,6 +543,7 @@ repeat:
2380c486 2963 else
1e8b8f9b 2964 __clear_close_on_exec(fd, fdt);
2380c486
JR
2965 error = fd;
2966+ vx_openfd_inc(fd);
2967 #if 1
2968 /* Sanity check */
bb20add7 2969 if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
927ca606 2970@@ -568,6 +574,7 @@ static void __put_unused_fd(struct files
537831f9
AM
2971 __clear_open_fd(fd, fdt);
2972 if (fd < files->next_fd)
2973 files->next_fd = fd;
2974+ vx_openfd_dec(fd);
2975 }
2976
2977 void put_unused_fd(unsigned int fd)
927ca606 2978@@ -850,6 +857,8 @@ __releases(&files->file_lock)
537831f9
AM
2979
2980 if (tofree)
2981 filp_close(tofree, files);
2982+ else
2983+ vx_openfd_inc(fd); /* fd was unused */
2984
2985 return fd;
2986
8931d859
AM
2987diff -NurpP --minimal linux-4.4.161/fs/file_table.c linux-4.4.161-vs2.3.9.8/fs/file_table.c
2988--- linux-4.4.161/fs/file_table.c 2016-01-10 23:01:32.000000000 +0000
2989+++ linux-4.4.161-vs2.3.9.8/fs/file_table.c 2018-10-20 04:57:21.000000000 +0000
92598135 2990@@ -26,6 +26,8 @@
92598135 2991 #include <linux/task_work.h>
2bf5ad28 2992 #include <linux/ima.h>
927ca606 2993 #include <linux/swap.h>
d337f35e
JR
2994+#include <linux/vs_limit.h>
2995+#include <linux/vs_context.h>
2996
a168f21d 2997 #include <linux/atomic.h>
d337f35e 2998
c2e5f7c8 2999@@ -137,6 +139,8 @@ struct file *get_empty_filp(void)
bb20add7 3000 mutex_init(&f->f_pos_lock);
d337f35e
JR
3001 eventpoll_init_file(f);
3002 /* f->f_version: 0 */
3003+ f->f_xid = vx_current_xid();
3004+ vx_files_inc(f);
3005 return f;
3006
3007 over:
bb20add7 3008@@ -219,6 +223,8 @@ static void __fput(struct file *file)
265de2f7
JR
3009 put_write_access(inode);
3010 __mnt_drop_write(mnt);
3011 }
d337f35e
JR
3012+ vx_files_dec(file);
3013+ file->f_xid = 0;
92598135
AM
3014 file->f_path.dentry = NULL;
3015 file->f_path.mnt = NULL;
b00e13aa 3016 file->f_inode = NULL;
bb20add7 3017@@ -305,6 +311,8 @@ void put_filp(struct file *file)
d337f35e 3018 {
2380c486 3019 if (atomic_long_dec_and_test(&file->f_count)) {
d337f35e
JR
3020 security_file_free(file);
3021+ vx_files_dec(file);
3022+ file->f_xid = 0;
d337f35e
JR
3023 file_free(file);
3024 }
c2e5f7c8 3025 }
8931d859
AM
3026diff -NurpP --minimal linux-4.4.161/fs/fs_struct.c linux-4.4.161-vs2.3.9.8/fs/fs_struct.c
3027--- linux-4.4.161/fs/fs_struct.c 2016-01-10 23:01:32.000000000 +0000
3028+++ linux-4.4.161-vs2.3.9.8/fs/fs_struct.c 2018-10-20 04:57:21.000000000 +0000
ec22aa5c
AM
3029@@ -4,6 +4,7 @@
3030 #include <linux/path.h>
3031 #include <linux/slab.h>
3032 #include <linux/fs_struct.h>
3033+#include <linux/vserver/global.h>
d33d7b00 3034 #include "internal.h"
ec22aa5c 3035
92598135
AM
3036 /*
3037@@ -87,6 +88,7 @@ void free_fs_struct(struct fs_struct *fs
ec22aa5c 3038 {
92598135
AM
3039 path_put(&fs->root);
3040 path_put(&fs->pwd);
ec22aa5c
AM
3041+ atomic_dec(&vs_global_fs);
3042 kmem_cache_free(fs_cachep, fs);
3043 }
3044
537831f9 3045@@ -124,6 +126,7 @@ struct fs_struct *copy_fs_struct(struct
d33d7b00 3046 fs->pwd = old->pwd;
92598135 3047 path_get(&fs->pwd);
d33d7b00 3048 spin_unlock(&old->lock);
ec22aa5c
AM
3049+ atomic_inc(&vs_global_fs);
3050 }
3051 return fs;
3052 }
8931d859
AM
3053diff -NurpP --minimal linux-4.4.161/fs/gfs2/file.c linux-4.4.161-vs2.3.9.8/fs/gfs2/file.c
3054--- linux-4.4.161/fs/gfs2/file.c 2018-10-20 02:34:30.000000000 +0000
3055+++ linux-4.4.161-vs2.3.9.8/fs/gfs2/file.c 2018-10-20 04:57:21.000000000 +0000
927ca606 3056@@ -137,6 +137,9 @@ static const u32 fsflags_to_gfs2[32] = {
e22b5178
AM
3057 [12] = GFS2_DIF_EXHASH,
3058 [14] = GFS2_DIF_INHERIT_JDATA,
92598135 3059 [17] = GFS2_DIF_TOPDIR,
e22b5178
AM
3060+ [27] = GFS2_DIF_IXUNLINK,
3061+ [26] = GFS2_DIF_BARRIER,
3062+ [29] = GFS2_DIF_COW,
3063 };
3064
3065 static const u32 gfs2_to_fsflags[32] = {
927ca606 3066@@ -147,6 +150,9 @@ static const u32 gfs2_to_fsflags[32] = {
e22b5178 3067 [gfs2fl_ExHash] = FS_INDEX_FL,
92598135 3068 [gfs2fl_TopLevel] = FS_TOPDIR_FL,
e22b5178
AM
3069 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
3070+ [gfs2fl_IXUnlink] = FS_IXUNLINK_FL,
3071+ [gfs2fl_Barrier] = FS_BARRIER_FL,
3072+ [gfs2fl_Cow] = FS_COW_FL,
3073 };
3074
3075 static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
927ca606 3076@@ -177,12 +183,17 @@ void gfs2_set_inode_flags(struct inode *
e22b5178
AM
3077 {
3078 struct gfs2_inode *ip = GFS2_I(inode);
3079 unsigned int flags = inode->i_flags;
3080+ unsigned int vflags = inode->i_vflags;
8931d859 3081+
927ca606
AM
3082+ flags &= ~(S_IMMUTABLE | S_IXUNLINK |
3083+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC | S_NOSEC);
8931d859
AM
3084
3085- flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
a168f21d 3086 if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
927ca606 3087 flags |= S_NOSEC;
e22b5178
AM
3088 if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
3089 flags |= S_IMMUTABLE;
3090+ if (ip->i_diskflags & GFS2_DIF_IXUNLINK)
3091+ flags |= S_IXUNLINK;
e22b5178
AM
3092 if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
3093 flags |= S_APPEND;
3094 if (ip->i_diskflags & GFS2_DIF_NOATIME)
927ca606 3095@@ -190,6 +201,43 @@ void gfs2_set_inode_flags(struct inode *
e22b5178
AM
3096 if (ip->i_diskflags & GFS2_DIF_SYNC)
3097 flags |= S_SYNC;
3098 inode->i_flags = flags;
3099+
3100+ vflags &= ~(V_BARRIER | V_COW);
3101+
3102+ if (ip->i_diskflags & GFS2_DIF_BARRIER)
3103+ vflags |= V_BARRIER;
3104+ if (ip->i_diskflags & GFS2_DIF_COW)
3105+ vflags |= V_COW;
3106+ inode->i_vflags = vflags;
3107+}
3108+
3109+void gfs2_get_inode_flags(struct inode *inode)
3110+{
3111+ struct gfs2_inode *ip = GFS2_I(inode);
3112+ unsigned int flags = inode->i_flags;
3113+ unsigned int vflags = inode->i_vflags;
3114+
3115+ ip->i_diskflags &= ~(GFS2_DIF_APPENDONLY |
3116+ GFS2_DIF_NOATIME | GFS2_DIF_SYNC |
3117+ GFS2_DIF_IMMUTABLE | GFS2_DIF_IXUNLINK |
3118+ GFS2_DIF_BARRIER | GFS2_DIF_COW);
3119+
3120+ if (flags & S_IMMUTABLE)
3121+ ip->i_diskflags |= GFS2_DIF_IMMUTABLE;
3122+ if (flags & S_IXUNLINK)
3123+ ip->i_diskflags |= GFS2_DIF_IXUNLINK;
3124+
3125+ if (flags & S_APPEND)
3126+ ip->i_diskflags |= GFS2_DIF_APPENDONLY;
3127+ if (flags & S_NOATIME)
3128+ ip->i_diskflags |= GFS2_DIF_NOATIME;
3129+ if (flags & S_SYNC)
3130+ ip->i_diskflags |= GFS2_DIF_SYNC;
3131+
3132+ if (vflags & V_BARRIER)
3133+ ip->i_diskflags |= GFS2_DIF_BARRIER;
3134+ if (vflags & V_COW)
3135+ ip->i_diskflags |= GFS2_DIF_COW;
3136 }
3137
3138 /* Flags that can be set by user space */
927ca606 3139@@ -305,6 +353,37 @@ static int gfs2_set_flags(struct file *f
e22b5178
AM
3140 return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA);
3141 }
3142
3143+int gfs2_sync_flags(struct inode *inode, int flags, int vflags)
3144+{
3145+ struct gfs2_inode *ip = GFS2_I(inode);
3146+ struct gfs2_sbd *sdp = GFS2_SB(inode);
3147+ struct buffer_head *bh;
3148+ struct gfs2_holder gh;
3149+ int error;
3150+
3151+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
3152+ if (error)
3153+ return error;
3154+ error = gfs2_trans_begin(sdp, RES_DINODE, 0);
3155+ if (error)
3156+ goto out;
3157+ error = gfs2_meta_inode_buffer(ip, &bh);
3158+ if (error)
3159+ goto out_trans_end;
b00e13aa 3160+ gfs2_trans_add_meta(ip->i_gl, bh);
e22b5178
AM
3161+ inode->i_flags = flags;
3162+ inode->i_vflags = vflags;
3163+ gfs2_get_inode_flags(inode);
3164+ gfs2_dinode_out(ip, bh->b_data);
3165+ brelse(bh);
3166+ gfs2_set_aops(inode);
3167+out_trans_end:
3168+ gfs2_trans_end(sdp);
3169+out:
3170+ gfs2_glock_dq_uninit(&gh);
3171+ return error;
3172+}
3173+
3174 static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3175 {
3176 switch(cmd) {
8931d859
AM
3177diff -NurpP --minimal linux-4.4.161/fs/gfs2/inode.h linux-4.4.161-vs2.3.9.8/fs/gfs2/inode.h
3178--- linux-4.4.161/fs/gfs2/inode.h 2016-01-10 23:01:32.000000000 +0000
3179+++ linux-4.4.161-vs2.3.9.8/fs/gfs2/inode.h 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8 3180@@ -118,6 +118,7 @@ extern const struct file_operations gfs2
e22b5178
AM
3181 extern const struct file_operations gfs2_dir_fops_nolock;
3182
3183 extern void gfs2_set_inode_flags(struct inode *inode);
3184+extern int gfs2_sync_flags(struct inode *inode, int flags, int vflags);
3185
3186 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
3187 extern const struct file_operations gfs2_file_fops;
8931d859
AM
3188diff -NurpP --minimal linux-4.4.161/fs/hostfs/hostfs.h linux-4.4.161-vs2.3.9.8/fs/hostfs/hostfs.h
3189--- linux-4.4.161/fs/hostfs/hostfs.h 2016-01-10 23:01:32.000000000 +0000
3190+++ linux-4.4.161-vs2.3.9.8/fs/hostfs/hostfs.h 2018-10-20 04:57:21.000000000 +0000
537831f9
AM
3191@@ -42,6 +42,7 @@ struct hostfs_iattr {
3192 unsigned short ia_mode;
3193 uid_t ia_uid;
3194 gid_t ia_gid;
61333608 3195+ vtag_t ia_tag;
537831f9
AM
3196 loff_t ia_size;
3197 struct timespec ia_atime;
3198 struct timespec ia_mtime;
8931d859
AM
3199diff -NurpP --minimal linux-4.4.161/fs/inode.c linux-4.4.161-vs2.3.9.8/fs/inode.c
3200--- linux-4.4.161/fs/inode.c 2018-10-20 02:34:30.000000000 +0000
3201+++ linux-4.4.161-vs2.3.9.8/fs/inode.c 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8 3202@@ -18,6 +18,7 @@
763640ca 3203 #include <linux/buffer_head.h> /* for inode_has_buffers */
db55b927 3204 #include <linux/ratelimit.h>
c2e5f7c8 3205 #include <linux/list_lru.h>
76514441 3206+#include <linux/vs_tag.h>
927ca606 3207 #include <trace/events/writeback.h>
763640ca 3208 #include "internal.h"
76514441 3209
927ca606 3210@@ -133,6 +134,8 @@ int inode_init_always(struct super_block
ec22aa5c
AM
3211 struct address_space *const mapping = &inode->i_data;
3212
3213 inode->i_sb = sb;
3214+
3215+ /* essential because of inode slab reuse */
ec22aa5c
AM
3216 inode->i_blkbits = sb->s_blocksize_bits;
3217 inode->i_flags = 0;
3218 atomic_set(&inode->i_count, 1);
927ca606 3219@@ -142,6 +145,7 @@ int inode_init_always(struct super_block
537831f9
AM
3220 inode->i_opflags = 0;
3221 i_uid_write(inode, 0);
3222 i_gid_write(inode, 0);
3223+ i_tag_write(inode, 0);
3224 atomic_set(&inode->i_writecount, 0);
3225 inode->i_size = 0;
3226 inode->i_blocks = 0;
927ca606 3227@@ -152,6 +156,7 @@ int inode_init_always(struct super_block
ec22aa5c 3228 inode->i_cdev = NULL;
927ca606 3229 inode->i_link = NULL;
ec22aa5c
AM
3230 inode->i_rdev = 0;
3231+ inode->i_mdev = 0;
3232 inode->dirtied_when = 0;
3233
3234 if (security_inode_alloc(inode))
927ca606 3235@@ -469,6 +474,8 @@ void __insert_inode_hash(struct inode *i
d337f35e 3236 }
763640ca 3237 EXPORT_SYMBOL(__insert_inode_hash);
d337f35e
JR
3238
3239+EXPORT_SYMBOL_GPL(__iget);
3240+
3241 /**
a168f21d 3242 * __remove_inode_hash - remove an inode from the hash
ab30d09f 3243 * @inode: inode to unhash
927ca606 3244@@ -1911,9 +1918,11 @@ void init_special_inode(struct inode *in
2380c486
JR
3245 if (S_ISCHR(mode)) {
3246 inode->i_fop = &def_chr_fops;
3247 inode->i_rdev = rdev;
3248+ inode->i_mdev = rdev;
3249 } else if (S_ISBLK(mode)) {
3250 inode->i_fop = &def_blk_fops;
3251 inode->i_rdev = rdev;
3252+ inode->i_mdev = rdev;
3253 } else if (S_ISFIFO(mode))
09be7631 3254 inode->i_fop = &pipefifo_fops;
2380c486 3255 else if (S_ISSOCK(mode))
8931d859 3256@@ -1948,6 +1957,7 @@ void inode_init_owner(struct inode *inod
76514441
AM
3257 } else
3258 inode->i_gid = current_fsgid();
3259 inode->i_mode = mode;
8ce283e1 3260+ i_tag_write(inode, dx_current_fstag(inode->i_sb));
76514441
AM
3261 }
3262 EXPORT_SYMBOL(inode_init_owner);
763640ca 3263
8931d859
AM
3264diff -NurpP --minimal linux-4.4.161/fs/ioctl.c linux-4.4.161-vs2.3.9.8/fs/ioctl.c
3265--- linux-4.4.161/fs/ioctl.c 2016-01-10 23:01:32.000000000 +0000
3266+++ linux-4.4.161-vs2.3.9.8/fs/ioctl.c 2018-10-20 04:57:21.000000000 +0000
ab30d09f 3267@@ -15,6 +15,9 @@
ec22aa5c
AM
3268 #include <linux/writeback.h>
3269 #include <linux/buffer_head.h>
3270 #include <linux/falloc.h>
d337f35e
JR
3271+#include <linux/proc_fs.h>
3272+#include <linux/vserver/inode.h>
3273+#include <linux/vs_tag.h>
3274
d337f35e
JR
3275 #include <asm/ioctls.h>
3276
8931d859
AM
3277diff -NurpP --minimal linux-4.4.161/fs/jfs/file.c linux-4.4.161-vs2.3.9.8/fs/jfs/file.c
3278--- linux-4.4.161/fs/jfs/file.c 2016-01-10 23:01:32.000000000 +0000
3279+++ linux-4.4.161-vs2.3.9.8/fs/jfs/file.c 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
3280@@ -113,7 +113,8 @@ int jfs_setattr(struct dentry *dentry, s
3281 return rc;
3282 }
537831f9
AM
3283 if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
3284- (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
3285+ (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)) ||
3286+ (iattr->ia_valid & ATTR_TAG && !tag_eq(iattr->ia_tag, inode->i_tag))) {
78865d5b
AM
3287 rc = dquot_transfer(inode, iattr);
3288 if (rc)
3289 return rc;
927ca606 3290@@ -149,6 +150,7 @@ const struct inode_operations jfs_file_i
a168f21d 3291 .get_acl = jfs_get_acl,
bb20add7 3292 .set_acl = jfs_set_acl,
d337f35e
JR
3293 #endif
3294+ .sync_flags = jfs_sync_flags,
3295 };
3296
3297 const struct file_operations jfs_file_operations = {
8931d859
AM
3298diff -NurpP --minimal linux-4.4.161/fs/jfs/ioctl.c linux-4.4.161-vs2.3.9.8/fs/jfs/ioctl.c
3299--- linux-4.4.161/fs/jfs/ioctl.c 2016-01-10 23:01:32.000000000 +0000
3300+++ linux-4.4.161-vs2.3.9.8/fs/jfs/ioctl.c 2018-10-20 04:57:21.000000000 +0000
537831f9 3301@@ -12,6 +12,7 @@
d337f35e 3302 #include <linux/time.h>
2380c486 3303 #include <linux/sched.h>
537831f9 3304 #include <linux/blkdev.h>
d337f35e
JR
3305+#include <linux/mount.h>
3306 #include <asm/current.h>
3307 #include <asm/uaccess.h>
3308
537831f9 3309@@ -56,6 +57,16 @@ static long jfs_map_ext2(unsigned long f
d4263eb0
JR
3310 }
3311
3312
3313+int jfs_sync_flags(struct inode *inode, int flags, int vflags)
3314+{
3315+ inode->i_flags = flags;
3316+ inode->i_vflags = vflags;
3317+ jfs_get_inode_flags(JFS_IP(inode));
3318+ inode->i_ctime = CURRENT_TIME_SEC;
3319+ mark_inode_dirty(inode);
3320+ return 0;
3321+}
3322+
3323 long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3324 {
b00e13aa 3325 struct inode *inode = file_inode(filp);
537831f9 3326@@ -89,6 +100,11 @@ long jfs_ioctl(struct file *filp, unsign
2380c486
JR
3327 if (!S_ISDIR(inode->i_mode))
3328 flags &= ~JFS_DIRSYNC_FL;
d337f35e 3329
2380c486
JR
3330+ if (IS_BARRIER(inode)) {
3331+ vxwprintk_task(1, "messing with the barrier.");
3332+ err = -EACCES; /* NOTE(review): exit via err like the quota check below, not a bare return */
3333+ goto setflags_out;
3334+ }
3335 /* Is it quota file? Do not allow user to mess with it */
3336 if (IS_NOQUOTA(inode)) {
3337 err = -EPERM;
537831f9 3338@@ -106,8 +122,8 @@ long jfs_ioctl(struct file *filp, unsign
d337f35e
JR
3339 * the relevant capability.
3340 */
3341 if ((oldflags & JFS_IMMUTABLE_FL) ||
3342- ((flags ^ oldflags) &
3343- (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
3344+ ((flags ^ oldflags) & (JFS_APPEND_FL |
2380c486
JR
3345+ JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL))) {
3346 if (!capable(CAP_LINUX_IMMUTABLE)) {
3347 mutex_unlock(&inode->i_mutex);
3348 err = -EPERM;
537831f9 3349@@ -115,7 +131,7 @@ long jfs_ioctl(struct file *filp, unsign
d4263eb0
JR
3350 }
3351 }
3352
3353- flags = flags & JFS_FL_USER_MODIFIABLE;
3354+ flags &= JFS_FL_USER_MODIFIABLE;
3355 flags |= oldflags & ~JFS_FL_USER_MODIFIABLE;
3356 jfs_inode->mode2 = flags;
3357
8931d859
AM
3358diff -NurpP --minimal linux-4.4.161/fs/jfs/jfs_dinode.h linux-4.4.161-vs2.3.9.8/fs/jfs/jfs_dinode.h
3359--- linux-4.4.161/fs/jfs/jfs_dinode.h 2016-01-10 23:01:32.000000000 +0000
3360+++ linux-4.4.161-vs2.3.9.8/fs/jfs/jfs_dinode.h 2018-10-20 04:57:21.000000000 +0000
2380c486
JR
3361@@ -161,9 +161,13 @@ struct dinode {
3362
d337f35e
JR
3363 #define JFS_APPEND_FL 0x01000000 /* writes to file may only append */
3364 #define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */
2380c486 3365+#define JFS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
d337f35e
JR
3366
3367-#define JFS_FL_USER_VISIBLE 0x03F80000
2380c486 3368-#define JFS_FL_USER_MODIFIABLE 0x03F80000
d337f35e 3369+#define JFS_BARRIER_FL 0x04000000 /* Barrier for chroot() */
2380c486 3370+#define JFS_COW_FL 0x20000000 /* Copy on Write marker */
d337f35e 3371+
2380c486
JR
3372+#define JFS_FL_USER_VISIBLE 0x07F80000
3373+#define JFS_FL_USER_MODIFIABLE 0x07F80000
3374 #define JFS_FL_INHERIT 0x03C80000
d337f35e
JR
3375
3376 /* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */
8931d859
AM
3377diff -NurpP --minimal linux-4.4.161/fs/jfs/jfs_filsys.h linux-4.4.161-vs2.3.9.8/fs/jfs/jfs_filsys.h
3378--- linux-4.4.161/fs/jfs/jfs_filsys.h 2016-01-10 23:01:32.000000000 +0000
3379+++ linux-4.4.161-vs2.3.9.8/fs/jfs/jfs_filsys.h 2018-10-20 04:57:21.000000000 +0000
537831f9 3380@@ -266,6 +266,7 @@
ec22aa5c
AM
3381 #define JFS_NAME_MAX 255
3382 #define JFS_PATH_MAX BPSIZE
bd427b06 3383
ec22aa5c 3384+#define JFS_TAGGED 0x00800000 /* Context Tagging */
bd427b06 3385
ec22aa5c
AM
3386 /*
3387 * file system state (superblock state)
8931d859
AM
3388diff -NurpP --minimal linux-4.4.161/fs/jfs/jfs_imap.c linux-4.4.161-vs2.3.9.8/fs/jfs/jfs_imap.c
3389--- linux-4.4.161/fs/jfs/jfs_imap.c 2016-01-10 23:01:32.000000000 +0000
3390+++ linux-4.4.161-vs2.3.9.8/fs/jfs/jfs_imap.c 2018-10-20 04:57:21.000000000 +0000
78865d5b 3391@@ -46,6 +46,7 @@
ec22aa5c
AM
3392 #include <linux/pagemap.h>
3393 #include <linux/quotaops.h>
78865d5b 3394 #include <linux/slab.h>
ec22aa5c 3395+#include <linux/vs_tag.h>
bd427b06 3396
ec22aa5c
AM
3397 #include "jfs_incore.h"
3398 #include "jfs_inode.h"
c2e5f7c8 3399@@ -3047,6 +3048,8 @@ static int copy_from_dinode(struct dinod
ec22aa5c
AM
3400 {
3401 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3402 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
a4a22af8
AM
3403+ kuid_t kuid;
3404+ kgid_t kgid;
bd427b06 3405
ec22aa5c
AM
3406 jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
3407 jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
c2e5f7c8 3408@@ -3067,14 +3070,18 @@ static int copy_from_dinode(struct dinod
d337f35e 3409 }
f6c5ef8b 3410 set_nlink(ip, le32_to_cpu(dip->di_nlink));
bd427b06 3411
537831f9 3412- jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
a4a22af8
AM
3413+ kuid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
3414+ kgid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
3415+ ip->i_tag = INOTAG_KTAG(DX_TAG(ip), kuid, kgid, GLOBAL_ROOT_TAG);
ec22aa5c 3416+
a4a22af8 3417+ jfs_ip->saved_uid = INOTAG_KUID(DX_TAG(ip), kuid, kgid);
537831f9 3418 if (!uid_valid(sbi->uid))
ec22aa5c
AM
3419 ip->i_uid = jfs_ip->saved_uid;
3420 else {
3421 ip->i_uid = sbi->uid;
bd427b06
AM
3422 }
3423
537831f9 3424- jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
a4a22af8 3425+ jfs_ip->saved_gid = INOTAG_KGID(DX_TAG(ip), kuid, kgid);
537831f9 3426 if (!gid_valid(sbi->gid))
d337f35e
JR
3427 ip->i_gid = jfs_ip->saved_gid;
3428 else {
c2e5f7c8 3429@@ -3139,16 +3146,14 @@ static void copy_to_dinode(struct dinode
d337f35e
JR
3430 dip->di_size = cpu_to_le64(ip->i_size);
3431 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
3432 dip->di_nlink = cpu_to_le32(ip->i_nlink);
537831f9
AM
3433- if (!uid_valid(sbi->uid))
3434- dip->di_uid = cpu_to_le32(i_uid_read(ip));
d337f35e 3435- else
537831f9
AM
3436- dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns,
3437- jfs_ip->saved_uid));
3438- if (!gid_valid(sbi->gid))
3439- dip->di_gid = cpu_to_le32(i_gid_read(ip));
d337f35e 3440- else
537831f9
AM
3441- dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3442- jfs_ip->saved_gid));
3443+ dip->di_uid = cpu_to_le32(from_kuid(&init_user_ns,
a4a22af8 3444+ TAGINO_KUID(DX_TAG(ip),
537831f9
AM
3445+ !uid_valid(sbi->uid) ? ip->i_uid : jfs_ip->saved_uid,
3446+ ip->i_tag)));
a4a22af8
AM
3447+ dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3448+ TAGINO_KGID(DX_TAG(ip),
537831f9
AM
3449+ !gid_valid(sbi->gid) ? ip->i_gid : jfs_ip->saved_gid,
3450+ ip->i_tag)));
2380c486 3451 jfs_get_inode_flags(jfs_ip);
d337f35e
JR
3452 /*
3453 * mode2 is only needed for storing the higher order bits.
8931d859
AM
3454diff -NurpP --minimal linux-4.4.161/fs/jfs/jfs_inode.c linux-4.4.161-vs2.3.9.8/fs/jfs/jfs_inode.c
3455--- linux-4.4.161/fs/jfs/jfs_inode.c 2016-01-10 23:01:32.000000000 +0000
3456+++ linux-4.4.161-vs2.3.9.8/fs/jfs/jfs_inode.c 2018-10-20 04:57:21.000000000 +0000
e22b5178
AM
3457@@ -18,6 +18,7 @@
3458
3459 #include <linux/fs.h>
3460 #include <linux/quotaops.h>
3461+#include <linux/vs_tag.h>
3462 #include "jfs_incore.h"
3463 #include "jfs_inode.h"
3464 #include "jfs_filsys.h"
8de2f54c 3465@@ -33,6 +34,9 @@ void jfs_set_inode_flags(struct inode *i
d337f35e
JR
3466
3467 if (flags & JFS_IMMUTABLE_FL)
bb20add7 3468 new_fl |= S_IMMUTABLE;
2380c486 3469+ if (flags & JFS_IXUNLINK_FL)
8de2f54c 3470+ new_fl |= S_IXUNLINK;
d337f35e 3471+
d337f35e 3472 if (flags & JFS_APPEND_FL)
bb20add7 3473 new_fl |= S_APPEND;
d337f35e 3474 if (flags & JFS_NOATIME_FL)
8de2f54c 3475@@ -41,18 +45,35 @@ void jfs_set_inode_flags(struct inode *i
bb20add7 3476 new_fl |= S_DIRSYNC;
8de2f54c
AM
3477 if (flags & JFS_SYNC_FL)
3478 new_fl |= S_SYNC;
bb20add7 3479- inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND | S_NOATIME |
8de2f54c
AM
3480- S_DIRSYNC | S_SYNC);
3481+
3482+ inode_set_flags(inode, new_fl, S_IMMUTABLE | S_IXUNLINK |
3483+ S_APPEND | S_NOATIME | S_DIRSYNC | S_SYNC);
2380c486 3484+
bb20add7 3485+ new_fl = 0;
2380c486 3486+ if (flags & JFS_BARRIER_FL)
bb20add7 3487+ new_fl |= V_BARRIER;
2380c486 3488+ if (flags & JFS_COW_FL)
bb20add7
AM
3489+ new_fl |= V_COW;
3490+
3491+ set_mask_bits(&inode->i_vflags,
3492+ V_BARRIER | V_COW, new_fl);
2380c486
JR
3493 }
3494
3495 void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip)
3496 {
3497 unsigned int flags = jfs_ip->vfs_inode.i_flags;
3498+ unsigned int vflags = jfs_ip->vfs_inode.i_vflags;
3499+
3500+ jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL |
3501+ JFS_APPEND_FL | JFS_NOATIME_FL |
3502+ JFS_DIRSYNC_FL | JFS_SYNC_FL |
3503+ JFS_BARRIER_FL | JFS_COW_FL);
3504
3505- jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL |
3506- JFS_DIRSYNC_FL | JFS_SYNC_FL);
3507 if (flags & S_IMMUTABLE)
3508 jfs_ip->mode2 |= JFS_IMMUTABLE_FL;
3509+ if (flags & S_IXUNLINK)
3510+ jfs_ip->mode2 |= JFS_IXUNLINK_FL;
3511+
3512 if (flags & S_APPEND)
3513 jfs_ip->mode2 |= JFS_APPEND_FL;
3514 if (flags & S_NOATIME)
8de2f54c 3515@@ -61,6 +82,11 @@ void jfs_get_inode_flags(struct jfs_inod
2380c486
JR
3516 jfs_ip->mode2 |= JFS_DIRSYNC_FL;
3517 if (flags & S_SYNC)
3518 jfs_ip->mode2 |= JFS_SYNC_FL;
3519+
3520+ if (vflags & V_BARRIER)
3521+ jfs_ip->mode2 |= JFS_BARRIER_FL;
3522+ if (vflags & V_COW)
3523+ jfs_ip->mode2 |= JFS_COW_FL;
d337f35e
JR
3524 }
3525
3526 /*
8931d859
AM
3527diff -NurpP --minimal linux-4.4.161/fs/jfs/jfs_inode.h linux-4.4.161-vs2.3.9.8/fs/jfs/jfs_inode.h
3528--- linux-4.4.161/fs/jfs/jfs_inode.h 2016-01-10 23:01:32.000000000 +0000
3529+++ linux-4.4.161-vs2.3.9.8/fs/jfs/jfs_inode.h 2018-10-20 04:57:21.000000000 +0000
2380c486
JR
3530@@ -39,6 +39,7 @@ extern struct dentry *jfs_fh_to_dentry(s
3531 extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
3532 int fh_len, int fh_type);
d337f35e 3533 extern void jfs_set_inode_flags(struct inode *);
d4263eb0 3534+extern int jfs_sync_flags(struct inode *, int, int);
d337f35e 3535 extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
78865d5b 3536 extern int jfs_setattr(struct dentry *, struct iattr *);
d337f35e 3537
8931d859
AM
3538diff -NurpP --minimal linux-4.4.161/fs/jfs/namei.c linux-4.4.161-vs2.3.9.8/fs/jfs/namei.c
3539--- linux-4.4.161/fs/jfs/namei.c 2018-10-20 02:34:30.000000000 +0000
3540+++ linux-4.4.161-vs2.3.9.8/fs/jfs/namei.c 2018-10-20 04:57:21.000000000 +0000
d33d7b00 3541@@ -22,6 +22,7 @@
d337f35e
JR
3542 #include <linux/ctype.h>
3543 #include <linux/quotaops.h>
2380c486 3544 #include <linux/exportfs.h>
d337f35e
JR
3545+#include <linux/vs_tag.h>
3546 #include "jfs_incore.h"
3547 #include "jfs_superblock.h"
3548 #include "jfs_inode.h"
8931d859 3549@@ -1476,6 +1477,7 @@ static struct dentry *jfs_lookup(struct
a168f21d 3550 jfs_err("jfs_lookup: iget failed on inum %d", (uint)inum);
d337f35e
JR
3551 }
3552
3553+ dx_propagate_tag(nd, ip);
d33d7b00
AM
3554 return d_splice_alias(ip, dentry);
3555 }
d337f35e 3556
8931d859 3557@@ -1541,6 +1543,7 @@ const struct inode_operations jfs_dir_in
a168f21d 3558 .get_acl = jfs_get_acl,
bb20add7 3559 .set_acl = jfs_set_acl,
d337f35e
JR
3560 #endif
3561+ .sync_flags = jfs_sync_flags,
3562 };
3563
3564 const struct file_operations jfs_dir_operations = {
8931d859
AM
3565diff -NurpP --minimal linux-4.4.161/fs/jfs/super.c linux-4.4.161-vs2.3.9.8/fs/jfs/super.c
3566--- linux-4.4.161/fs/jfs/super.c 2018-10-20 02:34:30.000000000 +0000
3567+++ linux-4.4.161-vs2.3.9.8/fs/jfs/super.c 2018-10-20 04:57:21.000000000 +0000
927ca606 3568@@ -206,7 +206,8 @@ enum {
d337f35e
JR
3569 Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
3570 Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
537831f9
AM
3571 Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask,
3572- Opt_discard, Opt_nodiscard, Opt_discard_minblk
3573+ Opt_discard, Opt_nodiscard, Opt_discard_minblk,
d337f35e
JR
3574+ Opt_tag, Opt_notag, Opt_tagid
3575 };
3576
ec22aa5c 3577 static const match_table_t tokens = {
927ca606 3578@@ -216,6 +217,10 @@ static const match_table_t tokens = {
d337f35e
JR
3579 {Opt_resize, "resize=%u"},
3580 {Opt_resize_nosize, "resize"},
3581 {Opt_errors, "errors=%s"},
3582+ {Opt_tag, "tag"},
3583+ {Opt_notag, "notag"},
3584+ {Opt_tagid, "tagid=%u"},
3585+ {Opt_tag, "tagxid"},
3586 {Opt_ignore, "noquota"},
3587 {Opt_ignore, "quota"},
3588 {Opt_usrquota, "usrquota"},
927ca606 3589@@ -405,7 +410,20 @@ static int parse_options(char *options,
bb20add7 3590 pr_err("JFS: discard option not supported on device\n");
d337f35e
JR
3591 break;
3592 }
537831f9 3593-
d337f35e
JR
3594+#ifndef CONFIG_TAGGING_NONE
3595+ case Opt_tag:
3596+ *flag |= JFS_TAGGED;
3597+ break;
3598+ case Opt_notag:
3599+ *flag &= ~JFS_TAGGED; /* notag: clear only the tagging bit, keep all other flags */
3600+ break;
3601+#endif
3602+#ifdef CONFIG_PROPAGATE
3603+ case Opt_tagid:
3604+ /* use args[0] */
3605+ *flag |= JFS_TAGGED;
3606+ break;
3607+#endif
3608 default:
bb20add7
AM
3609 printk("jfs: Unrecognized mount option \"%s\" or missing value\n",
3610 p);
927ca606 3611@@ -437,6 +455,12 @@ static int jfs_remount(struct super_bloc
bb20add7 3612 if (!parse_options(data, sb, &newLVSize, &flag))
d337f35e 3613 return -EINVAL;
ab30d09f 3614
d337f35e
JR
3615+ if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) {
3616+ printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n",
3617+ sb->s_id);
3618+ return -EINVAL;
3619+ }
3620+
3621 if (newLVSize) {
3622 if (sb->s_flags & MS_RDONLY) {
bb20add7
AM
3623 pr_err("JFS: resize requires volume to be mounted read-write\n");
3624@@ -517,6 +541,9 @@ static int jfs_fill_super(struct super_b
d337f35e
JR
3625 #ifdef CONFIG_JFS_POSIX_ACL
3626 sb->s_flags |= MS_POSIXACL;
3627 #endif
3628+ /* map mount option tagxid */
3629+ if (sbi->flag & JFS_TAGGED)
3630+ sb->s_flags |= MS_TAGGED;
3631
3632 if (newLVSize) {
537831f9 3633 pr_err("resize option for remount only\n");
8931d859
AM
3634diff -NurpP --minimal linux-4.4.161/fs/libfs.c linux-4.4.161-vs2.3.9.8/fs/libfs.c
3635--- linux-4.4.161/fs/libfs.c 2016-01-10 23:01:32.000000000 +0000
3636+++ linux-4.4.161-vs2.3.9.8/fs/libfs.c 2018-10-20 04:57:21.000000000 +0000
927ca606 3637@@ -141,13 +141,14 @@ static inline unsigned char dt_type(stru
d337f35e
JR
3638 * both impossible due to the lock on directory.
3639 */
3640
c2e5f7c8 3641-int dcache_readdir(struct file *file, struct dir_context *ctx)
2380c486 3642+static inline int do_dcache_readdir_filter(struct file *filp,
c2e5f7c8 3643+ struct dir_context *ctx, int (*filter)(struct dentry *dentry))
d337f35e 3644 {
c2e5f7c8
JR
3645- struct dentry *dentry = file->f_path.dentry;
3646- struct dentry *cursor = file->private_data;
3647+ struct dentry *dentry = filp->f_path.dentry;
3648+ struct dentry *cursor = filp->private_data;
bb20add7 3649 struct list_head *p, *q = &cursor->d_child;
c2e5f7c8
JR
3650
3651- if (!dir_emit_dots(file, ctx))
3652+ if (!dir_emit_dots(filp, ctx))
3653 return 0;
3654 spin_lock(&dentry->d_lock);
3655 if (ctx->pos == 2)
927ca606 3656@@ -155,6 +156,8 @@ int dcache_readdir(struct file *file, st
c2e5f7c8
JR
3657
3658 for (p = q->next; p != &dentry->d_subdirs; p = p->next) {
bb20add7 3659 struct dentry *next = list_entry(p, struct dentry, d_child);
c2e5f7c8
JR
3660+ if (filter && !filter(next))
3661+ continue;
3662 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
3663 if (!simple_positive(next)) {
3664 spin_unlock(&next->d_lock);
927ca606 3665@@ -177,8 +180,22 @@ int dcache_readdir(struct file *file, st
c2e5f7c8 3666 spin_unlock(&dentry->d_lock);
d337f35e
JR
3667 return 0;
3668 }
c2e5f7c8
JR
3669+
3670 EXPORT_SYMBOL(dcache_readdir);
d337f35e 3671
c2e5f7c8 3672+int dcache_readdir(struct file *filp, struct dir_context *ctx)
d337f35e 3673+{
c2e5f7c8 3674+ return do_dcache_readdir_filter(filp, ctx, NULL);
d337f35e
JR
3675+}
3676+
c2e5f7c8
JR
3677+EXPORT_SYMBOL(dcache_readdir_filter);
3678+
3679+int dcache_readdir_filter(struct file *filp, struct dir_context *ctx,
d337f35e
JR
3680+ int (*filter)(struct dentry *))
3681+{
c2e5f7c8 3682+ return do_dcache_readdir_filter(filp, ctx, filter);
d337f35e 3683+}
d337f35e
JR
3684+
3685 ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
3686 {
3687 return -EISDIR;
8931d859
AM
3688diff -NurpP --minimal linux-4.4.161/fs/locks.c linux-4.4.161-vs2.3.9.8/fs/locks.c
3689--- linux-4.4.161/fs/locks.c 2018-10-20 02:34:30.000000000 +0000
3690+++ linux-4.4.161-vs2.3.9.8/fs/locks.c 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8
JR
3691@@ -129,6 +129,8 @@
3692 #include <linux/hashtable.h>
3693 #include <linux/percpu.h>
3694 #include <linux/lglock.h>
d337f35e
JR
3695+#include <linux/vs_base.h>
3696+#include <linux/vs_limit.h>
3697
bb20add7
AM
3698 #define CREATE_TRACE_POINTS
3699 #include <trace/events/filelock.h>
927ca606 3700@@ -255,11 +257,15 @@ static void locks_init_lock_heads(struct
d337f35e 3701 /* Allocate an empty lock structure. */
ab30d09f 3702 struct file_lock *locks_alloc_lock(void)
d337f35e 3703 {
a168f21d 3704- struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
a0a3e0cf 3705+ struct file_lock *fl;
a168f21d
AM
3706
3707- if (fl)
3708- locks_init_lock_heads(fl);
a168f21d 3709+ fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
927ca606 3710
a168f21d
AM
3711+ if (fl) {
3712+ locks_init_lock_heads(fl);
927ca606 3713+ vx_locks_inc(fl);
a168f21d
AM
3714+ fl->fl_xid = -1;
3715+ }
3716 return fl;
3717 }
3718 EXPORT_SYMBOL_GPL(locks_alloc_lock);
927ca606 3719@@ -311,6 +317,7 @@ void locks_init_lock(struct file_lock *f
a168f21d
AM
3720 {
3721 memset(fl, 0, sizeof(struct file_lock));
3722 locks_init_lock_heads(fl);
3723+ fl->fl_xid = -1;
3724 }
3725
3726 EXPORT_SYMBOL(locks_init_lock);
927ca606 3727@@ -328,6 +335,7 @@ void locks_copy_conflock(struct file_loc
bb20add7
AM
3728 new->fl_start = fl->fl_start;
3729 new->fl_end = fl->fl_end;
d337f35e
JR
3730 new->fl_lmops = fl->fl_lmops;
3731+ new->fl_xid = fl->fl_xid;
bb20add7 3732 new->fl_ops = NULL;
d337f35e 3733
bb20add7 3734 if (fl->fl_lmops) {
927ca606 3735@@ -389,7 +397,10 @@ flock_make_lock(struct file *filp, unsig
d337f35e
JR
3736 fl->fl_flags = FL_FLOCK;
3737 fl->fl_type = type;
3738 fl->fl_end = OFFSET_MAX;
927ca606 3739-
d337f35e
JR
3740+
3741+ vxd_assert(filp->f_xid == vx_current_xid(),
3742+ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3743+ fl->fl_xid = filp->f_xid;
bb20add7
AM
3744 return fl;
3745 }
927ca606
AM
3746
3747@@ -511,6 +522,7 @@ static int lease_init(struct file *filp,
d337f35e 3748
bb20add7 3749 fl->fl_owner = filp;
d337f35e
JR
3750 fl->fl_pid = current->tgid;
3751+ fl->fl_xid = vx_current_xid();
3752
3753 fl->fl_file = filp;
3754 fl->fl_flags = FL_LEASE;
927ca606 3755@@ -530,6 +542,10 @@ static struct file_lock *lease_alloc(str
d337f35e 3756 if (fl == NULL)
2380c486 3757 return ERR_PTR(error);
d337f35e
JR
3758
3759+ fl->fl_xid = vx_current_xid();
3760+ if (filp)
3761+ vxd_assert(filp->f_xid == fl->fl_xid,
3762+ "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid);
d337f35e
JR
3763 error = lease_init(filp, type, fl);
3764 if (error) {
3765 locks_free_lock(fl);
927ca606
AM
3766@@ -908,6 +924,7 @@ static int flock_lock_inode(struct inode
3767 goto out;
ab30d09f 3768 }
2380c486
JR
3769
3770+ new_fl->fl_xid = -1;
3771 find_conflict:
927ca606
AM
3772 list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
3773 if (!flock_locks_conflict(request, fl))
3774@@ -934,7 +951,8 @@ out:
d337f35e
JR
3775 return error;
3776 }
3777
2380c486
JR
3778-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
3779+static int __posix_lock_file(struct inode *inode, struct file_lock *request,
61333608 3780+ struct file_lock *conflock, vxid_t xid)
d337f35e 3781 {
927ca606 3782 struct file_lock *fl, *tmp;
d337f35e 3783 struct file_lock *new_fl = NULL;
927ca606
AM
3784@@ -950,6 +968,9 @@ static int __posix_lock_file(struct inod
3785 if (!ctx)
3786 return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
d337f35e 3787
927ca606
AM
3788+ if (xid)
3789+ vxd_assert(xid == vx_current_xid(),
3790+ "xid(%d) == current(%d)", xid, vx_current_xid());
d337f35e
JR
3791 /*
3792 * We may need two file_lock structures for this operation,
3793 * so we get them in advance to avoid races.
927ca606 3794@@ -960,7 +981,11 @@ static int __posix_lock_file(struct inod
d337f35e
JR
3795 (request->fl_type != F_UNLCK ||
3796 request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
3797 new_fl = locks_alloc_lock();
3798+ if (new_fl) /* locks_alloc_lock() returns NULL on allocation failure */
927ca606 3799+ new_fl->fl_xid = xid;
d337f35e
JR
3800 new_fl2 = locks_alloc_lock();
3801+ if (new_fl2)
927ca606 3802+ new_fl2->fl_xid = xid;
d337f35e
JR
3803 }
3804
927ca606
AM
3805 spin_lock(&ctx->flc_lock);
3806@@ -1162,7 +1187,8 @@ static int __posix_lock_file(struct inod
2380c486 3807 int posix_lock_file(struct file *filp, struct file_lock *fl,
d337f35e
JR
3808 struct file_lock *conflock)
3809 {
b00e13aa
AM
3810- return __posix_lock_file(file_inode(filp), fl, conflock);
3811+ return __posix_lock_file(file_inode(filp),
d337f35e
JR
3812+ fl, conflock, filp->f_xid);
3813 }
2380c486 3814 EXPORT_SYMBOL(posix_lock_file);
d337f35e 3815
927ca606
AM
3816@@ -1178,7 +1204,7 @@ static int posix_lock_inode_wait(struct
3817 int error;
3818 might_sleep ();
3819 for (;;) {
3820- error = __posix_lock_file(inode, fl, NULL);
3821+ error = __posix_lock_file(inode, fl, NULL, 0);
3822 if (error != FILE_LOCK_DEFERRED)
3823 break;
3824 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
3825@@ -1257,10 +1283,13 @@ int locks_mandatory_area(int read_write,
3826 fl.fl_end = offset + count - 1;
3827
3828 for (;;) {
3829+ vxid_t f_xid = 0;
3830+
ca5d134c 3831 if (filp) {
bb20add7 3832 fl.fl_owner = filp;
ca5d134c
JR
3833 fl.fl_flags &= ~FL_SLEEP;
3834- error = __posix_lock_file(inode, &fl, NULL);
927ca606
AM
3835+ f_xid = filp->f_xid;
3836+ error = __posix_lock_file(inode, &fl, NULL, f_xid);
ca5d134c
JR
3837 if (!error)
3838 break;
3839 }
927ca606 3840@@ -1268,7 +1297,7 @@ int locks_mandatory_area(int read_write,
ca5d134c
JR
3841 if (sleep)
3842 fl.fl_flags |= FL_SLEEP;
3843 fl.fl_owner = current->files;
2380c486 3844- error = __posix_lock_file(inode, &fl, NULL);
927ca606 3845+ error = __posix_lock_file(inode, &fl, NULL, f_xid);
2380c486 3846 if (error != FILE_LOCK_DEFERRED)
d337f35e 3847 break;
2380c486 3848 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
927ca606 3849@@ -2165,6 +2194,11 @@ int fcntl_setlk(unsigned int fd, struct
d337f35e
JR
3850 if (file_lock == NULL)
3851 return -ENOLCK;
3852
3853+ vxd_assert(filp->f_xid == vx_current_xid(),
3854+ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3855+ file_lock->fl_xid = filp->f_xid;
927ca606 3856+ // vx_locks_inc(file_lock);
d337f35e
JR
3857+
3858 /*
3859 * This might block, so we do it before checking the inode.
3860 */
1d9ad342 3861@@ -2309,6 +2343,11 @@ int fcntl_setlk64(unsigned int fd, struc
d337f35e
JR
3862 if (file_lock == NULL)
3863 return -ENOLCK;
3864
3865+ vxd_assert(filp->f_xid == vx_current_xid(),
3866+ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3867+ file_lock->fl_xid = filp->f_xid;
927ca606 3868+ // vx_locks_inc(file_lock);
d337f35e
JR
3869+
3870 /*
3871 * This might block, so we do it before checking the inode.
3872 */
1d9ad342 3873@@ -2624,8 +2663,11 @@ static int locks_show(struct seq_file *f
2380c486 3874
c2e5f7c8 3875 lock_get_status(f, fl, iter->li_pos, "");
2380c486
JR
3876
3877- list_for_each_entry(bfl, &fl->fl_block, fl_block)
3878+ list_for_each_entry(bfl, &fl->fl_block, fl_block) {
3879+ if (!vx_check(fl->fl_xid, VS_WATCH_P | VS_IDENT))
d337f35e 3880+ continue;
bb20add7 3881 lock_get_status(f, bfl, iter->li_pos, " ->");
2380c486 3882+ }
d337f35e 3883
2380c486 3884 return 0;
ab30d09f 3885 }
8931d859
AM
3886diff -NurpP --minimal linux-4.4.161/fs/mount.h linux-4.4.161-vs2.3.9.8/fs/mount.h
3887--- linux-4.4.161/fs/mount.h 2018-10-20 02:34:30.000000000 +0000
3888+++ linux-4.4.161-vs2.3.9.8/fs/mount.h 2018-10-20 04:57:21.000000000 +0000
927ca606 3889@@ -68,6 +68,7 @@ struct mount {
bb20add7 3890 struct hlist_head mnt_pins;
927ca606
AM
3891 struct fs_pin mnt_umount;
3892 struct dentry *mnt_ex_mountpoint;
61333608 3893+ vtag_t mnt_tag; /* tagging used for vfsmount */
db55b927
AM
3894 };
3895
92598135 3896 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
8931d859
AM
3897diff -NurpP --minimal linux-4.4.161/fs/namei.c linux-4.4.161-vs2.3.9.8/fs/namei.c
3898--- linux-4.4.161/fs/namei.c 2018-10-20 02:34:30.000000000 +0000
3899+++ linux-4.4.161-vs2.3.9.8/fs/namei.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 3900@@ -34,10 +34,20 @@
2380c486 3901 #include <linux/device_cgroup.h>
ec22aa5c 3902 #include <linux/fs_struct.h>
a168f21d 3903 #include <linux/posix_acl.h>
d337f35e 3904+#include <linux/proc_fs.h>
09be7631 3905+#include <linux/magic.h>
d337f35e
JR
3906+#include <linux/vserver/inode.h>
3907+#include <linux/vs_base.h>
3908+#include <linux/vs_tag.h>
3909+#include <linux/vs_cowbl.h>
2380c486
JR
3910+#include <linux/vs_device.h>
3911+#include <linux/vs_context.h>
3912+#include <linux/pid_namespace.h>
bb20add7 3913 #include <linux/hash.h>
d337f35e
JR
3914 #include <asm/uaccess.h>
3915
2bf5ad28 3916 #include "internal.h"
09be7631
JR
3917+#include "proc/internal.h"
3918 #include "mount.h"
3919
3920 /* [Feb-1997 T. Schoebel-Theuer]
8931d859 3921@@ -284,6 +294,93 @@ static int check_acl(struct inode *inode
a168f21d
AM
3922 return -EAGAIN;
3923 }
d337f35e 3924
7e46296a 3925+static inline int dx_barrier(const struct inode *inode)
d337f35e 3926+{
2380c486
JR
3927+ if (IS_BARRIER(inode) && !vx_check(0, VS_ADMIN | VS_WATCH)) {
3928+ vxwprintk_task(1, "did hit the barrier.");
d337f35e
JR
3929+ return 1;
3930+ }
3931+ return 0;
3932+}
3933+
7e46296a 3934+static int __dx_permission(const struct inode *inode, int mask)
d337f35e
JR
3935+{
3936+ if (dx_barrier(inode))
3937+ return -EACCES;
d337f35e 3938+
2380c486
JR
3939+ if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) {
3940+ /* devpts is xid tagged */
3941+ if (S_ISDIR(inode->i_mode) ||
61333608 3942+ vx_check((vxid_t)i_tag_read(inode), VS_IDENT | VS_WATCH_P))
2380c486 3943+ return 0;
ba86f833 3944+
adc1caaa 3945+ /* just pretend we didn't find anything */
ba86f833 3946+ return -ENOENT;
2380c486
JR
3947+ }
3948+ else if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
3949+ struct proc_dir_entry *de = PDE(inode);
3950+
bb20add7
AM
3951+ if (de && !vx_hide_check(0, de->vx_flags)) {
3952+ vxdprintk(VXD_CBIT(misc, 9),
3953+ VS_Q("%*s") " hidden by _dx_permission",
3954+ de->namelen, de->name);
2380c486 3955+ goto out;
bb20add7 3956+ }
2380c486
JR
3957+
3958+ if ((mask & (MAY_WRITE | MAY_APPEND))) {
3959+ struct pid *pid;
3960+ struct task_struct *tsk;
3961+
3962+ if (vx_check(0, VS_ADMIN | VS_WATCH_P) ||
3963+ vx_flags(VXF_STATE_SETUP, 0))
3964+ return 0;
3965+
3966+ pid = PROC_I(inode)->pid;
3967+ if (!pid)
3968+ goto out;
3969+
c6ceaf95 3970+ rcu_read_lock();
2380c486
JR
3971+ tsk = pid_task(pid, PIDTYPE_PID);
3972+ vxdprintk(VXD_CBIT(tag, 0), "accessing %p[#%u]",
3973+ tsk, (tsk ? vx_task_xid(tsk) : 0));
c6ceaf95
AM
3974+ if (tsk &&
3975+ vx_check(vx_task_xid(tsk), VS_IDENT | VS_WATCH_P)) {
3976+ rcu_read_unlock();
2380c486 3977+ return 0;
c6ceaf95
AM
3978+ }
3979+ rcu_read_unlock();
2380c486
JR
3980+ }
3981+ else {
3982+ /* FIXME: Should we block some entries here? */
3983+ return 0;
3984+ }
3985+ }
3986+ else {
3987+ if (dx_notagcheck(inode->i_sb) ||
61333608 3988+ dx_check((vxid_t)i_tag_read(inode),
537831f9 3989+ DX_HOSTID | DX_ADMIN | DX_WATCH | DX_IDENT))
2380c486
JR
3990+ return 0;
3991+ }
3992+
3993+out:
d337f35e
JR
3994+ return -EACCES;
3995+}
3996+
7e46296a 3997+int dx_permission(const struct inode *inode, int mask)
2380c486
JR
3998+{
3999+ int ret = __dx_permission(inode, mask);
4000+ if (unlikely(ret)) {
ba86f833
AM
4001+#ifndef CONFIG_VSERVER_WARN_DEVPTS
4002+ if (inode->i_sb->s_magic != DEVPTS_SUPER_MAGIC)
4003+#endif
4004+ vxwprintk_task(1,
4005+ "denied [0x%x] access to inode %s:%p[#%d,%lu]",
8ce283e1
AM
4006+ mask, inode->i_sb->s_id, inode,
4007+ i_tag_read(inode), inode->i_ino);
2380c486
JR
4008+ }
4009+ return ret;
4010+}
4011+
7e46296a 4012 /*
f6c5ef8b 4013 * This does the basic permission checking
7e46296a 4014 */
8931d859 4015@@ -408,10 +505,14 @@ int __inode_permission(struct inode *ino
d337f35e
JR
4016 /*
4017 * Nobody gets write access to an immutable file.
4018 */
4019- if (IS_IMMUTABLE(inode))
4020+ if (IS_IMMUTABLE(inode) && !IS_COW(inode))
4021 return -EACCES;
4022 }
4023
2380c486
JR
4024+ retval = dx_permission(inode, mask);
4025+ if (retval)
d337f35e 4026+ return retval;
2380c486 4027+
a168f21d
AM
4028 retval = do_inode_permission(inode, mask);
4029 if (retval)
4030 return retval;
8931d859 4031@@ -1585,6 +1686,9 @@ static int lookup_fast(struct nameidata
927ca606
AM
4032 */
4033 if (negative)
4034 return -ENOENT;
be261992
AM
4035+
4036+ /* FIXME: check dx permission */
4037+
4038 path->mnt = mnt;
4039 path->dentry = dentry;
927ca606 4040 if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
8931d859 4041@@ -1615,6 +1719,8 @@ unlazy:
927ca606
AM
4042 dput(dentry);
4043 return -ENOENT;
be261992 4044 }
be261992 4045+
927ca606 4046+ /* FIXME: check dx permission */
be261992
AM
4047 path->mnt = mnt;
4048 path->dentry = dentry;
927ca606 4049 err = follow_managed(path, nd);
8931d859 4050@@ -2576,7 +2682,7 @@ static int may_delete(struct inode *dir,
d337f35e 4051 return -EPERM;
c2e5f7c8
JR
4052
4053 if (check_sticky(dir, inode) || IS_APPEND(inode) ||
4054- IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
4055+ IS_IXORUNLINK(inode) || IS_SWAPFILE(inode))
d337f35e
JR
4056 return -EPERM;
4057 if (isdir) {
bb20add7 4058 if (!d_is_dir(victim))
8931d859 4059@@ -2658,19 +2764,25 @@ int vfs_create(struct inode *dir, struct
92598135 4060 bool want_excl)
a168f21d
AM
4061 {
4062 int error = may_create(dir, dentry);
a168f21d
AM
4063- if (error)
4064+ if (error) {
4065+ vxdprintk(VXD_CBIT(misc, 3), "may_create failed with %d", error);
537831f9 4066 return error;
a168f21d
AM
4067+ }
4068
4069 if (!dir->i_op->create)
4070 return -EACCES; /* shouldn't it be ENOSYS? */
4071 mode &= S_IALLUGO;
4072 mode |= S_IFREG;
4073 error = security_inode_create(dir, dentry, mode);
4074- if (error)
4075+ if (error) {
4076+ vxdprintk(VXD_CBIT(misc, 3), "security_inode_create failed with %d", error);
537831f9 4077 return error;
a168f21d 4078+ }
92598135 4079 error = dir->i_op->create(dir, dentry, mode, want_excl);
a168f21d
AM
4080 if (!error)
4081 fsnotify_create(dir, dentry);
4082+ else
4083+ vxdprintk(VXD_CBIT(misc, 3), "i_op->create failed with %d", error);
4084 return error;
4085 }
bb20add7 4086 EXPORT_SYMBOL(vfs_create);
8931d859 4087@@ -2706,6 +2818,15 @@ static int may_open(struct path *path, i
ec22aa5c 4088 break;
2380c486 4089 }
d337f35e
JR
4090
4091+#ifdef CONFIG_VSERVER_COWBL
763640ca
JR
4092+ if (IS_COW(inode) &&
4093+ ((flag & O_ACCMODE) != O_RDONLY)) {
d337f35e
JR
4094+ if (IS_COW_LINK(inode))
4095+ return -EMLINK;
2380c486 4096+ inode->i_flags &= ~(S_IXUNLINK|S_IMMUTABLE);
d337f35e
JR
4097+ mark_inode_dirty(inode);
4098+ }
4099+#endif
ec22aa5c 4100 error = inode_permission(inode, acc_mode);
d337f35e
JR
4101 if (error)
4102 return error;
8931d859 4103@@ -3183,6 +3304,16 @@ finish_open:
7b17263b 4104 }
92598135 4105 finish_open_created:
7b17263b
AM
4106 error = may_open(&nd->path, acc_mode, open_flag);
4107+#ifdef CONFIG_VSERVER_COWBL
4108+ if (error == -EMLINK) {
4109+ struct dentry *dentry;
f19bd705 4110+ dentry = cow_break_link(nd->name->name);
7b17263b
AM
4111+ if (IS_ERR(dentry))
4112+ error = PTR_ERR(dentry);
4113+ else
4114+ dput(dentry);
4115+ }
4116+#endif
4117 if (error)
92598135 4118 goto out;
bb20add7 4119
8931d859 4120@@ -3307,6 +3438,9 @@ static struct file *path_openat(struct n
92598135 4121 int opened = 0;
7b17263b
AM
4122 int error;
4123
927ca606 4124+#ifdef CONFIG_VSERVER_COWBL
7b17263b 4125+restart:
927ca606 4126+#endif
92598135 4127 file = get_empty_filp();
b00e13aa
AM
4128 if (IS_ERR(file))
4129 return file;
8931d859 4130@@ -3333,6 +3467,12 @@ static struct file *path_openat(struct n
f19bd705
AM
4131 }
4132 }
4133 terminate_walk(nd);
4134+#ifdef CONFIG_VSERVER_COWBL
4135+ if (error == -EMLINK) {
4136+ // path_cleanup(nd);
4137+ goto restart;
4138+ }
4139+#endif
4140 out2:
4141 if (!(opened & FILE_OPENED)) {
4142 BUG_ON(!error);
8931d859 4143@@ -3453,6 +3593,11 @@ static struct dentry *filename_create(in
a168f21d
AM
4144 goto fail;
4145 }
927ca606
AM
4146 putname(name);
4147+ vxdprintk(VXD_CBIT(misc, 3), "filename_create path.dentry = %p (%.*s), dentry = %p (%.*s), d_inode = %p",
a168f21d
AM
4148+ path->dentry, path->dentry->d_name.len,
4149+ path->dentry->d_name.name, dentry,
4150+ dentry->d_name.len, dentry->d_name.name,
4151+ path->dentry->d_inode);
4152 return dentry;
92598135 4153 fail:
a168f21d 4154 dput(dentry);
8931d859 4155@@ -3569,6 +3714,7 @@ retry:
927ca606
AM
4156 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
4157 break;
4158 }
4159+
927ca606
AM
4160 out:
4161 done_path_create(&path, dentry);
4162 if (retry_estale(error, lookup_flags)) {
8931d859 4163@@ -4015,7 +4161,7 @@ int vfs_link(struct dentry *old_dentry,
d337f35e
JR
4164 /*
4165 * A link to an append-only or immutable file cannot be created.
4166 */
4167- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
4168+ if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
4169 return -EPERM;
ec22aa5c 4170 if (!dir->i_op->link)
d337f35e 4171 return -EPERM;
8931d859 4172@@ -4524,6 +4670,330 @@ int generic_readlink(struct dentry *dent
d337f35e 4173 }
bb20add7 4174 EXPORT_SYMBOL(generic_readlink);
d337f35e
JR
4175
4176+
4177+#ifdef CONFIG_VSERVER_COWBL
4178+
2380c486 4179+static inline
8de2f54c
AM
4180+void dump_path(const char *name, struct path *path)
4181+{
4182+ vxdprintk(VXD_CBIT(misc, 3),
4183+ "%s: path=%p mnt=%p dentry=%p", name, path,
4184+ path ? path->mnt : NULL,
4185+ path ? path->dentry : NULL);
4186+
4187+ if (path && path->mnt)
4188+ vxdprintk(VXD_CBIT(misc, 3),
4189+ "%s: path mnt_sb=%p[#%d,#%d] mnt_root=%p[#%d]", name,
4190+ path->mnt->mnt_sb,
4191+ path->mnt->mnt_sb ? path->mnt->mnt_sb->s_count : -1,
4192+ path->mnt->mnt_sb ? atomic_read(&path->mnt->mnt_sb->s_active) : -1,
4193+ path->mnt->mnt_root,
4194+ path->mnt->mnt_root ? path->mnt->mnt_root->d_lockref.count : -1);
4195+
4196+ if (path && path->dentry)
4197+ vxdprintk(VXD_CBIT(misc, 3),
4198+ "%s: path dentry=%p[#%d]", name,
4199+ path->dentry,
4200+ path->dentry ? path->dentry->d_lockref.count : -1);
4201+}
4202+
4203+static inline
2380c486
JR
4204+long do_cow_splice(struct file *in, struct file *out, size_t len)
4205+{
4206+ loff_t ppos = 0;
09be7631 4207+ loff_t opos = 0;
2380c486 4208+
09be7631 4209+ return do_splice_direct(in, &ppos, out, &opos, len, 0);
2380c486
JR
4210+}
4211+
d337f35e
JR
4212+struct dentry *cow_break_link(const char *pathname)
4213+{
b00e13aa 4214+ int ret, mode, pathlen, redo = 0, drop = 1;
8de2f54c 4215+ struct path old_path = {}, par_path = {}, dir_path = {}, *new_path = NULL;
a168f21d 4216+ struct dentry *dir, *old_dentry, *new_dentry = NULL;
d337f35e
JR
4217+ struct file *old_file;
4218+ struct file *new_file;
8de2f54c
AM
4219+ struct qstr new_qstr;
4220+ int new_type;
d337f35e
JR
4221+ char *to, *path, pad='\251';
4222+ loff_t size;
927ca606
AM
4223+ struct filename *filename = getname_kernel(pathname);
4224+ struct filename *to_filename;
d337f35e 4225+
ba86f833
AM
4226+ vxdprintk(VXD_CBIT(misc, 1),
4227+ "cow_break_link(" VS_Q("%s") ")", pathname);
e915af4e 4228+
d337f35e 4229+ path = kmalloc(PATH_MAX, GFP_KERNEL);
2380c486 4230+ ret = -ENOMEM;
927ca606 4231+ if (!path || IS_ERR(filename))
2380c486 4232+ goto out;
d337f35e 4233+
8de2f54c
AM
4234+ /* old_path will have refs to dentry and mnt */
4235+ ret = filename_lookup(AT_FDCWD, filename, LOOKUP_FOLLOW, &old_path, NULL);
a168f21d 4236+ vxdprintk(VXD_CBIT(misc, 2),
e915af4e 4237+ "do_path_lookup(old): %d", ret);
2380c486
JR
4238+ if (ret < 0)
4239+ goto out_free_path;
d337f35e 4240+
8de2f54c
AM
4241+ dump_path("cow (old)", &old_path);
4242+
e915af4e 4243+ /* no explicit reference for old_dentry here */
8de2f54c
AM
4244+ old_dentry = old_path.dentry;
4245+
4246+ /* speculative put */
4247+ // dput(old_dentry);
2380c486 4248+
e915af4e 4249+ mode = old_dentry->d_inode->i_mode;
8de2f54c 4250+ to = d_path(&old_path, path, PATH_MAX-2);
d337f35e 4251+ pathlen = strlen(to);
ba86f833 4252+ vxdprintk(VXD_CBIT(misc, 2),
a168f21d
AM
4253+ "old path " VS_Q("%s") " [%p:" VS_Q("%.*s") ":%d]", to,
4254+ old_dentry,
4255+ old_dentry->d_name.len, old_dentry->d_name.name,
4256+ old_dentry->d_name.len);
d337f35e 4257+
2380c486 4258+ to[pathlen + 1] = 0;
d337f35e 4259+retry:
a168f21d 4260+ new_dentry = NULL;
d337f35e 4261+ to[pathlen] = pad--;
a168f21d 4262+ ret = -ELOOP;
d337f35e
JR
4263+ if (pad <= '\240')
4264+ goto out_rel_old;
4265+
ba86f833 4266+ vxdprintk(VXD_CBIT(misc, 1), "temp copy " VS_Q("%s"), to);
e915af4e 4267+
8de2f54c 4268+ /* dir_path will have refs to dentry and mnt */
927ca606 4269+ to_filename = getname_kernel(to);
8de2f54c
AM
4270+ to_filename = filename_parentat(AT_FDCWD, to_filename,
4271+ LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE, &par_path, &new_qstr, &new_type);
4272+ vxdprintk(VXD_CBIT(misc, 2), "filename_parentat(new): %p", to_filename);
4273+ dump_path("cow (par)", &par_path);
4274+ if (IS_ERR(to_filename))
2380c486
JR
4275+ goto retry;
4276+
8de2f54c
AM
4277+ vxdprintk(VXD_CBIT(misc, 2), "to_filename refcnt=%d", to_filename->refcnt);
4278+ // putname(to_filename);
4279+
e915af4e
AM
4280+ /* this puppy downs the dir inode mutex if successful.
4281+ dir_path will hold refs to dentry and mnt and
b00e13aa 4282+ we'll have write access to the mnt */
8de2f54c 4283+ new_dentry = filename_create(AT_FDCWD, to_filename, &dir_path, 0);
a168f21d 4284+ if (!new_dentry || IS_ERR(new_dentry)) {
8de2f54c 4285+ path_put(&par_path);
a168f21d 4286+ vxdprintk(VXD_CBIT(misc, 2),
8de2f54c 4287+ "filename_create(new) failed with %ld",
a168f21d 4288+ PTR_ERR(new_dentry));
d337f35e
JR
4289+ goto retry;
4290+ }
2380c486 4291+ vxdprintk(VXD_CBIT(misc, 2),
8de2f54c 4292+ "filename_create(new): %p [" VS_Q("%.*s") ":%d]",
a168f21d
AM
4293+ new_dentry,
4294+ new_dentry->d_name.len, new_dentry->d_name.name,
4295+ new_dentry->d_name.len);
4296+
8de2f54c
AM
4297+ dump_path("cow (dir)", &dir_path);
4298+
e915af4e
AM
4299+ /* take a reference on new_dentry */
4300+ dget(new_dentry);
4301+
4302+ /* dentry/mnt refs handed over to new_path */
4303+ new_path = &dir_path;
4304+
4305+ /* dentry for old/new dir */
8de2f54c 4306+ dir = par_path.dentry;
d337f35e 4307+
e915af4e
AM
4308+ /* give up reference on dir */
4309+ dput(new_path->dentry);
4310+
4311+ /* new_dentry already has a reference */
4312+ new_path->dentry = new_dentry;
4313+
4314+ ret = vfs_create(dir->d_inode, new_dentry, mode, 1);
d337f35e
JR
4315+ vxdprintk(VXD_CBIT(misc, 2),
4316+ "vfs_create(new): %d", ret);
4317+ if (ret == -EEXIST) {
8de2f54c 4318+ path_put(&par_path);
b00e13aa 4319+ mutex_unlock(&dir->d_inode->i_mutex);
e915af4e
AM
4320+ mnt_drop_write(new_path->mnt);
4321+ path_put(new_path);
4322+ new_dentry = NULL;
d337f35e
JR
4323+ goto retry;
4324+ }
2380c486
JR
4325+ else if (ret < 0)
4326+ goto out_unlock_new;
4327+
927ca606 4328+ /* the old file went away */
2380c486 4329+ ret = -ENOENT;
a168f21d 4330+ if ((redo = d_unhashed(old_dentry)))
2380c486
JR
4331+ goto out_unlock_new;
4332+
e915af4e 4333+ /* doesn't change refs for old_path */
8de2f54c 4334+ old_file = dentry_open(&old_path, O_RDONLY, current_cred());
d337f35e
JR
4335+ vxdprintk(VXD_CBIT(misc, 2),
4336+ "dentry_open(old): %p", old_file);
a168f21d
AM
4337+ if (IS_ERR(old_file)) {
4338+ ret = PTR_ERR(old_file);
2380c486
JR
4339+ goto out_unlock_new;
4340+ }
d337f35e 4341+
e915af4e
AM
4342+ /* doesn't change refs for new_path */
4343+ new_file = dentry_open(new_path, O_WRONLY, current_cred());
d337f35e
JR
4344+ vxdprintk(VXD_CBIT(misc, 2),
4345+ "dentry_open(new): %p", new_file);
a168f21d
AM
4346+ if (IS_ERR(new_file)) {
4347+ ret = PTR_ERR(new_file);
d337f35e 4348+ goto out_fput_old;
a168f21d 4349+ }
d337f35e 4350+
8de2f54c 4351+ /* unlock the inode mutex from filename_create() */
b00e13aa
AM
4352+ mutex_unlock(&dir->d_inode->i_mutex);
4353+
4354+ /* drop write access to mnt */
4355+ mnt_drop_write(new_path->mnt);
4356+
4357+ drop = 0;
4358+
927ca606 4359+ size = i_size_read(old_file->f_path.dentry->d_inode);
2380c486
JR
4360+ ret = do_cow_splice(old_file, new_file, size);
4361+ vxdprintk(VXD_CBIT(misc, 2), "do_splice_direct: %d", ret);
4362+ if (ret < 0) {
d337f35e 4363+ goto out_fput_both;
2380c486
JR
4364+ } else if (ret < size) {
4365+ ret = -ENOSPC;
4366+ goto out_fput_both;
4367+ } else {
a168f21d
AM
4368+ struct inode *old_inode = old_dentry->d_inode;
4369+ struct inode *new_inode = new_dentry->d_inode;
2380c486
JR
4370+ struct iattr attr = {
4371+ .ia_uid = old_inode->i_uid,
4372+ .ia_gid = old_inode->i_gid,
4373+ .ia_valid = ATTR_UID | ATTR_GID
4374+ };
4375+
93de0823
AM
4376+ setattr_copy(new_inode, &attr);
4377+ mark_inode_dirty(new_inode);
2380c486 4378+ }
d337f35e 4379+
e915af4e 4380+ /* lock rename mutex */
a168f21d 4381+ mutex_lock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
2380c486
JR
4382+
4383+ /* drop out late */
4384+ ret = -ENOENT;
a168f21d 4385+ if ((redo = d_unhashed(old_dentry)))
2380c486
JR
4386+ goto out_unlock;
4387+
4388+ vxdprintk(VXD_CBIT(misc, 2),
ba86f833 4389+ "vfs_rename: [" VS_Q("%*s") ":%d] -> [" VS_Q("%*s") ":%d]",
a168f21d
AM
4390+ new_dentry->d_name.len, new_dentry->d_name.name,
4391+ new_dentry->d_name.len,
4392+ old_dentry->d_name.len, old_dentry->d_name.name,
4393+ old_dentry->d_name.len);
8de2f54c 4394+ ret = vfs_rename(par_path.dentry->d_inode, new_dentry,
eafa5b1d 4395+ old_dentry->d_parent->d_inode, old_dentry, NULL, 0);
d337f35e 4396+ vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
2380c486
JR
4397+
4398+out_unlock:
a168f21d 4399+ mutex_unlock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
d337f35e
JR
4400+
4401+out_fput_both:
4402+ vxdprintk(VXD_CBIT(misc, 3),
2380c486 4403+ "fput(new_file=%p[#%ld])", new_file,
4a036bed 4404+ atomic_long_read(&new_file->f_count));
d337f35e
JR
4405+ fput(new_file);
4406+
4407+out_fput_old:
4408+ vxdprintk(VXD_CBIT(misc, 3),
2380c486 4409+ "fput(old_file=%p[#%ld])", old_file,
4a036bed 4410+ atomic_long_read(&old_file->f_count));
d337f35e
JR
4411+ fput(old_file);
4412+
2380c486 4413+out_unlock_new:
8de2f54c
AM
4414+ /* drop references from par_path */
4415+ path_put(&par_path);
e915af4e 4416+
b00e13aa 4417+ if (drop) {
8de2f54c 4418+ /* unlock the inode mutex from filename_create() */
b00e13aa
AM
4419+ mutex_unlock(&dir->d_inode->i_mutex);
4420+
4421+ /* drop write access to mnt */
4422+ mnt_drop_write(new_path->mnt);
4423+ }
e915af4e 4424+
2380c486
JR
4425+ if (!ret)
4426+ goto out_redo;
4427+
4428+ /* error path cleanup */
c2e5f7c8 4429+ vfs_unlink(dir->d_inode, new_dentry, NULL);
2380c486
JR
4430+
4431+out_redo:
4432+ if (!redo)
4433+ goto out_rel_both;
e915af4e
AM
4434+
4435+ /* lookup dentry once again
8de2f54c
AM
4436+ old_path will be freed as old_path in out_rel_old */
4437+ ret = filename_lookup(AT_FDCWD, filename, LOOKUP_FOLLOW, &old_path, NULL);
2380c486
JR
4438+ if (ret)
4439+ goto out_rel_both;
d337f35e 4440+
e915af4e 4441+ /* drop reference on new_dentry */
a168f21d 4442+ dput(new_dentry);
8de2f54c 4443+ new_dentry = old_path.dentry;
e915af4e 4444+ dget(new_dentry);
2380c486 4445+ vxdprintk(VXD_CBIT(misc, 2),
763640ca 4446+ "do_path_lookup(redo): %p [" VS_Q("%.*s") ":%d]",
a168f21d
AM
4447+ new_dentry,
4448+ new_dentry->d_name.len, new_dentry->d_name.name,
4449+ new_dentry->d_name.len);
2380c486
JR
4450+
4451+out_rel_both:
8de2f54c 4452+ dump_path("put (new)", new_path);
e915af4e
AM
4453+ if (new_path)
4454+ path_put(new_path);
d337f35e 4455+out_rel_old:
8de2f54c
AM
4456+ dump_path("put (old)", &old_path);
4457+ path_put(&old_path);
2380c486 4458+out_free_path:
d337f35e 4459+ kfree(path);
2380c486 4460+out:
a168f21d
AM
4461+ if (ret) {
4462+ dput(new_dentry);
4463+ new_dentry = ERR_PTR(ret);
4464+ }
8de2f54c
AM
4465+ // if (!IS_ERR(filename))
4466+ // putname(filename);
a168f21d 4467+ vxdprintk(VXD_CBIT(misc, 3),
e915af4e 4468+ "cow_break_link returning with %p", new_dentry);
a168f21d 4469+ return new_dentry;
d337f35e
JR
4470+}
4471+
4472+#endif
1e8b8f9b
AM
4473+
4474+int vx_info_mnt_namespace(struct mnt_namespace *ns, char *buffer)
4475+{
4476+ struct path path;
4477+ struct vfsmount *vmnt;
4478+ char *pstr, *root;
4479+ int length = 0;
4480+
4481+ pstr = kmalloc(PATH_MAX, GFP_KERNEL);
4482+ if (!pstr)
4483+ return 0;
4484+
4485+ vmnt = &ns->root->mnt;
4486+ path.mnt = vmnt;
4487+ path.dentry = vmnt->mnt_root;
4488+ root = d_path(&path, pstr, PATH_MAX - 2);
4489+ length = sprintf(buffer + length,
4490+ "Namespace:\t%p [#%u]\n"
4491+ "RootPath:\t%s\n",
4492+ ns, atomic_read(&ns->count),
4493+ root);
4494+ kfree(pstr);
4495+ return length;
4496+}
bb20add7 4497+
265de2f7 4498+EXPORT_SYMBOL(vx_info_mnt_namespace);
d337f35e
JR
4499+
4500 /* get the link contents into pagecache */
4501 static char *page_getlink(struct dentry * dentry, struct page **ppage)
4502 {
8931d859
AM
4503diff -NurpP --minimal linux-4.4.161/fs/namespace.c linux-4.4.161-vs2.3.9.8/fs/namespace.c
4504--- linux-4.4.161/fs/namespace.c 2018-10-20 02:34:30.000000000 +0000
4505+++ linux-4.4.161-vs2.3.9.8/fs/namespace.c 2018-10-20 05:50:20.000000000 +0000
978063ce 4506@@ -24,6 +24,11 @@
09be7631 4507 #include <linux/magic.h>
52afa9bd 4508 #include <linux/bootmem.h>
bb20add7 4509 #include <linux/task_work.h>
d337f35e 4510+#include <linux/vs_base.h>
d337f35e
JR
4511+#include <linux/vs_context.h>
4512+#include <linux/vs_tag.h>
2380c486
JR
4513+#include <linux/vserver/space.h>
4514+#include <linux/vserver/global.h>
d337f35e 4515 #include "pnode.h"
db55b927
AM
4516 #include "internal.h"
4517
8931d859 4518@@ -980,6 +985,10 @@ vfs_kern_mount(struct file_system_type *
be261992
AM
4519 if (!type)
4520 return ERR_PTR(-ENODEV);
4521
4522+ if ((type->fs_flags & FS_BINARY_MOUNTDATA) &&
4523+ !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT))
4524+ return ERR_PTR(-EPERM);
4525+
4526 mnt = alloc_vfsmnt(name);
4527 if (!mnt)
4528 return ERR_PTR(-ENOMEM);
8931d859 4529@@ -1056,6 +1065,7 @@ static struct mount *clone_mnt(struct mo
92598135
AM
4530 mnt->mnt.mnt_root = dget(root);
4531 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
4532 mnt->mnt_parent = mnt;
c2e5f7c8
JR
4533+ mnt->mnt_tag = old->mnt_tag;
4534 lock_mount_hash();
92598135 4535 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
c2e5f7c8 4536 unlock_mount_hash();
8931d859 4537@@ -1645,7 +1655,8 @@ out_unlock:
c2e5f7c8
JR
4538 */
4539 static inline bool may_mount(void)
4540 {
4541- return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
4542+ return vx_ns_capable(current->nsproxy->mnt_ns->user_ns,
4543+ CAP_SYS_ADMIN, VXC_SECURE_MOUNT);
4544 }
4545
4546 /*
8931d859 4547@@ -2146,6 +2157,7 @@ static int do_change_type(struct path *p
763640ca
JR
4548 if (err)
4549 goto out_unlock;
4550 }
4551+ // mnt->mnt_flags = mnt_flags;
4552
c2e5f7c8 4553 lock_mount_hash();
763640ca 4554 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
8931d859 4555@@ -2174,12 +2186,14 @@ static bool has_locked_children(struct m
ec22aa5c 4556 * do loopback mount.
d337f35e 4557 */
537831f9 4558 static int do_loopback(struct path *path, const char *old_name,
2380c486 4559- int recurse)
61333608 4560+ vtag_t tag, unsigned long flags, int mnt_flags)
d337f35e 4561 {
ec22aa5c 4562 struct path old_path;
09be7631
JR
4563 struct mount *mnt = NULL, *old, *parent;
4564 struct mountpoint *mp;
d337f35e 4565+ int recurse = flags & MS_REC;
b00e13aa 4566 int err;
2380c486 4567+
d337f35e 4568 if (!old_name || !*old_name)
b00e13aa
AM
4569 return -EINVAL;
4570 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
8931d859 4571@@ -2259,7 +2273,7 @@ static int change_mount_flags(struct vfs
ec22aa5c 4572 * on it - tough luck.
d337f35e 4573 */
ec22aa5c 4574 static int do_remount(struct path *path, int flags, int mnt_flags,
d337f35e 4575- void *data)
61333608 4576+ void *data, vxid_t xid)
d337f35e
JR
4577 {
4578 int err;
ec22aa5c 4579 struct super_block *sb = path->mnt->mnt_sb;
8931d859 4580@@ -2767,6 +2781,7 @@ long do_mount(const char *dev_name, cons
ec22aa5c 4581 struct path path;
d337f35e
JR
4582 int retval = 0;
4583 int mnt_flags = 0;
61333608 4584+ vtag_t tag = 0;
d337f35e
JR
4585
4586 /* Discard magic */
4587 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
8931d859 4588@@ -2792,6 +2807,12 @@ long do_mount(const char *dev_name, cons
ec22aa5c
AM
4589 if (!(flags & MS_NOATIME))
4590 mnt_flags |= MNT_RELATIME;
d337f35e 4591
2380c486
JR
4592+ if (dx_parse_tag(data_page, &tag, 1, &mnt_flags, &flags)) {
4593+ /* FIXME: bind and re-mounts get the tag flag? */
d337f35e
JR
4594+ if (flags & (MS_BIND|MS_REMOUNT))
4595+ flags |= MS_TAGID;
4596+ }
d337f35e
JR
4597+
4598 /* Separate the per-mountpoint flags */
d337f35e
JR
4599 if (flags & MS_NOSUID)
4600 mnt_flags |= MNT_NOSUID;
8931d859 4601@@ -2816,15 +2837,17 @@ long do_mount(const char *dev_name, cons
bb20add7
AM
4602 mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
4603 }
d337f35e 4604
b00e13aa 4605+ if (!vx_capable(CAP_SYS_ADMIN, VXC_DEV_MOUNT))
d337f35e 4606+ mnt_flags |= MNT_NODEV;
c146dd73 4607 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
ec22aa5c
AM
4608 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
4609 MS_STRICTATIME);
d337f35e
JR
4610
4611 if (flags & MS_REMOUNT)
ec22aa5c 4612 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
d337f35e
JR
4613- data_page);
4614+ data_page, tag);
4615 else if (flags & MS_BIND)
ec22aa5c
AM
4616- retval = do_loopback(&path, dev_name, flags & MS_REC);
4617+ retval = do_loopback(&path, dev_name, tag, flags, mnt_flags);
d337f35e 4618 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
ec22aa5c 4619 retval = do_change_type(&path, flags);
d337f35e 4620 else if (flags & MS_MOVE)
8931d859 4621@@ -2944,6 +2967,7 @@ struct mnt_namespace *copy_mnt_ns(unsign
c2e5f7c8 4622 p = next_mnt(p, old);
d337f35e 4623 }
09be7631 4624 namespace_unlock();
2380c486
JR
4625+ atomic_inc(&vs_global_mnt_ns);
4626
4627 if (rootmnt)
4628 mntput(rootmnt);
8931d859 4629@@ -3119,9 +3143,10 @@ SYSCALL_DEFINE2(pivot_root, const char _
db55b927
AM
4630 new_mnt = real_mount(new.mnt);
4631 root_mnt = real_mount(root.mnt);
09be7631
JR
4632 old_mnt = real_mount(old.mnt);
4633- if (IS_MNT_SHARED(old_mnt) ||
4634+ if ((IS_MNT_SHARED(old_mnt) ||
db55b927
AM
4635 IS_MNT_SHARED(new_mnt->mnt_parent) ||
4636- IS_MNT_SHARED(root_mnt->mnt_parent))
4637+ IS_MNT_SHARED(root_mnt->mnt_parent)) &&
50e68740 4638+ !vx_flags(VXF_STATE_SETUP, 0))
763640ca 4639 goto out4;
db55b927 4640 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
763640ca 4641 goto out4;
8931d859 4642@@ -3259,6 +3284,7 @@ void put_mnt_ns(struct mnt_namespace *ns
c2e5f7c8
JR
4643 if (!atomic_dec_and_test(&ns->count))
4644 return;
4645 drop_collected_mounts(&ns->root->mnt);
2380c486 4646+ atomic_dec(&vs_global_mnt_ns);
b00e13aa 4647 free_mnt_ns(ns);
2380c486 4648 }
db55b927 4649
8931d859
AM
4650diff -NurpP --minimal linux-4.4.161/fs/nfs/client.c linux-4.4.161-vs2.3.9.8/fs/nfs/client.c
4651--- linux-4.4.161/fs/nfs/client.c 2016-01-10 23:01:32.000000000 +0000
4652+++ linux-4.4.161-vs2.3.9.8/fs/nfs/client.c 2018-10-20 04:57:21.000000000 +0000
927ca606 4653@@ -583,6 +583,9 @@ int nfs_init_server_rpcclient(struct nfs
2380c486
JR
4654 if (server->flags & NFS_MOUNT_SOFT)
4655 server->client->cl_softrtry = 1;
d337f35e
JR
4656
4657+ server->client->cl_tag = 0;
4658+ if (server->flags & NFS_MOUNT_TAGGED)
4659+ server->client->cl_tag = 1;
4660 return 0;
4661 }
92598135 4662 EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient);
927ca606 4663@@ -760,6 +763,10 @@ static void nfs_server_set_fsinfo(struct
d337f35e
JR
4664 server->acdirmin = server->acdirmax = 0;
4665 }
4666
4667+ /* FIXME: needs fsinfo
4668+ if (server->flags & NFS_MOUNT_TAGGED)
4669+ sb->s_flags |= MS_TAGGED; */
4670+
4671 server->maxfilesize = fsinfo->maxfilesize;
4672
ab30d09f 4673 server->time_delta = fsinfo->time_delta;
8931d859
AM
4674diff -NurpP --minimal linux-4.4.161/fs/nfs/dir.c linux-4.4.161-vs2.3.9.8/fs/nfs/dir.c
4675--- linux-4.4.161/fs/nfs/dir.c 2018-10-20 02:34:30.000000000 +0000
4676+++ linux-4.4.161-vs2.3.9.8/fs/nfs/dir.c 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8 4677@@ -37,6 +37,7 @@
2380c486 4678 #include <linux/sched.h>
ab30d09f 4679 #include <linux/kmemleak.h>
d33d7b00 4680 #include <linux/xattr.h>
d337f35e
JR
4681+#include <linux/vs_tag.h>
4682
d337f35e 4683 #include "delegation.h"
ab30d09f 4684 #include "iostat.h"
927ca606 4685@@ -1396,6 +1397,7 @@ struct dentry *nfs_lookup(struct inode *
42bc425c
AM
4686 /* Success: notify readdir to use READDIRPLUS */
4687 nfs_advise_use_readdirplus(dir);
d337f35e
JR
4688
4689+ dx_propagate_tag(nd, inode);
4690 no_entry:
927ca606 4691 res = d_splice_alias(inode, dentry);
d337f35e 4692 if (res != NULL) {
8931d859
AM
4693diff -NurpP --minimal linux-4.4.161/fs/nfs/inode.c linux-4.4.161-vs2.3.9.8/fs/nfs/inode.c
4694--- linux-4.4.161/fs/nfs/inode.c 2018-10-20 02:34:30.000000000 +0000
4695+++ linux-4.4.161-vs2.3.9.8/fs/nfs/inode.c 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8
JR
4696@@ -38,6 +38,7 @@
4697 #include <linux/slab.h>
d33d7b00 4698 #include <linux/compat.h>
db55b927 4699 #include <linux/freezer.h>
d337f35e
JR
4700+#include <linux/vs_tag.h>
4701
d337f35e 4702 #include <asm/uaccess.h>
1e8b8f9b 4703
927ca606 4704@@ -376,6 +377,8 @@ nfs_fhget(struct super_block *sb, struct
ec22aa5c
AM
4705 if (inode->i_state & I_NEW) {
4706 struct nfs_inode *nfsi = NFS_I(inode);
4707 unsigned long now = jiffies;
a4a22af8
AM
4708+ kuid_t kuid;
4709+ kgid_t kgid;
ec22aa5c
AM
4710
4711 /* We set i_ino for the few things that still rely on it,
4712 * such as stat(2) */
927ca606 4713@@ -419,8 +422,8 @@ nfs_fhget(struct super_block *sb, struct
f6c5ef8b 4714 inode->i_version = 0;
ec22aa5c 4715 inode->i_size = 0;
f6c5ef8b 4716 clear_nlink(inode);
b00e13aa
AM
4717- inode->i_uid = make_kuid(&init_user_ns, -2);
4718- inode->i_gid = make_kgid(&init_user_ns, -2);
a4a22af8
AM
4719+ kuid = make_kuid(&init_user_ns, -2);
4720+ kgid = make_kgid(&init_user_ns, -2);
ec22aa5c
AM
4721 inode->i_blocks = 0;
4722 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
42bc425c 4723 nfsi->write_io = 0;
927ca606 4724@@ -455,11 +458,11 @@ nfs_fhget(struct super_block *sb, struct
7e46296a 4725 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
bb20add7 4726 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
ec22aa5c
AM
4727 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
4728- inode->i_uid = fattr->uid;
a4a22af8 4729+ kuid = fattr->uid;
7e46296a 4730 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
bb20add7 4731 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
ec22aa5c
AM
4732 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
4733- inode->i_gid = fattr->gid;
a4a22af8 4734+ kgid = fattr->gid;
7e46296a 4735 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
bb20add7 4736 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
42bc425c 4737 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
927ca606 4738@@ -470,6 +473,10 @@ nfs_fhget(struct super_block *sb, struct
ec22aa5c
AM
4739 */
4740 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
4741 }
a4a22af8
AM
4742+ inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
4743+ inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
4744+ inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, GLOBAL_ROOT_TAG);
ec22aa5c 4745+ /* maybe fattr->xid someday */
c2e5f7c8
JR
4746
4747 nfs_setsecurity(inode, fattr, label);
4748
927ca606 4749@@ -611,6 +618,8 @@ void nfs_setattr_update_inode(struct ino
d337f35e
JR
4750 inode->i_uid = attr->ia_uid;
4751 if ((attr->ia_valid & ATTR_GID) != 0)
4752 inode->i_gid = attr->ia_gid;
4753+ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
4754+ inode->i_tag = attr->ia_tag;
bb20add7
AM
4755 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
4756 | NFS_INO_INVALID_ACL);
927ca606
AM
4757 }
4758@@ -1235,7 +1244,9 @@ static int nfs_check_inode_attributes(st
d337f35e
JR
4759 struct nfs_inode *nfsi = NFS_I(inode);
4760 loff_t cur_size, new_isize;
2380c486 4761 unsigned long invalid = 0;
a4a22af8 4762-
b00e13aa
AM
4763+ kuid_t kuid;
4764+ kgid_t kgid;
4765+ ktag_t ktag;
d337f35e 4766
42bc425c 4767 if (nfs_have_delegated_attributes(inode))
a4a22af8 4768 return 0;
927ca606
AM
4769@@ -1262,13 +1273,18 @@ static int nfs_check_inode_attributes(st
4770 if (nfsi->nrequests != 0)
4771 invalid &= ~NFS_INO_REVAL_PAGECACHE;
d337f35e 4772
a4a22af8
AM
4773+ kuid = INOTAG_KUID(DX_TAG(inode), fattr->uid, fattr->gid);
4774+ kgid = INOTAG_KGID(DX_TAG(inode), fattr->uid, fattr->gid);
4775+ ktag = INOTAG_KTAG(DX_TAG(inode), fattr->uid, fattr->gid, GLOBAL_ROOT_TAG);
d337f35e
JR
4776+
4777 /* Have any file permissions changed? */
ec22aa5c 4778 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
9474138d 4779 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
b00e13aa
AM
4780- if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
4781+ if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, kuid))
ec22aa5c 4782 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
b00e13aa
AM
4783- if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
4784+ if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, kgid))
ec22aa5c
AM
4785 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
4786+ /* maybe check for tag too? */
d337f35e
JR
4787
4788 /* Has the link count changed? */
ec22aa5c 4789 if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
927ca606 4790@@ -1642,6 +1658,9 @@ static int nfs_update_inode(struct inode
2380c486 4791 unsigned long now = jiffies;
7e46296a 4792 unsigned long save_cache_validity;
927ca606 4793 bool cache_revalidated = true;
a4a22af8
AM
4794+ kuid_t kuid;
4795+ kgid_t kgid;
4796+ ktag_t ktag;
d337f35e 4797
bb20add7 4798 dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
2380c486 4799 __func__, inode->i_sb->s_id, inode->i_ino,
927ca606
AM
4800@@ -1752,6 +1771,9 @@ static int nfs_update_inode(struct inode
4801 cache_revalidated = false;
4802 }
d337f35e 4803
a4a22af8
AM
4804+ kuid = TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag);
4805+ kgid = TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag);
4806+ ktag = TAGINO_KTAG(DX_TAG(inode), inode->i_tag);
ec22aa5c
AM
4807
4808 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
4809 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
927ca606
AM
4810@@ -1806,6 +1828,10 @@ static int nfs_update_inode(struct inode
4811 cache_revalidated = false;
4812 }
ec22aa5c 4813
a4a22af8
AM
4814+ inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
4815+ inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
4816+ inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, ktag);
ec22aa5c
AM
4817+
4818 if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
4819 if (inode->i_nlink != fattr->nlink) {
4820 invalid |= NFS_INO_INVALID_ATTR;
8931d859
AM
4821diff -NurpP --minimal linux-4.4.161/fs/nfs/nfs3xdr.c linux-4.4.161-vs2.3.9.8/fs/nfs/nfs3xdr.c
4822--- linux-4.4.161/fs/nfs/nfs3xdr.c 2016-01-10 23:01:32.000000000 +0000
4823+++ linux-4.4.161-vs2.3.9.8/fs/nfs/nfs3xdr.c 2018-10-20 04:57:21.000000000 +0000
78865d5b 4824@@ -20,6 +20,7 @@
d337f35e
JR
4825 #include <linux/nfs3.h>
4826 #include <linux/nfs_fs.h>
4827 #include <linux/nfsacl.h>
4828+#include <linux/vs_tag.h>
4829 #include "internal.h"
4830
4831 #define NFSDBG_FACILITY NFSDBG_XDR
b00e13aa 4832@@ -558,7 +559,8 @@ static __be32 *xdr_decode_nfstime3(__be3
d33d7b00
AM
4833 * set_mtime mtime;
4834 * };
4835 */
4836-static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
4837+static void encode_sattr3(struct xdr_stream *xdr,
4838+ const struct iattr *attr, int tag)
d337f35e 4839 {
d33d7b00
AM
4840 u32 nbytes;
4841 __be32 *p;
b00e13aa 4842@@ -590,15 +592,19 @@ static void encode_sattr3(struct xdr_str
d33d7b00 4843 } else
d337f35e 4844 *p++ = xdr_zero;
d33d7b00 4845
d337f35e
JR
4846- if (attr->ia_valid & ATTR_UID) {
4847+ if (attr->ia_valid & ATTR_UID ||
4848+ (tag && (attr->ia_valid & ATTR_TAG))) {
4849 *p++ = xdr_one;
b00e13aa 4850- *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
a4a22af8
AM
4851+ *p++ = cpu_to_be32(from_kuid(&init_user_ns,
4852+ TAGINO_KUID(tag, attr->ia_uid, attr->ia_tag)));
d33d7b00 4853 } else
d337f35e 4854 *p++ = xdr_zero;
d33d7b00 4855
d337f35e
JR
4856- if (attr->ia_valid & ATTR_GID) {
4857+ if (attr->ia_valid & ATTR_GID ||
4858+ (tag && (attr->ia_valid & ATTR_TAG))) {
4859 *p++ = xdr_one;
b00e13aa 4860- *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
a4a22af8
AM
4861+ *p++ = cpu_to_be32(from_kgid(&init_user_ns,
4862+ TAGINO_KGID(tag, attr->ia_gid, attr->ia_tag)));
d33d7b00 4863 } else
d337f35e 4864 *p++ = xdr_zero;
d33d7b00 4865
b00e13aa 4866@@ -887,7 +893,7 @@ static void nfs3_xdr_enc_setattr3args(st
d33d7b00 4867 const struct nfs3_sattrargs *args)
d337f35e 4868 {
d33d7b00
AM
4869 encode_nfs_fh3(xdr, args->fh);
4870- encode_sattr3(xdr, args->sattr);
4871+ encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag);
4872 encode_sattrguard3(xdr, args);
4873 }
d337f35e 4874
b00e13aa 4875@@ -1037,13 +1043,13 @@ static void nfs3_xdr_enc_write3args(stru
d33d7b00
AM
4876 * };
4877 */
4878 static void encode_createhow3(struct xdr_stream *xdr,
4879- const struct nfs3_createargs *args)
4880+ const struct nfs3_createargs *args, int tag)
d337f35e 4881 {
d33d7b00
AM
4882 encode_uint32(xdr, args->createmode);
4883 switch (args->createmode) {
4884 case NFS3_CREATE_UNCHECKED:
4885 case NFS3_CREATE_GUARDED:
4886- encode_sattr3(xdr, args->sattr);
4887+ encode_sattr3(xdr, args->sattr, tag);
4888 break;
4889 case NFS3_CREATE_EXCLUSIVE:
4890 encode_createverf3(xdr, args->verifier);
b00e13aa 4891@@ -1058,7 +1064,7 @@ static void nfs3_xdr_enc_create3args(str
d33d7b00
AM
4892 const struct nfs3_createargs *args)
4893 {
4894 encode_diropargs3(xdr, args->fh, args->name, args->len);
4895- encode_createhow3(xdr, args);
4896+ encode_createhow3(xdr, args, req->rq_task->tk_client->cl_tag);
4897 }
4898
4899 /*
b00e13aa 4900@@ -1074,7 +1080,7 @@ static void nfs3_xdr_enc_mkdir3args(stru
d33d7b00
AM
4901 const struct nfs3_mkdirargs *args)
4902 {
4903 encode_diropargs3(xdr, args->fh, args->name, args->len);
4904- encode_sattr3(xdr, args->sattr);
4905+ encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag);
d337f35e 4906 }
d33d7b00
AM
4907
4908 /*
b00e13aa 4909@@ -1091,9 +1097,9 @@ static void nfs3_xdr_enc_mkdir3args(stru
d33d7b00
AM
4910 * };
4911 */
4912 static void encode_symlinkdata3(struct xdr_stream *xdr,
4913- const struct nfs3_symlinkargs *args)
4914+ const struct nfs3_symlinkargs *args, int tag)
4915 {
4916- encode_sattr3(xdr, args->sattr);
4917+ encode_sattr3(xdr, args->sattr, tag);
4918 encode_nfspath3(xdr, args->pages, args->pathlen);
4919 }
4920
b00e13aa 4921@@ -1102,7 +1108,7 @@ static void nfs3_xdr_enc_symlink3args(st
d33d7b00
AM
4922 const struct nfs3_symlinkargs *args)
4923 {
4924 encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
4925- encode_symlinkdata3(xdr, args);
4926+ encode_symlinkdata3(xdr, args, req->rq_task->tk_client->cl_tag);
927ca606 4927 xdr->buf->flags |= XDRBUF_WRITE;
d33d7b00
AM
4928 }
4929
927ca606 4930@@ -1131,24 +1137,24 @@ static void nfs3_xdr_enc_symlink3args(st
d33d7b00
AM
4931 * };
4932 */
4933 static void encode_devicedata3(struct xdr_stream *xdr,
4934- const struct nfs3_mknodargs *args)
4935+ const struct nfs3_mknodargs *args, int tag)
4936 {
4937- encode_sattr3(xdr, args->sattr);
4938+ encode_sattr3(xdr, args->sattr, tag);
4939 encode_specdata3(xdr, args->rdev);
4940 }
4941
4942 static void encode_mknoddata3(struct xdr_stream *xdr,
4943- const struct nfs3_mknodargs *args)
4944+ const struct nfs3_mknodargs *args, int tag)
4945 {
4946 encode_ftype3(xdr, args->type);
4947 switch (args->type) {
4948 case NF3CHR:
4949 case NF3BLK:
4950- encode_devicedata3(xdr, args);
4951+ encode_devicedata3(xdr, args, tag);
4952 break;
4953 case NF3SOCK:
4954 case NF3FIFO:
4955- encode_sattr3(xdr, args->sattr);
4956+ encode_sattr3(xdr, args->sattr, tag);
4957 break;
4958 case NF3REG:
4959 case NF3DIR:
927ca606 4960@@ -1163,7 +1169,7 @@ static void nfs3_xdr_enc_mknod3args(stru
d33d7b00 4961 const struct nfs3_mknodargs *args)
d337f35e 4962 {
d33d7b00
AM
4963 encode_diropargs3(xdr, args->fh, args->name, args->len);
4964- encode_mknoddata3(xdr, args);
4965+ encode_mknoddata3(xdr, args, req->rq_task->tk_client->cl_tag);
4966 }
4967
4968 /*
8931d859
AM
4969diff -NurpP --minimal linux-4.4.161/fs/nfs/super.c linux-4.4.161-vs2.3.9.8/fs/nfs/super.c
4970--- linux-4.4.161/fs/nfs/super.c 2018-10-20 02:34:30.000000000 +0000
4971+++ linux-4.4.161-vs2.3.9.8/fs/nfs/super.c 2018-10-20 04:57:21.000000000 +0000
927ca606 4972@@ -54,6 +54,7 @@
b00e13aa 4973 #include <linux/parser.h>
1e8b8f9b
AM
4974 #include <linux/nsproxy.h>
4975 #include <linux/rcupdate.h>
d337f35e
JR
4976+#include <linux/vs_tag.h>
4977
d337f35e 4978 #include <asm/uaccess.h>
1e8b8f9b 4979
927ca606 4980@@ -102,6 +103,7 @@ enum {
1e8b8f9b 4981 Opt_mountport,
ab30d09f 4982 Opt_mountvers,
ab30d09f
AM
4983 Opt_minorversion,
4984+ Opt_tagid,
4985
4986 /* Mount options that take string arguments */
1e8b8f9b 4987 Opt_nfsvers,
927ca606 4988@@ -114,6 +116,9 @@ enum {
537831f9
AM
4989 /* Special mount options */
4990 Opt_userspace, Opt_deprecated, Opt_sloppy,
4991
4992+ /* Linux-VServer tagging options */
4993+ Opt_tag, Opt_notag,
4994+
4995 Opt_err
4996 };
4997
927ca606 4998@@ -183,6 +188,10 @@ static const match_table_t nfs_mount_opt
537831f9
AM
4999 { Opt_fscache_uniq, "fsc=%s" },
5000 { Opt_local_lock, "local_lock=%s" },
ab30d09f
AM
5001
5002+ { Opt_tag, "tag" },
5003+ { Opt_notag, "notag" },
5004+ { Opt_tagid, "tagid=%u" },
5005+
537831f9
AM
5006 /* The following needs to be listed after all other options */
5007 { Opt_nfsvers, "v%s" },
ab30d09f 5008
927ca606 5009@@ -642,6 +651,7 @@ static void nfs_show_mount_options(struc
2380c486 5010 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
ec22aa5c
AM
5011 { NFS_MOUNT_UNSHARED, ",nosharecache", "" },
5012 { NFS_MOUNT_NORESVPORT, ",noresvport", "" },
d337f35e
JR
5013+ { NFS_MOUNT_TAGGED, ",tag", "" },
5014 { 0, NULL, NULL }
5015 };
5016 const struct proc_nfs_info *nfs_infop;
927ca606 5017@@ -1324,6 +1334,14 @@ static int nfs_parse_mount_options(char
537831f9 5018 case Opt_nomigration:
927ca606 5019 mnt->options &= ~NFS_OPTION_MIGRATION;
ab30d09f
AM
5020 break;
5021+#ifndef CONFIG_TAGGING_NONE
5022+ case Opt_tag:
5023+ mnt->flags |= NFS_MOUNT_TAGGED;
5024+ break;
5025+ case Opt_notag:
5026+ mnt->flags &= ~NFS_MOUNT_TAGGED;
5027+ break;
5028+#endif
5029
5030 /*
5031 * options that take numeric values
927ca606 5032@@ -1410,6 +1428,12 @@ static int nfs_parse_mount_options(char
ab30d09f
AM
5033 goto out_invalid_value;
5034 mnt->minorversion = option;
5035 break;
5036+#ifdef CONFIG_PROPAGATE
5037+ case Opt_tagid:
5038+ /* use args[0] */
5039+ nfs_data.flags |= NFS_MOUNT_TAGGED;
5040+ break;
5041+#endif
5042
5043 /*
5044 * options that take text values
8931d859
AM
5045diff -NurpP --minimal linux-4.4.161/fs/nfsd/auth.c linux-4.4.161-vs2.3.9.8/fs/nfsd/auth.c
5046--- linux-4.4.161/fs/nfsd/auth.c 2018-10-20 02:34:30.000000000 +0000
5047+++ linux-4.4.161-vs2.3.9.8/fs/nfsd/auth.c 2018-10-20 04:57:21.000000000 +0000
bb20add7
AM
5048@@ -1,6 +1,7 @@
5049 /* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
2bf5ad28
AM
5050
5051 #include <linux/sched.h>
d337f35e 5052+#include <linux/vs_tag.h>
2bf5ad28 5053 #include "nfsd.h"
2380c486 5054 #include "auth.h"
d337f35e 5055
bb20add7 5056@@ -35,6 +36,9 @@ int nfsd_setuser(struct svc_rqst *rqstp,
d337f35e 5057
ec22aa5c
AM
5058 new->fsuid = rqstp->rq_cred.cr_uid;
5059 new->fsgid = rqstp->rq_cred.cr_gid;
5060+ /* FIXME: this desperately needs a tag :)
61333608 5061+ new->xid = (vxid_t)INOTAG_TAG(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid, 0);
ec22aa5c 5062+ */
d337f35e 5063
ec22aa5c
AM
5064 rqgi = rqstp->rq_cred.cr_group_info;
5065
8931d859
AM
5066diff -NurpP --minimal linux-4.4.161/fs/nfsd/nfs3xdr.c linux-4.4.161-vs2.3.9.8/fs/nfsd/nfs3xdr.c
5067--- linux-4.4.161/fs/nfsd/nfs3xdr.c 2018-10-20 02:34:30.000000000 +0000
5068+++ linux-4.4.161-vs2.3.9.8/fs/nfsd/nfs3xdr.c 2018-10-20 04:57:21.000000000 +0000
b00e13aa 5069@@ -8,6 +8,7 @@
2bf5ad28
AM
5070
5071 #include <linux/namei.h>
b00e13aa 5072 #include <linux/sunrpc/svc_xprt.h>
d337f35e 5073+#include <linux/vs_tag.h>
2bf5ad28 5074 #include "xdr3.h"
2380c486 5075 #include "auth.h"
b00e13aa
AM
5076 #include "netns.h"
5077@@ -98,6 +99,8 @@ static __be32 *
d337f35e
JR
5078 decode_sattr3(__be32 *p, struct iattr *iap)
5079 {
5080 u32 tmp;
a4a22af8
AM
5081+ kuid_t kuid = GLOBAL_ROOT_UID;
5082+ kgid_t kgid = GLOBAL_ROOT_GID;
d337f35e
JR
5083
5084 iap->ia_valid = 0;
5085
b00e13aa
AM
5086@@ -106,15 +109,18 @@ decode_sattr3(__be32 *p, struct iattr *i
5087 iap->ia_mode = ntohl(*p++);
d337f35e
JR
5088 }
5089 if (*p++) {
b00e13aa 5090- iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++));
a4a22af8 5091+ kuid = make_kuid(&init_user_ns, ntohl(*p++));
b00e13aa
AM
5092 if (uid_valid(iap->ia_uid))
5093 iap->ia_valid |= ATTR_UID;
d337f35e
JR
5094 }
5095 if (*p++) {
b00e13aa 5096- iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++));
a4a22af8 5097+ kgid = make_kgid(&init_user_ns, ntohl(*p++));
b00e13aa
AM
5098 if (gid_valid(iap->ia_gid))
5099 iap->ia_valid |= ATTR_GID;
d337f35e 5100 }
a4a22af8
AM
5101+ iap->ia_uid = INOTAG_KUID(DX_TAG_NFSD, kuid, kgid);
5102+ iap->ia_gid = INOTAG_KGID(DX_TAG_NFSD, kuid, kgid);
5103+ iap->ia_tag = INOTAG_KTAG(DX_TAG_NFSD, kuid, kgid, GLOBAL_ROOT_TAG);
d337f35e
JR
5104 if (*p++) {
5105 u64 newsize;
5106
bb20add7 5107@@ -167,8 +173,12 @@ encode_fattr3(struct svc_rqst *rqstp, __
d337f35e 5108 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
bb20add7 5109 *p++ = htonl((u32) (stat->mode & S_IALLUGO));
d337f35e 5110 *p++ = htonl((u32) stat->nlink);
b00e13aa
AM
5111- *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
5112- *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
5113+ *p++ = htonl((u32) from_kuid(&init_user_ns,
a4a22af8 5114+ TAGINO_KUID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
2380c486 5115+ stat->uid, stat->tag)));
b00e13aa 5116+ *p++ = htonl((u32) from_kgid(&init_user_ns,
a4a22af8 5117+ TAGINO_KGID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
2380c486 5118+ stat->gid, stat->tag)));
d337f35e
JR
5119 if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
5120 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
5121 } else {
8931d859
AM
5122diff -NurpP --minimal linux-4.4.161/fs/nfsd/nfs4xdr.c linux-4.4.161-vs2.3.9.8/fs/nfsd/nfs4xdr.c
5123--- linux-4.4.161/fs/nfsd/nfs4xdr.c 2018-10-20 02:34:30.000000000 +0000
5124+++ linux-4.4.161-vs2.3.9.8/fs/nfsd/nfs4xdr.c 2018-10-20 05:50:20.000000000 +0000
927ca606 5125@@ -40,6 +40,7 @@
d33d7b00 5126 #include <linux/utsname.h>
a168f21d 5127 #include <linux/pagemap.h>
2380c486 5128 #include <linux/sunrpc/svcauth_gss.h>
d337f35e
JR
5129+#include <linux/vs_tag.h>
5130
d33d7b00
AM
5131 #include "idmap.h"
5132 #include "acl.h"
8931d859 5133@@ -2639,12 +2640,16 @@ out_acl:
bb20add7 5134 *p++ = cpu_to_be32(stat.nlink);
d337f35e
JR
5135 }
5136 if (bmval1 & FATTR4_WORD1_OWNER) {
bb20add7
AM
5137- status = nfsd4_encode_user(xdr, rqstp, stat.uid);
5138+ status = nfsd4_encode_user(xdr, rqstp,
a4a22af8 5139+ TAGINO_KUID(DX_TAG(dentry->d_inode),
bb20add7 5140+ stat.uid, stat.tag));
d337f35e
JR
5141 if (status)
5142 goto out;
5143 }
5144 if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
bb20add7
AM
5145- status = nfsd4_encode_group(xdr, rqstp, stat.gid);
5146+ status = nfsd4_encode_group(xdr, rqstp,
a4a22af8 5147+ TAGINO_KGID(DX_TAG(dentry->d_inode),
bb20add7 5148+ stat.gid, stat.tag));
d337f35e 5149 if (status)
f15949f2
JR
5150 goto out;
5151 }
8931d859
AM
5152diff -NurpP --minimal linux-4.4.161/fs/nfsd/nfsxdr.c linux-4.4.161-vs2.3.9.8/fs/nfsd/nfsxdr.c
5153--- linux-4.4.161/fs/nfsd/nfsxdr.c 2018-10-20 02:34:30.000000000 +0000
5154+++ linux-4.4.161-vs2.3.9.8/fs/nfsd/nfsxdr.c 2018-10-20 04:57:21.000000000 +0000
b00e13aa
AM
5155@@ -7,6 +7,7 @@
5156 #include "vfs.h"
2bf5ad28 5157 #include "xdr.h"
2380c486 5158 #include "auth.h"
2bf5ad28 5159+#include <linux/vs_tag.h>
d337f35e
JR
5160
5161 #define NFSDDBG_FACILITY NFSDDBG_XDR
2bf5ad28 5162
b00e13aa 5163@@ -89,6 +90,8 @@ static __be32 *
d337f35e
JR
5164 decode_sattr(__be32 *p, struct iattr *iap)
5165 {
5166 u32 tmp, tmp1;
a4a22af8
AM
5167+ kuid_t kuid = GLOBAL_ROOT_UID;
5168+ kgid_t kgid = GLOBAL_ROOT_GID;
d337f35e
JR
5169
5170 iap->ia_valid = 0;
5171
b00e13aa
AM
5172@@ -101,15 +104,18 @@ decode_sattr(__be32 *p, struct iattr *ia
5173 iap->ia_mode = tmp;
d337f35e
JR
5174 }
5175 if ((tmp = ntohl(*p++)) != (u32)-1) {
b00e13aa 5176- iap->ia_uid = make_kuid(&init_user_ns, tmp);
a4a22af8 5177+ kuid = make_kuid(&init_user_ns, tmp);
b00e13aa
AM
5178 if (uid_valid(iap->ia_uid))
5179 iap->ia_valid |= ATTR_UID;
d337f35e
JR
5180 }
5181 if ((tmp = ntohl(*p++)) != (u32)-1) {
b00e13aa 5182- iap->ia_gid = make_kgid(&init_user_ns, tmp);
a4a22af8 5183+ kgid = make_kgid(&init_user_ns, tmp);
b00e13aa
AM
5184 if (gid_valid(iap->ia_gid))
5185 iap->ia_valid |= ATTR_GID;
d337f35e 5186 }
a4a22af8
AM
5187+ iap->ia_uid = INOTAG_KUID(DX_TAG_NFSD, kuid, kgid);
5188+ iap->ia_gid = INOTAG_KGID(DX_TAG_NFSD, kuid, kgid);
5189+ iap->ia_tag = INOTAG_KTAG(DX_TAG_NFSD, kuid, kgid, GLOBAL_ROOT_TAG);
d337f35e
JR
5190 if ((tmp = ntohl(*p++)) != (u32)-1) {
5191 iap->ia_valid |= ATTR_SIZE;
5192 iap->ia_size = tmp;
b00e13aa 5193@@ -154,8 +160,10 @@ encode_fattr(struct svc_rqst *rqstp, __b
d337f35e
JR
5194 *p++ = htonl(nfs_ftypes[type >> 12]);
5195 *p++ = htonl((u32) stat->mode);
5196 *p++ = htonl((u32) stat->nlink);
b00e13aa
AM
5197- *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
5198- *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
5199+ *p++ = htonl((u32) from_kuid(&init_user_ns,
a4a22af8 5200+ TAGINO_KUID(DX_TAG(dentry->d_inode), stat->uid, stat->tag)));
b00e13aa 5201+ *p++ = htonl((u32) from_kgid(&init_user_ns,
a4a22af8 5202+ TAGINO_KGID(DX_TAG(dentry->d_inode), stat->gid, stat->tag)));
d337f35e
JR
5203
5204 if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
5205 *p++ = htonl(NFS_MAXPATHLEN);
8931d859
AM
5206diff -NurpP --minimal linux-4.4.161/fs/ocfs2/dlmglue.c linux-4.4.161-vs2.3.9.8/fs/ocfs2/dlmglue.c
5207--- linux-4.4.161/fs/ocfs2/dlmglue.c 2018-10-20 02:34:30.000000000 +0000
5208+++ linux-4.4.161-vs2.3.9.8/fs/ocfs2/dlmglue.c 2018-10-20 04:57:21.000000000 +0000
927ca606 5209@@ -2128,6 +2128,7 @@ static void __ocfs2_stuff_meta_lvb(struc
d337f35e 5210 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
b00e13aa
AM
5211 lvb->lvb_iuid = cpu_to_be32(i_uid_read(inode));
5212 lvb->lvb_igid = cpu_to_be32(i_gid_read(inode));
a4a22af8 5213+ lvb->lvb_itag = cpu_to_be16(i_tag_read(inode));
d337f35e
JR
5214 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
5215 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
5216 lvb->lvb_iatime_packed =
927ca606 5217@@ -2178,6 +2179,7 @@ static void ocfs2_refresh_inode_from_lvb
d337f35e 5218
b00e13aa
AM
5219 i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
5220 i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
5221+ i_tag_write(inode, be16_to_cpu(lvb->lvb_itag));
d337f35e 5222 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
f6c5ef8b 5223 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
d337f35e 5224 ocfs2_unpack_timespec(&inode->i_atime,
8931d859
AM
5225diff -NurpP --minimal linux-4.4.161/fs/ocfs2/dlmglue.h linux-4.4.161-vs2.3.9.8/fs/ocfs2/dlmglue.h
5226--- linux-4.4.161/fs/ocfs2/dlmglue.h 2018-10-20 02:34:30.000000000 +0000
5227+++ linux-4.4.161-vs2.3.9.8/fs/ocfs2/dlmglue.h 2018-10-20 04:57:21.000000000 +0000
2380c486
JR
5228@@ -46,7 +46,8 @@ struct ocfs2_meta_lvb {
5229 __be16 lvb_inlink;
5230 __be32 lvb_iattr;
5231 __be32 lvb_igeneration;
5232- __be32 lvb_reserved2;
d337f35e 5233+ __be16 lvb_itag;
2380c486
JR
5234+ __be16 lvb_reserved2;
5235 };
5236
ec22aa5c 5237 #define OCFS2_QINFO_LVB_VERSION 1
8931d859
AM
5238diff -NurpP --minimal linux-4.4.161/fs/ocfs2/file.c linux-4.4.161-vs2.3.9.8/fs/ocfs2/file.c
5239--- linux-4.4.161/fs/ocfs2/file.c 2018-10-20 02:34:30.000000000 +0000
5240+++ linux-4.4.161-vs2.3.9.8/fs/ocfs2/file.c 2018-10-20 04:57:21.000000000 +0000
927ca606 5241@@ -1151,7 +1151,7 @@ int ocfs2_setattr(struct dentry *dentry,
763640ca 5242 attr->ia_valid &= ~ATTR_SIZE;
d337f35e
JR
5243
5244 #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
5245- | ATTR_GID | ATTR_UID | ATTR_MODE)
5246+ | ATTR_GID | ATTR_UID | ATTR_TAG | ATTR_MODE)
763640ca 5247 if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
d337f35e 5248 return 0;
763640ca 5249
8931d859
AM
5250diff -NurpP --minimal linux-4.4.161/fs/ocfs2/inode.c linux-4.4.161-vs2.3.9.8/fs/ocfs2/inode.c
5251--- linux-4.4.161/fs/ocfs2/inode.c 2016-01-10 23:01:32.000000000 +0000
5252+++ linux-4.4.161-vs2.3.9.8/fs/ocfs2/inode.c 2018-10-20 04:57:21.000000000 +0000
78865d5b 5253@@ -28,6 +28,7 @@
d337f35e
JR
5254 #include <linux/highmem.h>
5255 #include <linux/pagemap.h>
ec22aa5c 5256 #include <linux/quotaops.h>
d337f35e
JR
5257+#include <linux/vs_tag.h>
5258
5259 #include <asm/byteorder.h>
5260
537831f9 5261@@ -78,11 +79,13 @@ void ocfs2_set_inode_flags(struct inode
2380c486
JR
5262 {
5263 unsigned int flags = OCFS2_I(inode)->ip_attr;
5264
5265- inode->i_flags &= ~(S_IMMUTABLE |
5266+ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
5267 S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
d337f35e
JR
5268
5269 if (flags & OCFS2_IMMUTABLE_FL)
5270 inode->i_flags |= S_IMMUTABLE;
2380c486
JR
5271+ if (flags & OCFS2_IXUNLINK_FL)
5272+ inode->i_flags |= S_IXUNLINK;
d337f35e
JR
5273
5274 if (flags & OCFS2_SYNC_FL)
5275 inode->i_flags |= S_SYNC;
537831f9 5276@@ -92,25 +95,44 @@ void ocfs2_set_inode_flags(struct inode
2380c486
JR
5277 inode->i_flags |= S_NOATIME;
5278 if (flags & OCFS2_DIRSYNC_FL)
d337f35e 5279 inode->i_flags |= S_DIRSYNC;
2380c486
JR
5280+
5281+ inode->i_vflags &= ~(V_BARRIER | V_COW);
5282+
5283+ if (flags & OCFS2_BARRIER_FL)
5284+ inode->i_vflags |= V_BARRIER;
5285+ if (flags & OCFS2_COW_FL)
5286+ inode->i_vflags |= V_COW;
d337f35e
JR
5287 }
5288
2380c486
JR
5289 /* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */
5290 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
5291 {
5292 unsigned int flags = oi->vfs_inode.i_flags;
5293+ unsigned int vflags = oi->vfs_inode.i_vflags;
5294+
5295+ oi->ip_attr &= ~(OCFS2_SYNC_FL | OCFS2_APPEND_FL |
5296+ OCFS2_IMMUTABLE_FL | OCFS2_IXUNLINK_FL |
5297+ OCFS2_NOATIME_FL | OCFS2_DIRSYNC_FL |
5298+ OCFS2_BARRIER_FL | OCFS2_COW_FL);
5299+
5300+ if (flags & S_IMMUTABLE)
5301+ oi->ip_attr |= OCFS2_IMMUTABLE_FL;
5302+ if (flags & S_IXUNLINK)
5303+ oi->ip_attr |= OCFS2_IXUNLINK_FL;
5304
5305- oi->ip_attr &= ~(OCFS2_SYNC_FL|OCFS2_APPEND_FL|
5306- OCFS2_IMMUTABLE_FL|OCFS2_NOATIME_FL|OCFS2_DIRSYNC_FL);
5307 if (flags & S_SYNC)
5308 oi->ip_attr |= OCFS2_SYNC_FL;
5309 if (flags & S_APPEND)
5310 oi->ip_attr |= OCFS2_APPEND_FL;
5311- if (flags & S_IMMUTABLE)
5312- oi->ip_attr |= OCFS2_IMMUTABLE_FL;
5313 if (flags & S_NOATIME)
5314 oi->ip_attr |= OCFS2_NOATIME_FL;
5315 if (flags & S_DIRSYNC)
5316 oi->ip_attr |= OCFS2_DIRSYNC_FL;
5317+
5318+ if (vflags & V_BARRIER)
5319+ oi->ip_attr |= OCFS2_BARRIER_FL;
5320+ if (vflags & V_COW)
5321+ oi->ip_attr |= OCFS2_COW_FL;
2380c486
JR
5322 }
5323
ec22aa5c 5324 struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
bb20add7 5325@@ -268,6 +290,8 @@ void ocfs2_populate_inode(struct inode *
d337f35e
JR
5326 struct super_block *sb;
5327 struct ocfs2_super *osb;
ec22aa5c 5328 int use_plocks = 1;
d337f35e
JR
5329+ uid_t uid;
5330+ gid_t gid;
5331
763640ca
JR
5332 sb = inode->i_sb;
5333 osb = OCFS2_SB(sb);
bb20add7 5334@@ -296,8 +320,12 @@ void ocfs2_populate_inode(struct inode *
d337f35e
JR
5335 inode->i_generation = le32_to_cpu(fe->i_generation);
5336 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
5337 inode->i_mode = le16_to_cpu(fe->i_mode);
b00e13aa
AM
5338- i_uid_write(inode, le32_to_cpu(fe->i_uid));
5339- i_gid_write(inode, le32_to_cpu(fe->i_gid));
d337f35e
JR
5340+ uid = le32_to_cpu(fe->i_uid);
5341+ gid = le32_to_cpu(fe->i_gid);
b00e13aa
AM
5342+ i_uid_write(inode, INOTAG_UID(DX_TAG(inode), uid, gid));
5343+ i_gid_write(inode, INOTAG_GID(DX_TAG(inode), uid, gid));
5344+ i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), uid, gid,
5345+ /* le16_to_cpu(raw_inode->i_raw_tag) */ 0));
d337f35e
JR
5346
5347 /* Fast symlinks will have i_size but no allocated clusters. */
42bc425c 5348 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) {
8931d859
AM
5349diff -NurpP --minimal linux-4.4.161/fs/ocfs2/inode.h linux-4.4.161-vs2.3.9.8/fs/ocfs2/inode.h
5350--- linux-4.4.161/fs/ocfs2/inode.h 2016-01-10 23:01:32.000000000 +0000
5351+++ linux-4.4.161-vs2.3.9.8/fs/ocfs2/inode.h 2018-10-20 04:57:21.000000000 +0000
927ca606 5352@@ -161,6 +161,7 @@ struct buffer_head *ocfs2_bread(struct i
d337f35e
JR
5353
5354 void ocfs2_set_inode_flags(struct inode *inode);
2380c486 5355 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
d4263eb0 5356+int ocfs2_sync_flags(struct inode *inode, int, int);
d337f35e 5357
2380c486
JR
5358 static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
5359 {
8931d859
AM
5360diff -NurpP --minimal linux-4.4.161/fs/ocfs2/ioctl.c linux-4.4.161-vs2.3.9.8/fs/ocfs2/ioctl.c
5361--- linux-4.4.161/fs/ocfs2/ioctl.c 2016-01-10 23:01:32.000000000 +0000
5362+++ linux-4.4.161-vs2.3.9.8/fs/ocfs2/ioctl.c 2018-10-20 04:57:21.000000000 +0000
1e8b8f9b 5363@@ -76,7 +76,41 @@ static int ocfs2_get_inode_attr(struct i
d337f35e
JR
5364 return status;
5365 }
5366
5367-static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
d4263eb0
JR
5368+int ocfs2_sync_flags(struct inode *inode, int flags, int vflags)
5369+{
5370+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5371+ struct buffer_head *bh = NULL;
5372+ handle_t *handle = NULL;
5373+ int status;
5374+
5375+ status = ocfs2_inode_lock(inode, &bh, 1);
5376+ if (status < 0) {
5377+ mlog_errno(status);
5378+ return status;
5379+ }
5380+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
5381+ if (IS_ERR(handle)) {
5382+ status = PTR_ERR(handle);
5383+ mlog_errno(status);
5384+ goto bail_unlock;
5385+ }
5386+
5387+ inode->i_flags = flags;
5388+ inode->i_vflags = vflags;
5389+ ocfs2_get_inode_flags(OCFS2_I(inode));
5390+
5391+ status = ocfs2_mark_inode_dirty(handle, inode, bh);
5392+ if (status < 0)
5393+ mlog_errno(status);
5394+
5395+ ocfs2_commit_trans(osb, handle);
5396+bail_unlock:
5397+ ocfs2_inode_unlock(inode, 1);
5398+ brelse(bh);
5399+ return status;
5400+}
5401+
d337f35e
JR
5402+int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
5403 unsigned mask)
5404 {
5405 struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
09be7631
JR
5406@@ -116,6 +150,11 @@ static int ocfs2_set_inode_attr(struct i
5407 goto bail_unlock;
5408 }
2380c486
JR
5409
5410+ if (IS_BARRIER(inode)) {
5411+ vxwprintk_task(1, "messing with the barrier.");
5412+ goto bail_unlock;
5413+ }
5414+
5415 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
5416 if (IS_ERR(handle)) {
5417 status = PTR_ERR(handle);
bb20add7 5418@@ -841,6 +880,7 @@ bail:
d4263eb0
JR
5419 return status;
5420 }
d337f35e 5421
d337f35e 5422+
d4263eb0
JR
5423 long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
5424 {
b00e13aa 5425 struct inode *inode = file_inode(filp);
8931d859
AM
5426diff -NurpP --minimal linux-4.4.161/fs/ocfs2/namei.c linux-4.4.161-vs2.3.9.8/fs/ocfs2/namei.c
5427--- linux-4.4.161/fs/ocfs2/namei.c 2018-10-20 02:34:30.000000000 +0000
5428+++ linux-4.4.161-vs2.3.9.8/fs/ocfs2/namei.c 2018-10-20 04:57:21.000000000 +0000
ec22aa5c 5429@@ -41,6 +41,7 @@
d337f35e
JR
5430 #include <linux/slab.h>
5431 #include <linux/highmem.h>
ec22aa5c 5432 #include <linux/quotaops.h>
d337f35e
JR
5433+#include <linux/vs_tag.h>
5434
d337f35e 5435 #include <cluster/masklog.h>
763640ca 5436
927ca606 5437@@ -516,6 +517,7 @@ static int __ocfs2_mknod_locked(struct i
93de0823 5438 struct ocfs2_extent_list *fel;
ec22aa5c 5439 u16 feat;
265de2f7 5440 struct ocfs2_inode_info *oi = OCFS2_I(inode);
a4a22af8 5441+ ktag_t ktag;
d337f35e 5442
7e46296a
AM
5443 *new_fe_bh = NULL;
5444
927ca606 5445@@ -553,8 +555,13 @@ static int __ocfs2_mknod_locked(struct i
76514441 5446 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
d337f35e 5447 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
2380c486 5448 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
b00e13aa
AM
5449- fe->i_uid = cpu_to_le32(i_uid_read(inode));
5450- fe->i_gid = cpu_to_le32(i_gid_read(inode));
d337f35e 5451+
a4a22af8
AM
5452+ ktag = make_ktag(&init_user_ns, dx_current_fstag(osb->sb));
5453+ fe->i_uid = cpu_to_le32(from_kuid(&init_user_ns,
5454+ TAGINO_KUID(DX_TAG(inode), inode->i_uid, ktag)));
5455+ fe->i_gid = cpu_to_le32(from_kgid(&init_user_ns,
5456+ TAGINO_KGID(DX_TAG(inode), inode->i_gid, ktag)));
5457+ inode->i_tag = ktag; /* is this correct? */
ec22aa5c
AM
5458 fe->i_mode = cpu_to_le16(inode->i_mode);
5459 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
d337f35e 5460 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
8931d859
AM
5461diff -NurpP --minimal linux-4.4.161/fs/ocfs2/ocfs2.h linux-4.4.161-vs2.3.9.8/fs/ocfs2/ocfs2.h
5462--- linux-4.4.161/fs/ocfs2/ocfs2.h 2018-10-20 02:34:30.000000000 +0000
5463+++ linux-4.4.161-vs2.3.9.8/fs/ocfs2/ocfs2.h 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
5464@@ -289,6 +289,7 @@ enum ocfs2_mount_options
5465 OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
5466 OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
5467 OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
5468+ OCFS2_MOUNT_TAGGED = 1 << 18, /* use tagging */
d33d7b00
AM
5469 };
5470
bb20add7 5471 #define OCFS2_OSB_SOFT_RO 0x0001
8931d859
AM
5472diff -NurpP --minimal linux-4.4.161/fs/ocfs2/ocfs2_fs.h linux-4.4.161-vs2.3.9.8/fs/ocfs2/ocfs2_fs.h
5473--- linux-4.4.161/fs/ocfs2/ocfs2_fs.h 2016-01-10 23:01:32.000000000 +0000
5474+++ linux-4.4.161-vs2.3.9.8/fs/ocfs2/ocfs2_fs.h 2018-10-20 04:57:21.000000000 +0000
927ca606 5475@@ -275,6 +275,11 @@
93de0823
AM
5476 #define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
5477 #define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
2380c486 5478
93de0823
AM
5479+#define OCFS2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */
5480+
5481+#define OCFS2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */
5482+#define OCFS2_COW_FL FS_COW_FL /* Copy on Write marker */
5483+
5484 #define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
5485 #define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
5486
8931d859
AM
5487diff -NurpP --minimal linux-4.4.161/fs/ocfs2/super.c linux-4.4.161-vs2.3.9.8/fs/ocfs2/super.c
5488--- linux-4.4.161/fs/ocfs2/super.c 2018-10-20 02:34:30.000000000 +0000
5489+++ linux-4.4.161-vs2.3.9.8/fs/ocfs2/super.c 2018-10-20 04:57:21.000000000 +0000
927ca606 5490@@ -193,6 +193,7 @@ enum {
76514441 5491 Opt_dir_resv_level,
927ca606
AM
5492 Opt_journal_async_commit,
5493 Opt_err_cont,
d337f35e
JR
5494+ Opt_tag, Opt_notag, Opt_tagid,
5495 Opt_err,
5496 };
5497
927ca606 5498@@ -226,6 +227,9 @@ static const match_table_t tokens = {
76514441 5499 {Opt_dir_resv_level, "dir_resv_level=%u"},
927ca606
AM
5500 {Opt_journal_async_commit, "journal_async_commit"},
5501 {Opt_err_cont, "errors=continue"},
d337f35e 5502+ {Opt_tag, "tag"},
d337f35e
JR
5503+ {Opt_notag, "notag"},
5504+ {Opt_tagid, "tagid=%u"},
5505 {Opt_err, NULL}
5506 };
5507
8931d859 5508@@ -676,6 +680,13 @@ static int ocfs2_remount(struct super_bl
d337f35e
JR
5509 goto out;
5510 }
5511
d4263eb0
JR
5512+ if ((osb->s_mount_opt & OCFS2_MOUNT_TAGGED) !=
5513+ (parsed_options.mount_opt & OCFS2_MOUNT_TAGGED)) {
d337f35e
JR
5514+ ret = -EINVAL;
5515+ mlog(ML_ERROR, "Cannot change tagging on remount\n");
5516+ goto out;
5517+ }
5518+
ab30d09f
AM
5519 /* We're going to/from readonly mode. */
5520 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
5521 /* Disable quota accounting before remounting RO */
8931d859 5522@@ -1165,6 +1176,9 @@ static int ocfs2_fill_super(struct super
d337f35e
JR
5523
5524 ocfs2_complete_mount_recovery(osb);
5525
5526+ if (osb->s_mount_opt & OCFS2_MOUNT_TAGGED)
5527+ sb->s_flags |= MS_TAGGED;
5528+
2380c486
JR
5529 if (ocfs2_mount_local(osb))
5530 snprintf(nodestr, sizeof(nodestr), "local");
5531 else
8931d859 5532@@ -1485,6 +1499,20 @@ static int ocfs2_parse_options(struct su
927ca606
AM
5533 case Opt_journal_async_commit:
5534 mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
d337f35e
JR
5535 break;
5536+#ifndef CONFIG_TAGGING_NONE
5537+ case Opt_tag:
2380c486 5538+ mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
d337f35e
JR
5539+ break;
5540+ case Opt_notag:
2380c486 5541+ mopt->mount_opt &= ~OCFS2_MOUNT_TAGGED;
d337f35e
JR
5542+ break;
5543+#endif
5544+#ifdef CONFIG_PROPAGATE
5545+ case Opt_tagid:
5546+ /* use args[0] */
2380c486 5547+ mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
d337f35e
JR
5548+ break;
5549+#endif
5550 default:
5551 mlog(ML_ERROR,
5552 "Unrecognized mount option \"%s\" "
8931d859
AM
5553diff -NurpP --minimal linux-4.4.161/fs/open.c linux-4.4.161-vs2.3.9.8/fs/open.c
5554--- linux-4.4.161/fs/open.c 2018-10-20 02:34:30.000000000 +0000
5555+++ linux-4.4.161-vs2.3.9.8/fs/open.c 2018-10-20 04:57:21.000000000 +0000
b00e13aa 5556@@ -31,6 +31,11 @@
2bf5ad28 5557 #include <linux/ima.h>
93de0823 5558 #include <linux/dnotify.h>
b00e13aa 5559 #include <linux/compat.h>
d337f35e
JR
5560+#include <linux/vs_base.h>
5561+#include <linux/vs_limit.h>
d337f35e
JR
5562+#include <linux/vs_tag.h>
5563+#include <linux/vs_cowbl.h>
78865d5b 5564+#include <linux/vserver/dlimit.h>
d337f35e 5565
2bf5ad28
AM
5566 #include "internal.h"
5567
927ca606 5568@@ -70,6 +75,11 @@ long vfs_truncate(struct path *path, lof
b00e13aa
AM
5569 struct inode *inode;
5570 long error;
5571
76514441 5572+#ifdef CONFIG_VSERVER_COWBL
b00e13aa 5573+ error = cow_check_and_break(path);
d337f35e 5574+ if (error)
b00e13aa 5575+ goto out;
76514441 5576+#endif
b00e13aa 5577 inode = path->dentry->d_inode;
d337f35e 5578
a168f21d 5579 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
927ca606 5580@@ -548,6 +558,13 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, cons
b00e13aa
AM
5581 unsigned int lookup_flags = LOOKUP_FOLLOW;
5582 retry:
5583 error = user_path_at(dfd, filename, lookup_flags, &path);
a168f21d 5584+#ifdef CONFIG_VSERVER_COWBL
b00e13aa 5585+ if (!error) {
a168f21d 5586+ error = cow_check_and_break(&path);
b00e13aa
AM
5587+ if (error)
5588+ path_put(&path);
5589+ }
a168f21d 5590+#endif
b00e13aa 5591 if (!error) {
a168f21d
AM
5592 error = chmod_common(&path, mode);
5593 path_put(&path);
927ca606 5594@@ -582,13 +599,15 @@ retry_deleg:
42bc425c
AM
5595 if (!uid_valid(uid))
5596 return -EINVAL;
d337f35e 5597 newattrs.ia_valid |= ATTR_UID;
42bc425c 5598- newattrs.ia_uid = uid;
8ce283e1
AM
5599+ newattrs.ia_uid = make_kuid(&init_user_ns,
5600+ dx_map_uid(user));
d337f35e
JR
5601 }
5602 if (group != (gid_t) -1) {
42bc425c
AM
5603 if (!gid_valid(gid))
5604 return -EINVAL;
d337f35e 5605 newattrs.ia_valid |= ATTR_GID;
42bc425c 5606- newattrs.ia_gid = gid;
8ce283e1
AM
5607+ newattrs.ia_gid = make_kgid(&init_user_ns,
5608+ dx_map_gid(group));
d337f35e
JR
5609 }
5610 if (!S_ISDIR(inode->i_mode))
2380c486 5611 newattrs.ia_valid |=
927ca606 5612@@ -626,6 +645,10 @@ retry:
2380c486 5613 error = mnt_want_write(path.mnt);
d337f35e 5614 if (error)
2380c486 5615 goto out_release;
d337f35e 5616+#ifdef CONFIG_VSERVER_COWBL
2380c486 5617+ error = cow_check_and_break(&path);
d337f35e 5618+ if (!error)
d337f35e 5619+#endif
2bf5ad28 5620 error = chown_common(&path, user, group);
2380c486
JR
5621 mnt_drop_write(path.mnt);
5622 out_release:
8931d859
AM
5623diff -NurpP --minimal linux-4.4.161/fs/proc/array.c linux-4.4.161-vs2.3.9.8/fs/proc/array.c
5624--- linux-4.4.161/fs/proc/array.c 2018-10-20 02:34:30.000000000 +0000
5625+++ linux-4.4.161-vs2.3.9.8/fs/proc/array.c 2018-10-20 05:50:20.000000000 +0000
5626@@ -84,6 +84,8 @@
2380c486 5627 #include <linux/tracehook.h>
927ca606 5628 #include <linux/string_helpers.h>
42bc425c 5629 #include <linux/user_namespace.h>
d337f35e
JR
5630+#include <linux/vs_context.h>
5631+#include <linux/vs_network.h>
5632
d337f35e 5633 #include <asm/pgtable.h>
2380c486 5634 #include <asm/processor.h>
8931d859 5635@@ -155,6 +157,9 @@ static inline void task_state(struct seq
2380c486
JR
5636 ppid = pid_alive(p) ?
5637 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
927ca606 5638
2380c486
JR
5639+ if (unlikely(vx_current_initpid(p->pid)))
5640+ ppid = 0;
5641+
927ca606
AM
5642 tracer = ptrace_parent(p);
5643 if (tracer)
5644 tpid = task_pid_nr_ns(tracer, ns);
8931d859 5645@@ -293,8 +298,8 @@ static inline void task_sig(struct seq_f
bb20add7 5646 render_sigset_t(m, "SigCgt:\t", &caught);
2380c486
JR
5647 }
5648
bb20add7 5649-static void render_cap_t(struct seq_file *m, const char *header,
2380c486 5650- kernel_cap_t *a)
bb20add7 5651+void render_cap_t(struct seq_file *m, const char *header,
2380c486 5652+ struct vx_info *vxi, kernel_cap_t *a)
d337f35e 5653 {
2380c486
JR
5654 unsigned __capi;
5655
8931d859 5656@@ -321,11 +326,12 @@ static inline void task_cap(struct seq_f
927ca606 5657 cap_ambient = cred->cap_ambient;
bb20add7 5658 rcu_read_unlock();
2380c486 5659
ec22aa5c
AM
5660- render_cap_t(m, "CapInh:\t", &cap_inheritable);
5661- render_cap_t(m, "CapPrm:\t", &cap_permitted);
5662- render_cap_t(m, "CapEff:\t", &cap_effective);
5663- render_cap_t(m, "CapBnd:\t", &cap_bset);
927ca606 5664- render_cap_t(m, "CapAmb:\t", &cap_ambient);
ec22aa5c
AM
5665+ /* FIXME: maybe move the p->vx_info masking to __task_cred() ? */
5666+ render_cap_t(m, "CapInh:\t", p->vx_info, &cap_inheritable);
5667+ render_cap_t(m, "CapPrm:\t", p->vx_info, &cap_permitted);
5668+ render_cap_t(m, "CapEff:\t", p->vx_info, &cap_effective);
5669+ render_cap_t(m, "CapBnd:\t", p->vx_info, &cap_bset);
927ca606 5670+ render_cap_t(m, "CapAmb:\t", p->vx_info, &cap_ambient);
d337f35e
JR
5671 }
5672
b00e13aa 5673 static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
8931d859 5674@@ -377,6 +383,43 @@ static void task_cpus_allowed(struct seq
927ca606 5675 cpumask_pr_args(&task->cpus_allowed));
2380c486
JR
5676 }
5677
5678+int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
5679+ struct pid *pid, struct task_struct *task)
5680+{
5681+ seq_printf(m, "Proxy:\t%p(%c)\n"
5682+ "Count:\t%u\n"
5683+ "uts:\t%p(%c)\n"
5684+ "ipc:\t%p(%c)\n"
5685+ "mnt:\t%p(%c)\n"
5686+ "pid:\t%p(%c)\n"
5687+ "net:\t%p(%c)\n",
5688+ task->nsproxy,
5689+ (task->nsproxy == init_task.nsproxy ? 'I' : '-'),
5690+ atomic_read(&task->nsproxy->count),
5691+ task->nsproxy->uts_ns,
5692+ (task->nsproxy->uts_ns == init_task.nsproxy->uts_ns ? 'I' : '-'),
5693+ task->nsproxy->ipc_ns,
5694+ (task->nsproxy->ipc_ns == init_task.nsproxy->ipc_ns ? 'I' : '-'),
5695+ task->nsproxy->mnt_ns,
5696+ (task->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns ? 'I' : '-'),
c2e5f7c8
JR
5697+ task->nsproxy->pid_ns_for_children,
5698+ (task->nsproxy->pid_ns_for_children ==
5699+ init_task.nsproxy->pid_ns_for_children ? 'I' : '-'),
2380c486
JR
5700+ task->nsproxy->net_ns,
5701+ (task->nsproxy->net_ns == init_task.nsproxy->net_ns ? 'I' : '-'));
5702+ return 0;
5703+}
d337f35e 5704+
2380c486
JR
5705+void task_vs_id(struct seq_file *m, struct task_struct *task)
5706+{
d337f35e 5707+ if (task_vx_flags(task, VXF_HIDE_VINFO, 0))
2380c486
JR
5708+ return;
5709+
bb20add7
AM
5710+ seq_printf(m, "VxID:\t%d\n", vx_task_xid(task));
5711+ seq_printf(m, "NxID:\t%d\n", nx_task_nid(task));
2380c486
JR
5712+}
5713+
5714+
5715 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
5716 struct pid *pid, struct task_struct *task)
5717 {
8931d859 5718@@ -394,6 +437,7 @@ int proc_pid_status(struct seq_file *m,
b00e13aa 5719 task_seccomp(m, task);
2bf5ad28 5720 task_cpus_allowed(m, task);
2380c486
JR
5721 cpuset_task_status_allowed(m, task);
5722+ task_vs_id(m, task);
152aeb71
JR
5723 task_context_switch_counts(m, task);
5724 return 0;
5725 }
8931d859 5726@@ -497,6 +541,17 @@ static int do_task_stat(struct seq_file
d337f35e 5727 /* convert nsec -> ticks */
bb20add7 5728 start_time = nsec_to_clock_t(task->real_start_time);
d337f35e
JR
5729
5730+ /* fixup start time for virt uptime */
5731+ if (vx_flags(VXF_VIRT_UPTIME, 0)) {
5732+ unsigned long long bias =
5733+ current->vx_info->cvirt.bias_clock;
5734+
5735+ if (start_time > bias)
5736+ start_time -= bias;
5737+ else
5738+ start_time = 0;
5739+ }
5740+
1e8b8f9b
AM
5741 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
5742 seq_put_decimal_ll(m, ' ', ppid);
5743 seq_put_decimal_ll(m, ' ', pgid);
8931d859
AM
5744diff -NurpP --minimal linux-4.4.161/fs/proc/base.c linux-4.4.161-vs2.3.9.8/fs/proc/base.c
5745--- linux-4.4.161/fs/proc/base.c 2018-10-20 02:34:30.000000000 +0000
5746+++ linux-4.4.161-vs2.3.9.8/fs/proc/base.c 2018-10-20 05:50:20.000000000 +0000
09be7631 5747@@ -87,6 +87,8 @@
78865d5b 5748 #include <linux/slab.h>
db55b927 5749 #include <linux/flex_array.h>
09be7631 5750 #include <linux/posix-timers.h>
d337f35e
JR
5751+#include <linux/vs_context.h>
5752+#include <linux/vs_network.h>
763640ca
JR
5753 #ifdef CONFIG_HARDWALL
5754 #include <asm/hardwall.h>
5755 #endif
8931d859 5756@@ -1120,11 +1122,15 @@ static ssize_t oom_adj_write(struct file
537831f9 5757 oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
7e46296a 5758
537831f9
AM
5759 if (oom_adj < task->signal->oom_score_adj &&
5760- !capable(CAP_SYS_RESOURCE)) {
5761+ !vx_capable(CAP_SYS_RESOURCE, VXC_OOM_ADJUST)) {
ab30d09f
AM
5762 err = -EACCES;
5763 goto err_sighand;
4a036bed 5764 }
7e46296a 5765
4a036bed 5766+ /* prevent guest processes from circumventing the oom killer */
537831f9
AM
5767+ if (vx_current_xid() && (oom_adj == OOM_DISABLE))
5768+ oom_adj = OOM_ADJUST_MIN;
7e46296a 5769+
f6c5ef8b 5770 /*
537831f9
AM
5771 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
5772 * /proc/pid/oom_score_adj instead.
8931d859 5773@@ -1689,6 +1695,8 @@ struct inode *proc_pid_make_inode(struct
ec22aa5c
AM
5774 inode->i_gid = cred->egid;
5775 rcu_read_unlock();
d337f35e
JR
5776 }
5777+ /* procfs is xid tagged */
61333608 5778+ i_tag_write(inode, (vtag_t)vx_task_xid(task));
d337f35e
JR
5779 security_task_to_inode(task, inode);
5780
5781 out:
8931d859 5782@@ -1734,6 +1742,8 @@ int pid_getattr(struct vfsmount *mnt, st
d33d7b00
AM
5783
5784 /* dentry stuff */
5785
bb20add7 5786+// static unsigned name_to_int(struct dentry *dentry);
d33d7b00
AM
5787+
5788 /*
5789 * Exceptional case: normally we are not allowed to unhash a busy
5790 * directory. In this case, however, we can do it - no aliasing problems
8931d859 5791@@ -1762,6 +1772,19 @@ int pid_revalidate(struct dentry *dentry
d33d7b00
AM
5792 task = get_proc_task(inode);
5793
5794 if (task) {
bb20add7
AM
5795+ unsigned pid = name_to_int(&dentry->d_name);
5796+
5797+ if (pid != ~0U && pid != vx_map_pid(task->pid) &&
5798+ pid != __task_pid_nr_ns(task, PIDTYPE_PID,
5799+ task_active_pid_ns(task))) {
5800+ vxdprintk(VXD_CBIT(misc, 10),
5801+ VS_Q("%*s") " dropped by pid_revalidate(%d!=%d)",
5802+ dentry->d_name.len, dentry->d_name.name,
5803+ pid, vx_map_pid(task->pid));
d33d7b00 5804+ put_task_struct(task);
bb20add7
AM
5805+ d_drop(dentry);
5806+ return 0;
d33d7b00
AM
5807+ }
5808 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
5809 task_dumpable(task)) {
5810 rcu_read_lock();
8931d859 5811@@ -2331,6 +2354,13 @@ static struct dentry *proc_pident_lookup
d337f35e
JR
5812 if (!task)
5813 goto out_no_task;
5814
2380c486 5815+ /* TODO: maybe we can come up with a generic approach? */
d337f35e
JR
5816+ if (task_vx_flags(task, VXF_HIDE_VINFO, 0) &&
5817+ (dentry->d_name.len == 5) &&
5818+ (!memcmp(dentry->d_name.name, "vinfo", 5) ||
5819+ !memcmp(dentry->d_name.name, "ninfo", 5)))
5820+ goto out;
5821+
5822 /*
5823 * Yes, it does not scale. And it should not. Don't add
5824 * new entries into /proc/<tgid>/ without very good reasons.
8931d859 5825@@ -2773,6 +2803,11 @@ static int proc_pid_personality(struct s
2380c486
JR
5826 static const struct file_operations proc_task_operations;
5827 static const struct inode_operations proc_task_inode_operations;
d337f35e 5828
bb20add7
AM
5829+extern int proc_pid_vx_info(struct seq_file *,
5830+ struct pid_namespace *, struct pid *, struct task_struct *);
5831+extern int proc_pid_nx_info(struct seq_file *,
5832+ struct pid_namespace *, struct pid *, struct task_struct *);
d337f35e 5833+
2380c486 5834 static const struct pid_entry tgid_base_stuff[] = {
ec22aa5c
AM
5835 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
5836 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
8931d859 5837@@ -2837,6 +2872,8 @@ static const struct pid_entry tgid_base_
2380c486 5838 #ifdef CONFIG_CGROUPS
bb20add7 5839 ONE("cgroup", S_IRUGO, proc_cgroup_show),
d337f35e 5840 #endif
bb20add7
AM
5841+ ONE("vinfo", S_IRUGO, proc_pid_vx_info),
5842+ ONE("ninfo", S_IRUGO, proc_pid_nx_info),
5843 ONE("oom_score", S_IRUGO, proc_oom_score),
537831f9 5844 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
93de0823 5845 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
8931d859 5846@@ -3051,7 +3088,7 @@ retry:
2380c486
JR
5847 iter.task = NULL;
5848 pid = find_ge_pid(iter.tgid, ns);
5849 if (pid) {
5850- iter.tgid = pid_nr_ns(pid, ns);
5851+ iter.tgid = pid_unmapped_nr_ns(pid, ns);
5852 iter.task = pid_task(pid, PIDTYPE_PID);
5853 /* What we to know is if the pid we have find is the
5854 * pid of a thread_group_leader. Testing for task
8931d859 5855@@ -3111,8 +3148,10 @@ int proc_pid_readdir(struct file *file,
c2e5f7c8
JR
5856 if (!has_pid_permissions(ns, iter.task, 2))
5857 continue;
db55b927 5858
c2e5f7c8
JR
5859- len = snprintf(name, sizeof(name), "%d", iter.tgid);
5860+ len = snprintf(name, sizeof(name), "%d", vx_map_tgid(iter.tgid));
5861 ctx->pos = iter.tgid + TGID_OFFSET;
2380c486 5862+ if (!vx_proc_task_visible(iter.task))
d337f35e 5863+ continue;
c2e5f7c8
JR
5864 if (!proc_fill_cache(file, ctx, name, len,
5865 proc_pid_instantiate, iter.task, NULL)) {
2380c486 5866 put_task_struct(iter.task);
8931d859 5867@@ -3249,6 +3288,7 @@ static const struct pid_entry tid_base_s
09be7631 5868 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
bb20add7 5869 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
09be7631
JR
5870 #endif
5871+ ONE("nsproxy", S_IRUGO, proc_pid_nsproxy),
5872 };
5873
c2e5f7c8 5874 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
8931d859 5875@@ -3315,6 +3355,8 @@ static struct dentry *proc_task_lookup(s
bb20add7 5876 tid = name_to_int(&dentry->d_name);
d337f35e
JR
5877 if (tid == ~0U)
5878 goto out;
5879+ if (vx_current_initpid(tid))
5880+ goto out;
5881
2380c486 5882 ns = dentry->d_sb->s_fs_info;
d337f35e 5883 rcu_read_lock();
8931d859
AM
5884diff -NurpP --minimal linux-4.4.161/fs/proc/generic.c linux-4.4.161-vs2.3.9.8/fs/proc/generic.c
5885--- linux-4.4.161/fs/proc/generic.c 2018-10-20 02:34:30.000000000 +0000
5886+++ linux-4.4.161-vs2.3.9.8/fs/proc/generic.c 2018-10-20 04:57:21.000000000 +0000
927ca606 5887@@ -22,6 +22,7 @@
d337f35e
JR
5888 #include <linux/bitops.h>
5889 #include <linux/spinlock.h>
2380c486 5890 #include <linux/completion.h>
d337f35e
JR
5891+#include <linux/vserver/inode.h>
5892 #include <asm/uaccess.h>
5893
5894 #include "internal.h"
927ca606
AM
5895@@ -66,8 +67,16 @@ static struct proc_dir_entry *pde_subdir
5896 node = node->rb_left;
5897 else if (result > 0)
5898 node = node->rb_right;
5899- else
5900+ else {
5901+ if (!vx_hide_check(0, de->vx_flags)) {
5902+ vxdprintk(VXD_CBIT(misc, 9),
5903+ VS_Q("%*s")
5904+ " hidden in pde_subdir_find()",
5905+ de->namelen, de->name);
5906+ return 0;
5907+ }
5908 return de;
bb20add7 5909+ }
927ca606
AM
5910 }
5911 return NULL;
5912 }
5913@@ -241,6 +250,8 @@ struct dentry *proc_lookup_de(struct pro
5914 return ERR_PTR(-ENOMEM);
5915 d_set_d_op(dentry, &simple_dentry_operations);
5916 d_add(dentry, inode);
ba86f833 5917+ /* generic proc entries belong to the host */
537831f9 5918+ i_tag_write(inode, 0);
927ca606 5919 return NULL;
2380c486 5920 }
927ca606
AM
5921 read_unlock(&proc_subdir_lock);
5922@@ -287,6 +298,12 @@ int proc_readdir_de(struct proc_dir_entr
c2e5f7c8
JR
5923 do {
5924 struct proc_dir_entry *next;
5925 pde_get(de);
bb20add7
AM
5926+ if (!vx_hide_check(0, de->vx_flags)) {
5927+ vxdprintk(VXD_CBIT(misc, 9),
5928+ VS_Q("%*s") " hidden in proc_readdir_de()",
5929+ de->namelen, de->name);
c2e5f7c8 5930+ goto skip;
bb20add7 5931+ }
927ca606 5932 read_unlock(&proc_subdir_lock);
c2e5f7c8
JR
5933 if (!dir_emit(ctx, de->name, de->namelen,
5934 de->low_ino, de->mode >> 12)) {
927ca606 5935@@ -294,6 +311,7 @@ int proc_readdir_de(struct proc_dir_entr
c2e5f7c8
JR
5936 return 0;
5937 }
927ca606 5938 read_lock(&proc_subdir_lock);
c2e5f7c8
JR
5939+ skip:
5940 ctx->pos++;
927ca606 5941 next = pde_subdir_next(de);
c2e5f7c8 5942 pde_put(de);
927ca606 5943@@ -387,6 +405,7 @@ static struct proc_dir_entry *__proc_cre
537831f9 5944 ent->mode = mode;
d337f35e 5945 ent->nlink = nlink;
927ca606 5946 ent->subdir = RB_ROOT;
d337f35e 5947+ ent->vx_flags = IATTR_PROC_DEFAULT;
537831f9 5948 atomic_set(&ent->count, 1);
2380c486 5949 spin_lock_init(&ent->pde_unload_lock);
2380c486 5950 INIT_LIST_HEAD(&ent->pde_openers);
927ca606 5951@@ -411,7 +430,8 @@ struct proc_dir_entry *proc_symlink(cons
d337f35e
JR
5952 kfree(ent->data);
5953 kfree(ent);
5954 ent = NULL;
5955- }
5956+ } else
5957+ ent->vx_flags = IATTR_PROC_SYMLINK;
5958 } else {
5959 kfree(ent);
5960 ent = NULL;
8931d859
AM
5961diff -NurpP --minimal linux-4.4.161/fs/proc/inode.c linux-4.4.161-vs2.3.9.8/fs/proc/inode.c
5962--- linux-4.4.161/fs/proc/inode.c 2016-01-10 23:01:32.000000000 +0000
5963+++ linux-4.4.161-vs2.3.9.8/fs/proc/inode.c 2018-10-20 04:57:21.000000000 +0000
927ca606 5964@@ -431,6 +431,8 @@ struct inode *proc_get_inode(struct supe
d337f35e
JR
5965 inode->i_uid = de->uid;
5966 inode->i_gid = de->gid;
5967 }
5968+ if (de->vx_flags)
5969+ PROC_I(inode)->vx_flags = de->vx_flags;
5970 if (de->size)
5971 inode->i_size = de->size;
5972 if (de->nlink)
8931d859
AM
5973diff -NurpP --minimal linux-4.4.161/fs/proc/internal.h linux-4.4.161-vs2.3.9.8/fs/proc/internal.h
5974--- linux-4.4.161/fs/proc/internal.h 2016-01-10 23:01:32.000000000 +0000
5975+++ linux-4.4.161-vs2.3.9.8/fs/proc/internal.h 2018-10-20 04:57:21.000000000 +0000
09be7631
JR
5976@@ -14,6 +14,7 @@
5977 #include <linux/spinlock.h>
5978 #include <linux/atomic.h>
b00e13aa 5979 #include <linux/binfmts.h>
d337f35e
JR
5980+#include <linux/vs_pid.h>
5981
09be7631
JR
5982 struct ctl_table_header;
5983 struct mempolicy;
927ca606 5984@@ -34,6 +35,7 @@ struct proc_dir_entry {
09be7631
JR
5985 nlink_t nlink;
5986 kuid_t uid;
5987 kgid_t gid;
5988+ int vx_flags;
5989 loff_t size;
5990 const struct inode_operations *proc_iops;
5991 const struct file_operations *proc_fops;
927ca606 5992@@ -51,15 +53,22 @@ struct proc_dir_entry {
09be7631
JR
5993 char name[];
5994 };
5995
5996+struct vx_info;
5997+struct nx_info;
2380c486 5998+
09be7631
JR
5999 union proc_op {
6000 int (*proc_get_link)(struct dentry *, struct path *);
09be7631
JR
6001 int (*proc_show)(struct seq_file *m,
6002 struct pid_namespace *ns, struct pid *pid,
6003 struct task_struct *task);
6004+ int (*proc_vs_read)(char *page);
6005+ int (*proc_vxi_read)(struct vx_info *vxi, char *page);
6006+ int (*proc_nxi_read)(struct nx_info *nxi, char *page);
6007 };
2380c486 6008
09be7631
JR
6009 struct proc_inode {
6010 struct pid *pid;
6011+ int vx_flags;
6012 int fd;
6013 union proc_op op;
6014 struct proc_dir_entry *pde;
927ca606 6015@@ -92,11 +101,16 @@ static inline struct pid *proc_pid(struc
d337f35e
JR
6016 return PROC_I(inode)->pid;
6017 }
6018
6019-static inline struct task_struct *get_proc_task(struct inode *inode)
6020+static inline struct task_struct *get_proc_task_real(struct inode *inode)
6021 {
6022 return get_pid_task(proc_pid(inode), PIDTYPE_PID);
6023 }
6024
6025+static inline struct task_struct *get_proc_task(struct inode *inode)
6026+{
6027+ return vx_get_proc_task(inode, proc_pid(inode));
6028+}
6029+
09be7631 6030 static inline int task_dumpable(struct task_struct *task)
d337f35e 6031 {
09be7631 6032 int dumpable = 0;
927ca606 6033@@ -155,6 +169,8 @@ extern int proc_pid_status(struct seq_fi
09be7631
JR
6034 struct pid *, struct task_struct *);
6035 extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
6036 struct pid *, struct task_struct *);
6037+extern int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
6038+ struct pid *pid, struct task_struct *task);
6039
6040 /*
6041 * base.c
8931d859
AM
6042diff -NurpP --minimal linux-4.4.161/fs/proc/loadavg.c linux-4.4.161-vs2.3.9.8/fs/proc/loadavg.c
6043--- linux-4.4.161/fs/proc/loadavg.c 2016-01-10 23:01:32.000000000 +0000
6044+++ linux-4.4.161-vs2.3.9.8/fs/proc/loadavg.c 2018-10-20 04:57:21.000000000 +0000
ec22aa5c 6045@@ -12,15 +12,27 @@
1bc743c0 6046
ec22aa5c 6047 static int loadavg_proc_show(struct seq_file *m, void *v)
1bc743c0
JR
6048 {
6049+ unsigned long running;
6050+ unsigned int threads;
ec22aa5c 6051 unsigned long avnrun[3];
1bc743c0 6052
ec22aa5c 6053 get_avenrun(avnrun, FIXED_1/200, 0);
bd427b06 6054
ec22aa5c 6055+ if (vx_flags(VXF_VIRT_LOAD, 0)) {
eab5a9a6 6056+ struct vx_info *vxi = current_vx_info();
ec22aa5c
AM
6057+
6058+ running = atomic_read(&vxi->cvirt.nr_running);
6059+ threads = atomic_read(&vxi->cvirt.nr_threads);
6060+ } else {
6061+ running = nr_running();
6062+ threads = nr_threads;
6063+ }
6064+
6065 seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
6066 LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
6067 LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
6068 LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
1bc743c0
JR
6069- nr_running(), nr_threads,
6070+ running, threads,
6071 task_active_pid_ns(current)->last_pid);
ec22aa5c 6072 return 0;
1bc743c0 6073 }
8931d859
AM
6074diff -NurpP --minimal linux-4.4.161/fs/proc/meminfo.c linux-4.4.161-vs2.3.9.8/fs/proc/meminfo.c
6075--- linux-4.4.161/fs/proc/meminfo.c 2018-10-20 02:34:30.000000000 +0000
6076+++ linux-4.4.161-vs2.3.9.8/fs/proc/meminfo.c 2018-10-20 04:57:21.000000000 +0000
927ca606 6077@@ -43,7 +43,8 @@ static int meminfo_proc_show(struct seq_
c2e5f7c8
JR
6078 si_swapinfo(&i);
6079 committed = percpu_counter_read_positive(&vm_committed_as);
e3afe727
AM
6080
6081- cached = global_page_state(NR_FILE_PAGES) -
6082+ cached = vx_flags(VXF_VIRT_MEM, 0) ?
6083+ vx_vsi_cached(&i) : global_page_state(NR_FILE_PAGES) -
b00e13aa 6084 total_swapcache_pages() - i.bufferram;
e3afe727 6085 if (cached < 0)
d337f35e 6086 cached = 0;
8931d859 6087@@ -67,13 +68,16 @@ static int meminfo_proc_show(struct seq_
8de2f54c
AM
6088 */
6089 pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
6090 pagecache -= min(pagecache / 2, wmark_low);
6091- available += pagecache;
6092+
6093+ if (!vx_flags(VXF_VIRT_MEM, 0))
6094+ available += pagecache;
6095
6096 /*
6097 * Part of the reclaimable slab consists of items that are in use,
6098 * and cannot be freed. Cap this estimate at the low watermark.
6099 */
6100- available += global_page_state(NR_SLAB_RECLAIMABLE) -
6101+ if (!vx_flags(VXF_VIRT_MEM, 0))
6102+ available += global_page_state(NR_SLAB_RECLAIMABLE) -
6103 min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
6104
6105 if (available < 0)
8931d859
AM
6106diff -NurpP --minimal linux-4.4.161/fs/proc/root.c linux-4.4.161-vs2.3.9.8/fs/proc/root.c
6107--- linux-4.4.161/fs/proc/root.c 2018-10-20 02:34:30.000000000 +0000
6108+++ linux-4.4.161-vs2.3.9.8/fs/proc/root.c 2018-10-20 04:57:21.000000000 +0000
b00e13aa 6109@@ -20,9 +20,14 @@
2380c486
JR
6110 #include <linux/mount.h>
6111 #include <linux/pid_namespace.h>
db55b927 6112 #include <linux/parser.h>
2380c486 6113+#include <linux/vserver/inode.h>
d337f35e 6114
2380c486 6115 #include "internal.h"
d337f35e 6116
d337f35e
JR
6117+struct proc_dir_entry *proc_virtual;
6118+
6119+extern void proc_vx_init(void);
2380c486
JR
6120+
6121 static int proc_test_super(struct super_block *sb, void *data)
6122 {
6123 return sb->s_fs_info == data;
927ca606
AM
6124@@ -113,7 +118,8 @@ static struct dentry *proc_mount(struct
6125 options = data;
c2e5f7c8
JR
6126
6127 /* Does the mounter have privilege over the pid namespace? */
6128- if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
6129+ if (!vx_ns_capable(ns->user_ns,
6130+ CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
6131 return ERR_PTR(-EPERM);
6132 }
6133
927ca606 6134@@ -196,6 +202,7 @@ void __init proc_root_init(void)
bb20add7 6135 proc_tty_init();
2380c486
JR
6136 proc_mkdir("bus", NULL);
6137 proc_sys_init();
d337f35e
JR
6138+ proc_vx_init();
6139 }
6140
6141 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
927ca606 6142@@ -257,6 +264,7 @@ struct proc_dir_entry proc_root = {
2380c486
JR
6143 .proc_iops = &proc_root_inode_operations,
6144 .proc_fops = &proc_root_operations,
6145 .parent = &proc_root,
6146+ .vx_flags = IATTR_ADMIN | IATTR_WATCH,
927ca606 6147 .subdir = RB_ROOT,
a168f21d 6148 .name = "/proc",
2380c486 6149 };
8931d859
AM
6150diff -NurpP --minimal linux-4.4.161/fs/proc/self.c linux-4.4.161-vs2.3.9.8/fs/proc/self.c
6151--- linux-4.4.161/fs/proc/self.c 2016-01-10 23:01:32.000000000 +0000
6152+++ linux-4.4.161-vs2.3.9.8/fs/proc/self.c 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
6153@@ -1,6 +1,7 @@
6154 #include <linux/sched.h>
09be7631
JR
6155 #include <linux/slab.h>
6156 #include <linux/pid_namespace.h>
b00e13aa 6157+#include <linux/vserver/inode.h>
09be7631 6158 #include "internal.h"
b00e13aa
AM
6159
6160 /*
927ca606 6161@@ -52,6 +53,8 @@ int proc_setup_self(struct super_block *
09be7631
JR
6162 self = d_alloc_name(s->s_root, "self");
6163 if (self) {
6164 struct inode *inode = new_inode_pseudo(s);
6165+
6166+ // self->vx_flags = IATTR_PROC_SYMLINK;
6167 if (inode) {
6168 inode->i_ino = self_inum;
6169 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
8931d859
AM
6170diff -NurpP --minimal linux-4.4.161/fs/proc/stat.c linux-4.4.161-vs2.3.9.8/fs/proc/stat.c
6171--- linux-4.4.161/fs/proc/stat.c 2016-01-10 23:01:32.000000000 +0000
6172+++ linux-4.4.161-vs2.3.9.8/fs/proc/stat.c 2018-10-20 04:57:21.000000000 +0000
537831f9 6173@@ -9,8 +9,10 @@
1e8b8f9b
AM
6174 #include <linux/slab.h>
6175 #include <linux/time.h>
6176 #include <linux/irqnr.h>
6177+#include <linux/vserver/cvirt.h>
265de2f7 6178 #include <linux/cputime.h>
1e8b8f9b 6179 #include <linux/tick.h>
537831f9
AM
6180+#include <linux/cpuset.h>
6181
6182 #ifndef arch_irq_stat_cpu
6183 #define arch_irq_stat_cpu(cpu) 0
6184@@ -87,14 +89,26 @@ static int show_stat(struct seq_file *p,
6185 u64 sum_softirq = 0;
6186 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
6187 struct timespec boottime;
6188+ cpumask_var_t cpus_allowed;
6189+ bool virt_cpu = vx_flags(VXF_VIRT_CPU, 0);
6190
6191 user = nice = system = idle = iowait =
1e8b8f9b
AM
6192 irq = softirq = steal = 0;
6193 guest = guest_nice = 0;
6194 getboottime(&boottime);
6195+
6196+ if (vx_flags(VXF_VIRT_UPTIME, 0))
6197+ vx_vsi_boottime(&boottime);
537831f9
AM
6198+
6199+ if (virt_cpu)
6200+ cpuset_cpus_allowed(current, cpus_allowed);
1e8b8f9b
AM
6201+
6202 jif = boottime.tv_sec;
6203
6204 for_each_possible_cpu(i) {
537831f9
AM
6205+ if (virt_cpu && !cpumask_test_cpu(i, cpus_allowed))
6206+ continue;
6207+
6208 user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
6209 nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
6210 system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
6211@@ -131,6 +145,9 @@ static int show_stat(struct seq_file *p,
6212 seq_putc(p, '\n');
6213
6214 for_each_online_cpu(i) {
6215+ if (virt_cpu && !cpumask_test_cpu(i, cpus_allowed))
6216+ continue;
6217+
6218 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
6219 user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
6220 nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
8931d859
AM
6221diff -NurpP --minimal linux-4.4.161/fs/proc/uptime.c linux-4.4.161-vs2.3.9.8/fs/proc/uptime.c
6222--- linux-4.4.161/fs/proc/uptime.c 2016-01-10 23:01:32.000000000 +0000
6223+++ linux-4.4.161-vs2.3.9.8/fs/proc/uptime.c 2018-10-20 04:57:21.000000000 +0000
f6c5ef8b 6224@@ -5,6 +5,7 @@
ec22aa5c
AM
6225 #include <linux/seq_file.h>
6226 #include <linux/time.h>
f6c5ef8b 6227 #include <linux/kernel_stat.h>
ec22aa5c 6228+#include <linux/vserver/cvirt.h>
265de2f7 6229 #include <linux/cputime.h>
ec22aa5c
AM
6230
6231 static int uptime_proc_show(struct seq_file *m, void *v)
c2e5f7c8 6232@@ -24,6 +25,10 @@ static int uptime_proc_show(struct seq_f
f6c5ef8b
AM
6233 nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
6234 idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
6235 idle.tv_nsec = rem;
ec22aa5c
AM
6236+
6237+ if (vx_flags(VXF_VIRT_UPTIME, 0))
6238+ vx_vsi_uptime(&uptime, &idle);
6239+
6240 seq_printf(m, "%lu.%02lu %lu.%02lu\n",
6241 (unsigned long) uptime.tv_sec,
6242 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
8931d859
AM
6243diff -NurpP --minimal linux-4.4.161/fs/proc_namespace.c linux-4.4.161-vs2.3.9.8/fs/proc_namespace.c
6244--- linux-4.4.161/fs/proc_namespace.c 2018-10-20 02:34:30.000000000 +0000
6245+++ linux-4.4.161-vs2.3.9.8/fs/proc_namespace.c 2018-10-20 04:57:21.000000000 +0000
927ca606 6246@@ -46,6 +46,8 @@ static int show_sb_opts(struct seq_file
db55b927
AM
6247 { MS_DIRSYNC, ",dirsync" },
6248 { MS_MANDLOCK, ",mand" },
927ca606 6249 { MS_LAZYTIME, ",lazytime" },
db55b927
AM
6250+ { MS_TAGGED, ",tag" },
6251+ { MS_NOTAGCHECK, ",notagcheck" },
6252 { 0, NULL }
6253 };
6254 const struct proc_fs_info *fs_infop;
927ca606 6255@@ -82,6 +84,38 @@ static inline void mangle(struct seq_fil
db55b927
AM
6256 seq_escape(m, s, " \t\n\\");
6257 }
6258
61b0c03f
JR
6259+#ifdef CONFIG_VSERVER_EXTRA_MNT_CHECK
6260+
db55b927
AM
6261+static int mnt_is_reachable(struct vfsmount *vfsmnt)
6262+{
6263+ struct path root;
6264+ struct dentry *point;
6265+ struct mount *mnt = real_mount(vfsmnt);
6266+ struct mount *root_mnt;
6267+ int ret;
6268+
6269+ if (mnt == mnt->mnt_ns->root)
6270+ return 1;
6271+
98d9a5b1 6272+ rcu_read_lock();
db55b927
AM
6273+ root = current->fs->root;
6274+ root_mnt = real_mount(root.mnt);
6275+ point = root.dentry;
6276+
6277+ while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) {
6278+ point = mnt->mnt_mountpoint;
6279+ mnt = mnt->mnt_parent;
6280+ }
98d9a5b1 6281+ rcu_read_unlock();
db55b927
AM
6282+
6283+ ret = (mnt == root_mnt) && is_subdir(point, root.dentry);
db55b927
AM
6284+ return ret;
6285+}
61b0c03f
JR
6286+
6287+#else
6288+#define mnt_is_reachable(v) (1)
6289+#endif
db55b927
AM
6290+
6291 static void show_type(struct seq_file *m, struct super_block *sb)
6292 {
6293 mangle(m, sb->s_type->name);
927ca606 6294@@ -99,6 +133,17 @@ static int show_vfsmnt(struct seq_file *
db55b927
AM
6295 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
6296 struct super_block *sb = mnt_path.dentry->d_sb;
6297
6298+ if (vx_flags(VXF_HIDE_MOUNT, 0))
6299+ return SEQ_SKIP;
6300+ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6301+ return SEQ_SKIP;
6302+
6303+ if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
6304+ mnt == current->fs->root.mnt) {
6305+ seq_puts(m, "/dev/root / ");
6306+ goto type;
6307+ }
6308+
6309 if (sb->s_op->show_devname) {
6310 err = sb->s_op->show_devname(m, mnt_path.dentry);
6311 if (err)
927ca606
AM
6312@@ -112,6 +157,7 @@ static int show_vfsmnt(struct seq_file *
6313 if (err)
6314 goto out;
db55b927
AM
6315 seq_putc(m, ' ');
6316+type:
6317 show_type(m, sb);
6318 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
6319 err = show_sb_opts(m, sb);
927ca606
AM
6320@@ -133,6 +179,11 @@ static int show_mountinfo(struct seq_fil
6321 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
db55b927
AM
6322 int err = 0;
6323
6324+ if (vx_flags(VXF_HIDE_MOUNT, 0))
6325+ return SEQ_SKIP;
6326+ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6327+ return SEQ_SKIP;
6328+
6329 seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
6330 MAJOR(sb->s_dev), MINOR(sb->s_dev));
6331 if (sb->s_op->show_path)
927ca606 6332@@ -193,6 +244,17 @@ static int show_vfsstat(struct seq_file
db55b927
AM
6333 struct super_block *sb = mnt_path.dentry->d_sb;
6334 int err = 0;
6335
6336+ if (vx_flags(VXF_HIDE_MOUNT, 0))
6337+ return SEQ_SKIP;
6338+ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6339+ return SEQ_SKIP;
6340+
6341+ if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
6342+ mnt == current->fs->root.mnt) {
6343+ seq_puts(m, "device /dev/root mounted on / ");
6344+ goto type;
6345+ }
6346+
6347 /* device */
6348 if (sb->s_op->show_devname) {
6349 seq_puts(m, "device ");
927ca606
AM
6350@@ -214,7 +276,7 @@ static int show_vfsstat(struct seq_file
6351 if (err)
6352 goto out;
db55b927
AM
6353 seq_putc(m, ' ');
6354-
6355+type:
6356 /* file system type */
6357 seq_puts(m, "with fstype ");
6358 show_type(m, sb);
8931d859
AM
6359diff -NurpP --minimal linux-4.4.161/fs/quota/dquot.c linux-4.4.161-vs2.3.9.8/fs/quota/dquot.c
6360--- linux-4.4.161/fs/quota/dquot.c 2018-10-20 02:34:30.000000000 +0000
6361+++ linux-4.4.161-vs2.3.9.8/fs/quota/dquot.c 2018-10-20 04:57:21.000000000 +0000
927ca606 6362@@ -1643,6 +1643,9 @@ int __dquot_alloc_space(struct inode *in
76514441 6363 int reserve = flags & DQUOT_SPACE_RESERVE;
927ca606 6364 struct dquot **dquots;
76514441
AM
6365
6366+ if ((ret = dl_alloc_space(inode, number)))
6367+ return ret;
6368+
bb20add7
AM
6369 if (!dquot_active(inode)) {
6370 inode_incr_space(inode, number, reserve);
6371 goto out;
927ca606 6372@@ -1695,6 +1698,9 @@ int dquot_alloc_inode(struct inode *inod
1e8b8f9b 6373 struct dquot_warn warn[MAXQUOTAS];
927ca606 6374 struct dquot * const *dquots;
76514441
AM
6375
6376+ if ((ret = dl_alloc_inode(inode)))
6377+ return ret;
6378+
93de0823 6379 if (!dquot_active(inode))
bb20add7
AM
6380 return 0;
6381 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
927ca606
AM
6382@@ -1797,6 +1803,8 @@ void __dquot_free_space(struct inode *in
6383 struct dquot **dquots;
bb20add7 6384 int reserve = flags & DQUOT_SPACE_RESERVE, index;
76514441
AM
6385
6386+ dl_free_space(inode, number);
6387+
93de0823 6388 if (!dquot_active(inode)) {
bb20add7
AM
6389 inode_decr_space(inode, number, reserve);
6390 return;
927ca606
AM
6391@@ -1841,6 +1849,8 @@ void dquot_free_inode(struct inode *inod
6392 struct dquot * const *dquots;
bb20add7 6393 int index;
76514441
AM
6394
6395+ dl_free_inode(inode);
6396+
93de0823 6397 if (!dquot_active(inode))
bb20add7
AM
6398 return;
6399
8931d859
AM
6400diff -NurpP --minimal linux-4.4.161/fs/quota/quota.c linux-4.4.161-vs2.3.9.8/fs/quota/quota.c
6401--- linux-4.4.161/fs/quota/quota.c 2018-10-20 02:34:30.000000000 +0000
6402+++ linux-4.4.161-vs2.3.9.8/fs/quota/quota.c 2018-10-20 05:50:20.000000000 +0000
78865d5b
AM
6403@@ -8,6 +8,7 @@
6404 #include <linux/fs.h>
6405 #include <linux/namei.h>
6406 #include <linux/slab.h>
d337f35e 6407+#include <linux/vs_context.h>
78865d5b 6408 #include <asm/current.h>
92598135 6409 #include <linux/uaccess.h>
78865d5b 6410 #include <linux/kernel.h>
8931d859 6411@@ -39,7 +40,7 @@ static int check_quotactl_permission(str
78865d5b
AM
6412 break;
6413 /*FALLTHROUGH*/
6414 default:
d337f35e
JR
6415- if (!capable(CAP_SYS_ADMIN))
6416+ if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
6417 return -EPERM;
6418 }
6419
8931d859 6420@@ -704,6 +705,46 @@ static int do_quotactl(struct super_bloc
b00e13aa
AM
6421
6422 #ifdef CONFIG_BLOCK
d337f35e 6423
d337f35e
JR
6424+#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6425+
6426+#include <linux/vroot.h>
2380c486
JR
6427+#include <linux/major.h>
6428+#include <linux/module.h>
d337f35e 6429+#include <linux/kallsyms.h>
2380c486 6430+#include <linux/vserver/debug.h>
d337f35e
JR
6431+
6432+static vroot_grb_func *vroot_get_real_bdev = NULL;
6433+
763640ca 6434+static DEFINE_SPINLOCK(vroot_grb_lock);
d337f35e
JR
6435+
6436+int register_vroot_grb(vroot_grb_func *func) {
6437+ int ret = -EBUSY;
6438+
6439+ spin_lock(&vroot_grb_lock);
6440+ if (!vroot_get_real_bdev) {
6441+ vroot_get_real_bdev = func;
6442+ ret = 0;
6443+ }
6444+ spin_unlock(&vroot_grb_lock);
6445+ return ret;
6446+}
6447+EXPORT_SYMBOL(register_vroot_grb);
6448+
6449+int unregister_vroot_grb(vroot_grb_func *func) {
6450+ int ret = -EINVAL;
6451+
6452+ spin_lock(&vroot_grb_lock);
6453+ if (vroot_get_real_bdev) {
6454+ vroot_get_real_bdev = NULL;
6455+ ret = 0;
6456+ }
6457+ spin_unlock(&vroot_grb_lock);
6458+ return ret;
6459+}
6460+EXPORT_SYMBOL(unregister_vroot_grb);
6461+
6462+#endif
6463+
db55b927
AM
6464 /* Return 1 if 'cmd' will block on frozen filesystem */
6465 static int quotactl_cmd_write(int cmd)
6466 {
8931d859 6467@@ -739,6 +780,22 @@ static struct super_block *quotactl_bloc
2380c486
JR
6468 putname(tmp);
6469 if (IS_ERR(bdev))
6470 return ERR_CAST(bdev);
6471+#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6472+ if (bdev && bdev->bd_inode &&
537831f9 6473+ imajor(bdev->bd_inode) == VROOT_MAJOR) {
2380c486
JR
6474+ struct block_device *bdnew = (void *)-EINVAL;
6475+
6476+ if (vroot_get_real_bdev)
6477+ bdnew = vroot_get_real_bdev(bdev);
6478+ else
6479+ vxdprintk(VXD_CBIT(misc, 0),
6480+ "vroot_get_real_bdev not set");
6481+ bdput(bdev);
6482+ if (IS_ERR(bdnew))
6483+ return ERR_PTR(PTR_ERR(bdnew));
6484+ bdev = bdnew;
6485+ }
6486+#endif
db55b927
AM
6487 if (quotactl_cmd_write(cmd))
6488 sb = get_super_thawed(bdev);
6489 else
8931d859
AM
6490diff -NurpP --minimal linux-4.4.161/fs/stat.c linux-4.4.161-vs2.3.9.8/fs/stat.c
6491--- linux-4.4.161/fs/stat.c 2018-10-20 02:34:30.000000000 +0000
6492+++ linux-4.4.161-vs2.3.9.8/fs/stat.c 2018-10-20 04:57:21.000000000 +0000
2380c486 6493@@ -26,6 +26,7 @@ void generic_fillattr(struct inode *inod
d337f35e
JR
6494 stat->nlink = inode->i_nlink;
6495 stat->uid = inode->i_uid;
6496 stat->gid = inode->i_gid;
6497+ stat->tag = inode->i_tag;
6498 stat->rdev = inode->i_rdev;
a168f21d 6499 stat->size = i_size_read(inode);
d337f35e 6500 stat->atime = inode->i_atime;
8931d859
AM
6501diff -NurpP --minimal linux-4.4.161/fs/statfs.c linux-4.4.161-vs2.3.9.8/fs/statfs.c
6502--- linux-4.4.161/fs/statfs.c 2016-01-10 23:01:32.000000000 +0000
6503+++ linux-4.4.161-vs2.3.9.8/fs/statfs.c 2018-10-20 04:57:21.000000000 +0000
93de0823 6504@@ -7,6 +7,8 @@
76514441
AM
6505 #include <linux/statfs.h>
6506 #include <linux/security.h>
6507 #include <linux/uaccess.h>
6508+#include <linux/vs_base.h>
6509+#include <linux/vs_dlimit.h>
db55b927 6510 #include "internal.h"
76514441 6511
93de0823 6512 static int flags_by_mnt(int mnt_flags)
db55b927 6513@@ -60,6 +62,8 @@ static int statfs_by_dentry(struct dentr
93de0823
AM
6514 retval = dentry->d_sb->s_op->statfs(dentry, buf);
6515 if (retval == 0 && buf->f_frsize == 0)
6516 buf->f_frsize = buf->f_bsize;
6517+ if (!vx_check(0, VS_ADMIN|VS_WATCH))
6518+ vx_vsi_statfs(dentry->d_sb, buf);
76514441
AM
6519 return retval;
6520 }
93de0823 6521
8931d859
AM
6522diff -NurpP --minimal linux-4.4.161/fs/super.c linux-4.4.161-vs2.3.9.8/fs/super.c
6523--- linux-4.4.161/fs/super.c 2018-10-20 02:34:30.000000000 +0000
6524+++ linux-4.4.161-vs2.3.9.8/fs/super.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 6525@@ -33,6 +33,8 @@
be261992 6526 #include <linux/cleancache.h>
1e8b8f9b 6527 #include <linux/fsnotify.h>
92598135 6528 #include <linux/lockdep.h>
1e8b8f9b 6529+#include <linux/magic.h>
be261992
AM
6530+#include <linux/vs_context.h>
6531 #include "internal.h"
6532
6533
8931d859 6534@@ -1135,6 +1137,13 @@ mount_fs(struct file_system_type *type,
927ca606 6535 WARN_ON(!sb->s_bdi);
be261992
AM
6536 sb->s_flags |= MS_BORN;
6537
6538+ error = -EPERM;
6539+ if (!vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT) &&
6540+ !sb->s_bdev &&
6541+ (sb->s_magic != PROC_SUPER_MAGIC) &&
6542+ (sb->s_magic != DEVPTS_SUPER_MAGIC))
6543+ goto out_sb;
6544+
6545 error = security_sb_kern_mount(sb, flags, secdata);
6546 if (error)
6547 goto out_sb;
8931d859
AM
6548diff -NurpP --minimal linux-4.4.161/fs/utimes.c linux-4.4.161-vs2.3.9.8/fs/utimes.c
6549--- linux-4.4.161/fs/utimes.c 2018-10-20 02:34:30.000000000 +0000
6550+++ linux-4.4.161-vs2.3.9.8/fs/utimes.c 2018-10-20 04:57:21.000000000 +0000
2380c486
JR
6551@@ -8,6 +8,8 @@
6552 #include <linux/stat.h>
d337f35e 6553 #include <linux/utime.h>
2380c486 6554 #include <linux/syscalls.h>
d337f35e
JR
6555+#include <linux/mount.h>
6556+#include <linux/vs_cowbl.h>
6557 #include <asm/uaccess.h>
6558 #include <asm/unistd.h>
6559
c2e5f7c8 6560@@ -52,13 +54,19 @@ static int utimes_common(struct path *pa
76514441
AM
6561 {
6562 int error;
6563 struct iattr newattrs;
6564- struct inode *inode = path->dentry->d_inode;
c2e5f7c8 6565 struct inode *delegated_inode = NULL;
76514441 6566+ struct inode *inode;
b00e13aa
AM
6567+
6568+ error = cow_check_and_break(path);
6569+ if (error)
6570+ goto out;
76514441
AM
6571
6572 error = mnt_want_write(path->mnt);
6573 if (error)
6574 goto out;
6575
76514441
AM
6576+ inode = path->dentry->d_inode;
6577+
6578 if (times && times[0].tv_nsec == UTIME_NOW &&
6579 times[1].tv_nsec == UTIME_NOW)
6580 times = NULL;
8931d859
AM
6581diff -NurpP --minimal linux-4.4.161/fs/xattr.c linux-4.4.161-vs2.3.9.8/fs/xattr.c
6582--- linux-4.4.161/fs/xattr.c 2018-10-20 02:34:30.000000000 +0000
6583+++ linux-4.4.161-vs2.3.9.8/fs/xattr.c 2018-10-20 05:50:20.000000000 +0000
537831f9 6584@@ -21,6 +21,7 @@
d337f35e 6585 #include <linux/audit.h>
1e8b8f9b 6586 #include <linux/vmalloc.h>
537831f9 6587 #include <linux/posix_acl_xattr.h>
d337f35e 6588+#include <linux/mount.h>
d337f35e 6589
1e8b8f9b 6590 #include <asm/uaccess.h>
d337f35e 6591
537831f9 6592@@ -52,7 +53,7 @@ xattr_permission(struct inode *inode, co
763640ca 6593 * The trusted.* namespace can only be accessed by privileged users.
e03b8c3c 6594 */
763640ca
JR
6595 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
6596- if (!capable(CAP_SYS_ADMIN))
a168f21d
AM
6597+ if (!vx_capable(CAP_SYS_ADMIN, VXC_FS_TRUSTED))
6598 return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
6599 return 0;
6600 }
8931d859
AM
6601diff -NurpP --minimal linux-4.4.161/include/linux/capability.h linux-4.4.161-vs2.3.9.8/include/linux/capability.h
6602--- linux-4.4.161/include/linux/capability.h 2018-10-20 02:34:30.000000000 +0000
6603+++ linux-4.4.161-vs2.3.9.8/include/linux/capability.h 2018-10-20 04:57:21.000000000 +0000
927ca606 6604@@ -77,7 +77,8 @@ extern const kernel_cap_t __cap_init_eff
bb20add7
AM
6605 #else /* HAND-CODED capability initializers */
6606
6607 #define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1)
6608-#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1)
6609+#define CAP_LAST_U32_VALID_MASK ((CAP_TO_MASK(CAP_LAST_CAP + 1) -1) \
6610+ | CAP_TO_MASK(CAP_CONTEXT))
6611
6612 # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }})
6613 # define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }})
8931d859
AM
6614diff -NurpP --minimal linux-4.4.161/include/linux/cred.h linux-4.4.161-vs2.3.9.8/include/linux/cred.h
6615--- linux-4.4.161/include/linux/cred.h 2018-10-20 02:34:30.000000000 +0000
6616+++ linux-4.4.161-vs2.3.9.8/include/linux/cred.h 2018-10-20 04:57:21.000000000 +0000
f19bd705 6617@@ -161,6 +161,7 @@ extern void exit_creds(struct task_struc
1163e6ab
AM
6618 extern int copy_creds(struct task_struct *, unsigned long);
6619 extern const struct cred *get_task_cred(struct task_struct *);
6620 extern struct cred *cred_alloc_blank(void);
6621+extern struct cred *__prepare_creds(const struct cred *);
6622 extern struct cred *prepare_creds(void);
6623 extern struct cred *prepare_exec_creds(void);
6624 extern int commit_creds(struct cred *);
f19bd705 6625@@ -221,6 +222,31 @@ static inline bool cap_ambient_invariant
927ca606 6626 cred->cap_inheritable));
3bac966d 6627 }
3bac966d
AM
6628
6629+static inline void set_cred_subscribers(struct cred *cred, int n)
6630+{
6631+#ifdef CONFIG_DEBUG_CREDENTIALS
6632+ atomic_set(&cred->subscribers, n);
6633+#endif
6634+}
6635+
6636+static inline int read_cred_subscribers(const struct cred *cred)
6637+{
6638+#ifdef CONFIG_DEBUG_CREDENTIALS
6639+ return atomic_read(&cred->subscribers);
6640+#else
6641+ return 0;
6642+#endif
6643+}
6644+
6645+static inline void alter_cred_subscribers(const struct cred *_cred, int n)
6646+{
6647+#ifdef CONFIG_DEBUG_CREDENTIALS
6648+ struct cred *cred = (struct cred *) _cred;
6649+
6650+ atomic_add(n, &cred->subscribers);
6651+#endif
6652+}
6653+
6654 /**
6655 * get_new_cred - Get a reference on a new set of credentials
6656 * @cred: The new credentials to reference
8931d859
AM
6657diff -NurpP --minimal linux-4.4.161/include/linux/dcache.h linux-4.4.161-vs2.3.9.8/include/linux/dcache.h
6658--- linux-4.4.161/include/linux/dcache.h 2018-10-20 02:34:30.000000000 +0000
6659+++ linux-4.4.161-vs2.3.9.8/include/linux/dcache.h 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
6660@@ -10,6 +10,7 @@
6661 #include <linux/cache.h>
6662 #include <linux/rcupdate.h>
6663 #include <linux/lockref.h>
6664+// #include <linux/vs_limit.h>
6665
6666 struct path;
6667 struct vfsmount;
8931d859 6668@@ -352,8 +353,10 @@ extern char *dentry_path(struct dentry *
927ca606
AM
6669 */
6670 static inline struct dentry *dget_dlock(struct dentry *dentry)
6671 {
6672- if (dentry)
6673+ if (dentry) {
6674 dentry->d_lockref.count++;
6675+ // vx_dentry_inc(dentry);
6676+ }
6677 return dentry;
6678 }
6679
8931d859
AM
6680diff -NurpP --minimal linux-4.4.161/include/linux/devpts_fs.h linux-4.4.161-vs2.3.9.8/include/linux/devpts_fs.h
6681--- linux-4.4.161/include/linux/devpts_fs.h 2018-10-20 02:34:30.000000000 +0000
6682+++ linux-4.4.161-vs2.3.9.8/include/linux/devpts_fs.h 2018-10-20 04:57:21.000000000 +0000
927ca606 6683@@ -35,5 +35,4 @@ void devpts_pty_kill(struct inode *inode
2380c486
JR
6684
6685 #endif
d337f35e 6686
2380c486 6687-
d337f35e 6688 #endif /* _LINUX_DEVPTS_FS_H */
8931d859
AM
6689diff -NurpP --minimal linux-4.4.161/include/linux/fs.h linux-4.4.161-vs2.3.9.8/include/linux/fs.h
6690--- linux-4.4.161/include/linux/fs.h 2018-10-20 02:34:30.000000000 +0000
6691+++ linux-4.4.161-vs2.3.9.8/include/linux/fs.h 2018-10-20 04:57:21.000000000 +0000
927ca606 6692@@ -227,6 +227,7 @@ typedef void (dax_iodone_t)(struct buffe
2380c486
JR
6693 #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */
6694 #define ATTR_TIMES_SET (1 << 16)
927ca606
AM
6695 #define ATTR_TOUCH (1 << 17)
6696+#define ATTR_TAG (1 << 18)
d337f35e
JR
6697
6698 /*
bb20add7 6699 * Whiteout is represented by a char device. The following constants define the
927ca606 6700@@ -249,6 +250,7 @@ struct iattr {
d337f35e 6701 umode_t ia_mode;
42bc425c
AM
6702 kuid_t ia_uid;
6703 kgid_t ia_gid;
537831f9 6704+ ktag_t ia_tag;
d337f35e
JR
6705 loff_t ia_size;
6706 struct timespec ia_atime;
6707 struct timespec ia_mtime;
927ca606 6708@@ -587,7 +589,9 @@ struct inode {
a168f21d 6709 unsigned short i_opflags;
42bc425c
AM
6710 kuid_t i_uid;
6711 kgid_t i_gid;
2380c486 6712- unsigned int i_flags;
537831f9 6713+ ktag_t i_tag;
2380c486
JR
6714+ unsigned short i_flags;
6715+ unsigned short i_vflags;
a168f21d
AM
6716
6717 #ifdef CONFIG_FS_POSIX_ACL
6718 struct posix_acl *i_acl;
927ca606 6719@@ -616,6 +620,7 @@ struct inode {
f6c5ef8b
AM
6720 unsigned int __i_nlink;
6721 };
d33d7b00
AM
6722 dev_t i_rdev;
6723+ dev_t i_mdev;
42bc425c 6724 loff_t i_size;
a168f21d
AM
6725 struct timespec i_atime;
6726 struct timespec i_mtime;
927ca606 6727@@ -814,6 +819,11 @@ static inline gid_t i_gid_read(const str
537831f9
AM
6728 return from_kgid(&init_user_ns, inode->i_gid);
6729 }
6730
61333608 6731+static inline vtag_t i_tag_read(const struct inode *inode)
537831f9
AM
6732+{
6733+ return from_ktag(&init_user_ns, inode->i_tag);
6734+}
6735+
6736 static inline void i_uid_write(struct inode *inode, uid_t uid)
6737 {
6738 inode->i_uid = make_kuid(&init_user_ns, uid);
927ca606 6739@@ -824,14 +834,19 @@ static inline void i_gid_write(struct in
537831f9
AM
6740 inode->i_gid = make_kgid(&init_user_ns, gid);
6741 }
2380c486 6742
61333608 6743+static inline void i_tag_write(struct inode *inode, vtag_t tag)
537831f9
AM
6744+{
6745+ inode->i_tag = make_ktag(&init_user_ns, tag);
6746+}
6747+
2380c486
JR
6748 static inline unsigned iminor(const struct inode *inode)
6749 {
6750- return MINOR(inode->i_rdev);
6751+ return MINOR(inode->i_mdev);
6752 }
6753
6754 static inline unsigned imajor(const struct inode *inode)
6755 {
6756- return MAJOR(inode->i_rdev);
6757+ return MAJOR(inode->i_mdev);
6758 }
6759
6760 extern struct block_device *I_BDEV(struct inode *inode);
927ca606 6761@@ -888,6 +903,7 @@ struct file {
d337f35e
JR
6762 loff_t f_pos;
6763 struct fown_struct f_owner;
ec22aa5c 6764 const struct cred *f_cred;
61333608 6765+ vxid_t f_xid;
d337f35e
JR
6766 struct file_ra_state f_ra;
6767
2380c486 6768 u64 f_version;
927ca606 6769@@ -1022,6 +1038,7 @@ struct file_lock {
2380c486 6770 struct file *fl_file;
d337f35e
JR
6771 loff_t fl_start;
6772 loff_t fl_end;
61333608 6773+ vxid_t fl_xid;
d337f35e
JR
6774
6775 struct fasync_struct * fl_fasync; /* for lease break notifications */
f6c5ef8b 6776 /* for lease breaks: */
8931d859 6777@@ -1699,6 +1716,7 @@ struct inode_operations {
d4263eb0
JR
6778 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
6779 ssize_t (*listxattr) (struct dentry *, char *, size_t);
6780 int (*removexattr) (struct dentry *, const char *);
6781+ int (*sync_flags) (struct inode *, int, int);
d33d7b00
AM
6782 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
6783 u64 len);
42bc425c 6784 int (*update_time)(struct inode *, struct timespec *, int);
8931d859 6785@@ -1713,6 +1731,7 @@ ssize_t rw_copy_check_uvector(int type,
537831f9
AM
6786 unsigned long nr_segs, unsigned long fast_segs,
6787 struct iovec *fast_pointer,
6788 struct iovec **ret_pointer);
d337f35e
JR
6789+ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t);
6790
927ca606
AM
6791 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
6792 extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *);
8931d859 6793@@ -1778,6 +1797,14 @@ struct super_operations {
927ca606
AM
6794 #else
6795 #define S_DAX 0 /* Make all the DAX code disappear */
6796 #endif
6797+#define S_IXUNLINK 16384 /* Immutable Invert on unlink */
537831f9
AM
6798+
6799+/* Linux-VServer related Inode flags */
6800+
6801+#define V_VALID 1
6802+#define V_XATTR 2
6803+#define V_BARRIER 4 /* Barrier for chroot() */
6804+#define V_COW 8 /* Copy on Write */
6805
6806 /*
6807 * Note that nosuid etc flags are inode-specific: setting some file-system
8931d859 6808@@ -1802,10 +1829,13 @@ struct super_operations {
537831f9
AM
6809 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
6810 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
6811 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
6812+#define IS_TAGGED(inode) __IS_FLG(inode, MS_TAGGED)
6813
6814 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
6815 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
6816 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
6817+#define IS_IXUNLINK(inode) ((inode)->i_flags & S_IXUNLINK)
6818+#define IS_IXORUNLINK(inode) ((IS_IXUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode))
6819 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL)
6820
6821 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
8931d859 6822@@ -1820,6 +1850,16 @@ struct super_operations {
bb20add7
AM
6823 #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
6824 (inode)->i_rdev == WHITEOUT_DEV)
537831f9
AM
6825
6826+#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_vflags & V_BARRIER))
6827+
6828+#ifdef CONFIG_VSERVER_COWBL
6829+# define IS_COW(inode) (IS_IXUNLINK(inode) && IS_IMMUTABLE(inode))
6830+# define IS_COW_LINK(inode) (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1))
6831+#else
6832+# define IS_COW(inode) (0)
6833+# define IS_COW_LINK(inode) (0)
6834+#endif
6835+
6836 /*
6837 * Inode state bits. Protected by inode->i_lock
6838 *
8931d859 6839@@ -2076,6 +2116,9 @@ extern struct kobject *fs_kobj;
bb20add7 6840 extern int locks_mandatory_locked(struct file *);
537831f9
AM
6841 extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
6842
6843+#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */
6844+#define ATTR_FLAG_IXUNLINK 1024 /* Immutable invert on unlink */
6845+
6846 /*
6847 * Candidates for mandatory locking have the setgid bit set
6848 * but no group execute bit - an otherwise meaningless combination.
8931d859 6849@@ -2831,6 +2874,7 @@ extern int dcache_dir_open(struct inode
d337f35e
JR
6850 extern int dcache_dir_close(struct inode *, struct file *);
6851 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
c2e5f7c8
JR
6852 extern int dcache_readdir(struct file *, struct dir_context *);
6853+extern int dcache_readdir_filter(struct file *, struct dir_context *, int (*)(struct dentry *));
76514441 6854 extern int simple_setattr(struct dentry *, struct iattr *);
d337f35e
JR
6855 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
6856 extern int simple_statfs(struct dentry *, struct kstatfs *);
8931d859
AM
6857diff -NurpP --minimal linux-4.4.161/include/linux/init_task.h linux-4.4.161-vs2.3.9.8/include/linux/init_task.h
6858--- linux-4.4.161/include/linux/init_task.h 2016-01-10 23:01:32.000000000 +0000
6859+++ linux-4.4.161-vs2.3.9.8/include/linux/init_task.h 2018-10-20 04:57:21.000000000 +0000
927ca606 6860@@ -260,6 +260,10 @@ extern struct task_group root_task_group
b00e13aa 6861 INIT_VTIME(tsk) \
927ca606
AM
6862 INIT_NUMA_BALANCING(tsk) \
6863 INIT_KASAN(tsk) \
d337f35e
JR
6864+ .xid = 0, \
6865+ .vx_info = NULL, \
6866+ .nid = 0, \
6867+ .nx_info = NULL, \
6868 }
6869
6870
8931d859
AM
6871diff -NurpP --minimal linux-4.4.161/include/linux/ipc.h linux-4.4.161-vs2.3.9.8/include/linux/ipc.h
6872--- linux-4.4.161/include/linux/ipc.h 2016-01-10 23:01:32.000000000 +0000
6873+++ linux-4.4.161-vs2.3.9.8/include/linux/ipc.h 2018-10-20 04:57:21.000000000 +0000
537831f9 6874@@ -16,6 +16,7 @@ struct kern_ipc_perm
d337f35e 6875 key_t key;
537831f9
AM
6876 kuid_t uid;
6877 kgid_t gid;
61333608 6878+ vxid_t xid;
537831f9
AM
6879 kuid_t cuid;
6880 kgid_t cgid;
db55b927 6881 umode_t mode;
8931d859
AM
6882diff -NurpP --minimal linux-4.4.161/include/linux/memcontrol.h linux-4.4.161-vs2.3.9.8/include/linux/memcontrol.h
6883--- linux-4.4.161/include/linux/memcontrol.h 2018-10-20 02:34:30.000000000 +0000
6884+++ linux-4.4.161-vs2.3.9.8/include/linux/memcontrol.h 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
6885@@ -113,6 +113,7 @@ struct cg_proto {
6886 struct mem_cgroup *memcg;
6887 };
6888
6889+
6890 #ifdef CONFIG_MEMCG
6891 struct mem_cgroup_stat_cpu {
6892 long count[MEM_CGROUP_STAT_NSTATS];
1d9ad342 6893@@ -338,6 +339,12 @@ static inline bool mem_cgroup_is_descend
927ca606
AM
6894 return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
6895 }
6896
1d9ad342
AM
6897+extern unsigned long mem_cgroup_mem_usage_pages(struct mem_cgroup *memcg);
6898+extern unsigned long mem_cgroup_mem_limit_pages(struct mem_cgroup *memcg);
6899+extern unsigned long mem_cgroup_memsw_usage_pages(struct mem_cgroup *memcg);
6900+extern unsigned long mem_cgroup_memsw_limit_pages(struct mem_cgroup *memcg);
6901+extern void dump_mem_cgroup(struct mem_cgroup *memcg);
927ca606
AM
6902+
6903 static inline bool mm_match_cgroup(struct mm_struct *mm,
6904 struct mem_cgroup *memcg)
e3afe727 6905 {
8931d859
AM
6906diff -NurpP --minimal linux-4.4.161/include/linux/mount.h linux-4.4.161-vs2.3.9.8/include/linux/mount.h
6907--- linux-4.4.161/include/linux/mount.h 2018-10-20 02:34:30.000000000 +0000
6908+++ linux-4.4.161-vs2.3.9.8/include/linux/mount.h 2018-10-20 04:57:21.000000000 +0000
927ca606 6909@@ -63,6 +63,9 @@ struct mnt_namespace;
bb20add7 6910 #define MNT_MARKED 0x4000000
927ca606 6911 #define MNT_UMOUNT 0x8000000
d337f35e 6912
2380c486
JR
6913+#define MNT_TAGID 0x10000
6914+#define MNT_NOTAG 0x20000
6915+
d337f35e 6916 struct vfsmount {
db55b927
AM
6917 struct dentry *mnt_root; /* root of the mounted tree */
6918 struct super_block *mnt_sb; /* pointer to superblock */
8931d859
AM
6919diff -NurpP --minimal linux-4.4.161/include/linux/net.h linux-4.4.161-vs2.3.9.8/include/linux/net.h
6920--- linux-4.4.161/include/linux/net.h 2018-10-20 02:34:30.000000000 +0000
6921+++ linux-4.4.161-vs2.3.9.8/include/linux/net.h 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
6922@@ -43,6 +43,7 @@ struct net;
6923 #define SOCK_NOSPACE 2
d337f35e
JR
6924 #define SOCK_PASSCRED 3
6925 #define SOCK_PASSSEC 4
927ca606 6926+#define SOCK_USER_SOCKET 5
d337f35e
JR
6927
6928 #ifndef ARCH_HAS_SOCKET_TYPES
6929 /**
8931d859
AM
6930diff -NurpP --minimal linux-4.4.161/include/linux/netdevice.h linux-4.4.161-vs2.3.9.8/include/linux/netdevice.h
6931--- linux-4.4.161/include/linux/netdevice.h 2018-10-20 02:34:30.000000000 +0000
6932+++ linux-4.4.161-vs2.3.9.8/include/linux/netdevice.h 2018-10-20 04:57:21.000000000 +0000
927ca606 6933@@ -2296,6 +2296,7 @@ static inline int dev_recursion_level(vo
c2e5f7c8
JR
6934
6935 struct net_device *dev_get_by_index(struct net *net, int ifindex);
6936 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
6937+struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex);
6938 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
6939 int netdev_get_name(struct net *net, char *name, int ifindex);
6940 int dev_restart(struct net_device *dev);
8931d859
AM
6941diff -NurpP --minimal linux-4.4.161/include/linux/nsproxy.h linux-4.4.161-vs2.3.9.8/include/linux/nsproxy.h
6942--- linux-4.4.161/include/linux/nsproxy.h 2016-01-10 23:01:32.000000000 +0000
6943+++ linux-4.4.161-vs2.3.9.8/include/linux/nsproxy.h 2018-10-20 04:57:21.000000000 +0000
2380c486 6944@@ -3,6 +3,7 @@
d337f35e 6945
2380c486
JR
6946 #include <linux/spinlock.h>
6947 #include <linux/sched.h>
6948+#include <linux/vserver/debug.h>
6949
6950 struct mnt_namespace;
6951 struct uts_namespace;
bb20add7
AM
6952@@ -63,6 +64,7 @@ extern struct nsproxy init_nsproxy;
6953 */
2380c486
JR
6954
6955 int copy_namespaces(unsigned long flags, struct task_struct *tsk);
6956+struct nsproxy *copy_nsproxy(struct nsproxy *orig);
6957 void exit_task_namespaces(struct task_struct *tsk);
6958 void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
6959 void free_nsproxy(struct nsproxy *ns);
bb20add7 6960@@ -70,16 +72,26 @@ int unshare_nsproxy_namespaces(unsigned
b00e13aa 6961 struct cred *, struct fs_struct *);
a168f21d 6962 int __init nsproxy_cache_init(void);
2380c486
JR
6963
6964-static inline void put_nsproxy(struct nsproxy *ns)
6965+#define get_nsproxy(n) __get_nsproxy(n, __FILE__, __LINE__)
d337f35e 6966+
2380c486
JR
6967+static inline void __get_nsproxy(struct nsproxy *ns,
6968+ const char *_file, int _line)
6969 {
6970- if (atomic_dec_and_test(&ns->count)) {
6971- free_nsproxy(ns);
6972- }
6973+ vxlprintk(VXD_CBIT(space, 0), "get_nsproxy(%p[%u])",
6974+ ns, atomic_read(&ns->count), _file, _line);
d337f35e 6975+ atomic_inc(&ns->count);
2380c486
JR
6976 }
6977
6978-static inline void get_nsproxy(struct nsproxy *ns)
6979+#define put_nsproxy(n) __put_nsproxy(n, __FILE__, __LINE__)
d337f35e 6980+
2380c486
JR
6981+static inline void __put_nsproxy(struct nsproxy *ns,
6982+ const char *_file, int _line)
6983 {
6984- atomic_inc(&ns->count);
6985+ vxlprintk(VXD_CBIT(space, 0), "put_nsproxy(%p[%u])",
6986+ ns, atomic_read(&ns->count), _file, _line);
6987+ if (atomic_dec_and_test(&ns->count)) {
6988+ free_nsproxy(ns);
6989+ }
6990 }
d337f35e 6991
763640ca 6992 #endif
8931d859
AM
6993diff -NurpP --minimal linux-4.4.161/include/linux/pid.h linux-4.4.161-vs2.3.9.8/include/linux/pid.h
6994--- linux-4.4.161/include/linux/pid.h 2018-10-20 02:34:30.000000000 +0000
6995+++ linux-4.4.161-vs2.3.9.8/include/linux/pid.h 2018-10-20 04:57:21.000000000 +0000
927ca606 6996@@ -10,7 +10,8 @@ enum pid_type
d337f35e 6997 PIDTYPE_SID,
927ca606
AM
6998 PIDTYPE_MAX,
6999 /* only valid to __task_pid_nr_ns() */
7000- __PIDTYPE_TGID
7001+ __PIDTYPE_TGID,
7002+ __PIDTYPE_REALPID
d337f35e
JR
7003 };
7004
7005 /*
927ca606 7006@@ -172,6 +173,7 @@ static inline pid_t pid_nr(struct pid *p
2380c486
JR
7007 }
7008
7009 pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
7010+pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns);
7011 pid_t pid_vnr(struct pid *pid);
7012
7013 #define do_each_pid_task(pid, type, task) \
8931d859
AM
7014diff -NurpP --minimal linux-4.4.161/include/linux/quotaops.h linux-4.4.161-vs2.3.9.8/include/linux/quotaops.h
7015--- linux-4.4.161/include/linux/quotaops.h 2016-01-10 23:01:32.000000000 +0000
7016+++ linux-4.4.161-vs2.3.9.8/include/linux/quotaops.h 2018-10-20 04:57:21.000000000 +0000
e22b5178
AM
7017@@ -8,6 +8,7 @@
7018 #define _LINUX_QUOTAOPS_
7019
7020 #include <linux/fs.h>
7021+#include <linux/vs_dlimit.h>
7022
76514441
AM
7023 #define DQUOT_SPACE_WARN 0x1
7024 #define DQUOT_SPACE_RESERVE 0x2
927ca606 7025@@ -211,11 +212,12 @@ static inline void dquot_drop(struct ino
76514441 7026
927ca606 7027 static inline int dquot_alloc_inode(struct inode *inode)
76514441
AM
7028 {
7029- return 0;
7030+ return dl_alloc_inode(inode);
7031 }
7032
927ca606 7033 static inline void dquot_free_inode(struct inode *inode)
e22b5178 7034 {
76514441
AM
7035+ dl_free_inode(inode);
7036 }
7037
7038 static inline int dquot_transfer(struct inode *inode, struct iattr *iattr)
927ca606 7039@@ -226,6 +228,10 @@ static inline int dquot_transfer(struct
76514441
AM
7040 static inline int __dquot_alloc_space(struct inode *inode, qsize_t number,
7041 int flags)
7042 {
7043+ int ret = 0;
7044+
7045+ if ((ret = dl_alloc_space(inode, number)))
7046+ return ret;
7047 if (!(flags & DQUOT_SPACE_RESERVE))
7048 inode_add_bytes(inode, number);
7049 return 0;
927ca606 7050@@ -236,6 +242,7 @@ static inline void __dquot_free_space(st
76514441
AM
7051 {
7052 if (!(flags & DQUOT_SPACE_RESERVE))
7053 inode_sub_bytes(inode, number);
7054+ dl_free_space(inode, number);
7055 }
7056
7057 static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
8931d859
AM
7058diff -NurpP --minimal linux-4.4.161/include/linux/sched.h linux-4.4.161-vs2.3.9.8/include/linux/sched.h
7059--- linux-4.4.161/include/linux/sched.h 2018-10-20 02:34:30.000000000 +0000
7060+++ linux-4.4.161-vs2.3.9.8/include/linux/sched.h 2018-10-20 05:50:20.000000000 +0000
0e1bbc97 7061@@ -1601,6 +1601,14 @@ struct task_struct {
2380c486 7062 #endif
42bc425c 7063 struct seccomp seccomp;
2380c486
JR
7064
7065+/* vserver context data */
7066+ struct vx_info *vx_info;
7067+ struct nx_info *nx_info;
d337f35e 7068+
61333608
AM
7069+ vxid_t xid;
7070+ vnid_t nid;
7071+ vtag_t tag;
2380c486
JR
7072+
7073 /* Thread group tracking */
7074 u32 parent_exec_id;
7075 u32 self_exec_id;
0e1bbc97 7076@@ -1928,6 +1936,11 @@ struct pid_namespace;
ec22aa5c
AM
7077 pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
7078 struct pid_namespace *ns);
d337f35e 7079
2380c486
JR
7080+#include <linux/vserver/base.h>
7081+#include <linux/vserver/context.h>
7082+#include <linux/vserver/debug.h>
7083+#include <linux/vserver/pid.h>
7084+
7085 static inline pid_t task_pid_nr(struct task_struct *tsk)
7086 {
7087 return tsk->pid;
0e1bbc97 7088@@ -1941,7 +1954,8 @@ static inline pid_t task_pid_nr_ns(struc
d337f35e 7089
2380c486
JR
7090 static inline pid_t task_pid_vnr(struct task_struct *tsk)
7091 {
ec22aa5c
AM
7092- return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
7093+ // return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
7094+ return vx_map_pid(__task_pid_nr_ns(tsk, PIDTYPE_PID, NULL));
2380c486 7095 }
d337f35e 7096
d337f35e 7097
8931d859
AM
7098diff -NurpP --minimal linux-4.4.161/include/linux/shmem_fs.h linux-4.4.161-vs2.3.9.8/include/linux/shmem_fs.h
7099--- linux-4.4.161/include/linux/shmem_fs.h 2018-10-20 02:34:30.000000000 +0000
7100+++ linux-4.4.161-vs2.3.9.8/include/linux/shmem_fs.h 2018-10-20 04:57:21.000000000 +0000
bb20add7 7101@@ -10,6 +10,9 @@
2380c486 7102
a168f21d 7103 /* inode in-kernel data */
2380c486
JR
7104
7105+#define TMPFS_SUPER_MAGIC 0x01021994
7106+
7107+
7108 struct shmem_inode_info {
7109 spinlock_t lock;
bb20add7 7110 unsigned int seals; /* shmem seals */
8931d859
AM
7111diff -NurpP --minimal linux-4.4.161/include/linux/stat.h linux-4.4.161-vs2.3.9.8/include/linux/stat.h
7112--- linux-4.4.161/include/linux/stat.h 2016-01-10 23:01:32.000000000 +0000
7113+++ linux-4.4.161-vs2.3.9.8/include/linux/stat.h 2018-10-20 04:57:21.000000000 +0000
537831f9 7114@@ -25,6 +25,7 @@ struct kstat {
2380c486 7115 unsigned int nlink;
42bc425c
AM
7116 kuid_t uid;
7117 kgid_t gid;
8ce283e1 7118+ ktag_t tag;
2380c486
JR
7119 dev_t rdev;
7120 loff_t size;
7121 struct timespec atime;
8931d859
AM
7122diff -NurpP --minimal linux-4.4.161/include/linux/sunrpc/auth.h linux-4.4.161-vs2.3.9.8/include/linux/sunrpc/auth.h
7123--- linux-4.4.161/include/linux/sunrpc/auth.h 2016-01-10 23:01:32.000000000 +0000
7124+++ linux-4.4.161-vs2.3.9.8/include/linux/sunrpc/auth.h 2018-10-20 04:57:21.000000000 +0000
927ca606 7125@@ -40,6 +40,7 @@ enum {
2380c486 7126 struct auth_cred {
b00e13aa
AM
7127 kuid_t uid;
7128 kgid_t gid;
7129+ ktag_t tag;
2380c486 7130 struct group_info *group_info;
db55b927 7131 const char *principal;
c2e5f7c8 7132 unsigned long ac_flags;
8931d859
AM
7133diff -NurpP --minimal linux-4.4.161/include/linux/sunrpc/clnt.h linux-4.4.161-vs2.3.9.8/include/linux/sunrpc/clnt.h
7134--- linux-4.4.161/include/linux/sunrpc/clnt.h 2018-10-20 02:34:30.000000000 +0000
7135+++ linux-4.4.161-vs2.3.9.8/include/linux/sunrpc/clnt.h 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8 7136@@ -51,7 +51,8 @@ struct rpc_clnt {
2380c486 7137 cl_discrtry : 1,/* disconnect before retry */
c2e5f7c8 7138 cl_noretranstimeo: 1,/* No retransmit timeouts */
2380c486
JR
7139 cl_autobind : 1,/* use getport() */
7140- cl_chatty : 1;/* be verbose */
7141+ cl_chatty : 1,/* be verbose */
7142+ cl_tag : 1;/* context tagging */
d337f35e 7143
2380c486
JR
7144 struct rpc_rtt * cl_rtt; /* RTO estimator data */
7145 const struct rpc_timeout *cl_timeout; /* Timeout strategy */
8931d859
AM
7146diff -NurpP --minimal linux-4.4.161/include/linux/types.h linux-4.4.161-vs2.3.9.8/include/linux/types.h
7147--- linux-4.4.161/include/linux/types.h 2016-01-10 23:01:32.000000000 +0000
7148+++ linux-4.4.161-vs2.3.9.8/include/linux/types.h 2018-10-20 04:57:21.000000000 +0000
537831f9 7149@@ -32,6 +32,9 @@ typedef __kernel_uid32_t uid_t;
2380c486
JR
7150 typedef __kernel_gid32_t gid_t;
7151 typedef __kernel_uid16_t uid16_t;
7152 typedef __kernel_gid16_t gid16_t;
61333608
AM
7153+typedef unsigned int vxid_t;
7154+typedef unsigned int vnid_t;
7155+typedef unsigned int vtag_t;
2380c486
JR
7156
7157 typedef unsigned long uintptr_t;
7158
8931d859
AM
7159diff -NurpP --minimal linux-4.4.161/include/linux/uidgid.h linux-4.4.161-vs2.3.9.8/include/linux/uidgid.h
7160--- linux-4.4.161/include/linux/uidgid.h 2016-01-10 23:01:32.000000000 +0000
7161+++ linux-4.4.161-vs2.3.9.8/include/linux/uidgid.h 2018-10-20 04:57:21.000000000 +0000
bb20add7 7162@@ -21,13 +21,17 @@ typedef struct {
537831f9
AM
7163 uid_t val;
7164 } kuid_t;
7165
7166-
7167 typedef struct {
7168 gid_t val;
7169 } kgid_t;
7170
7171+typedef struct {
61333608 7172+ vtag_t val;
537831f9
AM
7173+} ktag_t;
7174+
7175 #define KUIDT_INIT(value) (kuid_t){ value }
7176 #define KGIDT_INIT(value) (kgid_t){ value }
7177+#define KTAGT_INIT(value) (ktag_t){ value }
7178
927ca606 7179 #ifdef CONFIG_MULTIUSER
537831f9 7180 static inline uid_t __kuid_val(kuid_t uid)
927ca606 7181@@ -51,11 +55,18 @@ static inline gid_t __kgid_val(kgid_t gi
537831f9 7182 }
927ca606 7183 #endif
537831f9 7184
61333608 7185+static inline vtag_t __ktag_val(ktag_t tag)
537831f9
AM
7186+{
7187+ return tag.val;
7188+}
7189+
537831f9
AM
7190 #define GLOBAL_ROOT_UID KUIDT_INIT(0)
7191 #define GLOBAL_ROOT_GID KGIDT_INIT(0)
7192+#define GLOBAL_ROOT_TAG KTAGT_INIT(0)
7193
7194 #define INVALID_UID KUIDT_INIT(-1)
7195 #define INVALID_GID KGIDT_INIT(-1)
7196+#define INVALID_TAG KTAGT_INIT(-1)
7197
7198 static inline bool uid_eq(kuid_t left, kuid_t right)
7199 {
927ca606 7200@@ -67,6 +78,11 @@ static inline bool gid_eq(kgid_t left, k
537831f9
AM
7201 return __kgid_val(left) == __kgid_val(right);
7202 }
7203
7204+static inline bool tag_eq(ktag_t left, ktag_t right)
7205+{
7206+ return __ktag_val(left) == __ktag_val(right);
7207+}
7208+
7209 static inline bool uid_gt(kuid_t left, kuid_t right)
7210 {
7211 return __kuid_val(left) > __kuid_val(right);
927ca606
AM
7212@@ -117,13 +133,21 @@ static inline bool gid_valid(kgid_t gid)
7213 return __kgid_val(gid) != (gid_t) -1;
537831f9
AM
7214 }
7215
7216+static inline bool tag_valid(ktag_t tag)
7217+{
7218+ return !tag_eq(tag, INVALID_TAG);
7219+}
7220+
7221 #ifdef CONFIG_USER_NS
7222
7223 extern kuid_t make_kuid(struct user_namespace *from, uid_t uid);
7224 extern kgid_t make_kgid(struct user_namespace *from, gid_t gid);
c90fe048 7225+extern ktag_t make_ktag(struct user_namespace *from, gid_t gid);
537831f9
AM
7226
7227 extern uid_t from_kuid(struct user_namespace *to, kuid_t uid);
7228 extern gid_t from_kgid(struct user_namespace *to, kgid_t gid);
61333608 7229+extern vtag_t from_ktag(struct user_namespace *to, ktag_t tag);
537831f9
AM
7230+
7231 extern uid_t from_kuid_munged(struct user_namespace *to, kuid_t uid);
7232 extern gid_t from_kgid_munged(struct user_namespace *to, kgid_t gid);
7233
927ca606 7234@@ -149,6 +173,11 @@ static inline kgid_t make_kgid(struct us
537831f9
AM
7235 return KGIDT_INIT(gid);
7236 }
7237
61333608 7238+static inline ktag_t make_ktag(struct user_namespace *from, vtag_t tag)
537831f9
AM
7239+{
7240+ return KTAGT_INIT(tag);
7241+}
7242+
7243 static inline uid_t from_kuid(struct user_namespace *to, kuid_t kuid)
7244 {
7245 return __kuid_val(kuid);
927ca606 7246@@ -159,6 +188,11 @@ static inline gid_t from_kgid(struct use
537831f9
AM
7247 return __kgid_val(kgid);
7248 }
7249
61333608 7250+static inline vtag_t from_ktag(struct user_namespace *to, ktag_t ktag)
537831f9
AM
7251+{
7252+ return __ktag_val(ktag);
7253+}
7254+
7255 static inline uid_t from_kuid_munged(struct user_namespace *to, kuid_t kuid)
7256 {
7257 uid_t uid = from_kuid(to, kuid);
8931d859
AM
7258diff -NurpP --minimal linux-4.4.161/include/linux/vroot.h linux-4.4.161-vs2.3.9.8/include/linux/vroot.h
7259--- linux-4.4.161/include/linux/vroot.h 1970-01-01 00:00:00.000000000 +0000
7260+++ linux-4.4.161-vs2.3.9.8/include/linux/vroot.h 2018-10-20 04:57:21.000000000 +0000
2380c486
JR
7261@@ -0,0 +1,51 @@
7262+
7263+/*
7264+ * include/linux/vroot.h
7265+ *
d6221c00
AM
7266+ * written by Herbert P?tzl, 9/11/2002
7267+ * ported to 2.6 by Herbert P?tzl, 30/12/2004
2380c486 7268+ *
d6221c00 7269+ * Copyright (C) 2002-2007 by Herbert P?tzl.
2380c486
JR
7270+ * Redistribution of this file is permitted under the
7271+ * GNU General Public License.
7272+ */
7273+
7274+#ifndef _LINUX_VROOT_H
7275+#define _LINUX_VROOT_H
7276+
7277+
7278+#ifdef __KERNEL__
7279+
7280+/* Possible states of device */
7281+enum {
7282+ Vr_unbound,
7283+ Vr_bound,
7284+};
7285+
7286+struct vroot_device {
7287+ int vr_number;
7288+ int vr_refcnt;
7289+
7290+ struct semaphore vr_ctl_mutex;
7291+ struct block_device *vr_device;
7292+ int vr_state;
7293+};
7294+
7295+
7296+typedef struct block_device *(vroot_grb_func)(struct block_device *);
7297+
7298+extern int register_vroot_grb(vroot_grb_func *);
7299+extern int unregister_vroot_grb(vroot_grb_func *);
7300+
7301+#endif /* __KERNEL__ */
7302+
7303+#define MAX_VROOT_DEFAULT 8
7304+
7305+/*
7306+ * IOCTL commands --- we will commandeer 0x56 ('V')
7307+ */
7308+
7309+#define VROOT_SET_DEV 0x5600
7310+#define VROOT_CLR_DEV 0x5601
7311+
7312+#endif /* _LINUX_VROOT_H */
8931d859
AM
7313diff -NurpP --minimal linux-4.4.161/include/linux/vs_base.h linux-4.4.161-vs2.3.9.8/include/linux/vs_base.h
7314--- linux-4.4.161/include/linux/vs_base.h 1970-01-01 00:00:00.000000000 +0000
7315+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_base.h 2018-10-20 04:57:21.000000000 +0000
2380c486
JR
7316@@ -0,0 +1,10 @@
7317+#ifndef _VS_BASE_H
7318+#define _VS_BASE_H
7319+
7320+#include "vserver/base.h"
7321+#include "vserver/check.h"
7322+#include "vserver/debug.h"
7323+
7324+#else
7325+#warning duplicate inclusion
7326+#endif
8931d859
AM
7327diff -NurpP --minimal linux-4.4.161/include/linux/vs_context.h linux-4.4.161-vs2.3.9.8/include/linux/vs_context.h
7328--- linux-4.4.161/include/linux/vs_context.h 1970-01-01 00:00:00.000000000 +0000
7329+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_context.h 2018-10-20 04:57:21.000000000 +0000
4a036bed 7330@@ -0,0 +1,242 @@
2380c486
JR
7331+#ifndef _VS_CONTEXT_H
7332+#define _VS_CONTEXT_H
7333+
7334+#include "vserver/base.h"
7335+#include "vserver/check.h"
7336+#include "vserver/context.h"
7337+#include "vserver/history.h"
7338+#include "vserver/debug.h"
7339+
7340+#include <linux/sched.h>
7341+
7342+
7343+#define get_vx_info(i) __get_vx_info(i, __FILE__, __LINE__, __HERE__)
7344+
7345+static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
7346+ const char *_file, int _line, void *_here)
7347+{
7348+ if (!vxi)
7349+ return NULL;
7350+
7351+ vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])",
7352+ vxi, vxi ? vxi->vx_id : 0,
7353+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7354+ _file, _line);
7355+ __vxh_get_vx_info(vxi, _here);
7356+
7357+ atomic_inc(&vxi->vx_usecnt);
7358+ return vxi;
7359+}
7360+
7361+
7362+extern void free_vx_info(struct vx_info *);
7363+
7364+#define put_vx_info(i) __put_vx_info(i, __FILE__, __LINE__, __HERE__)
7365+
7366+static inline void __put_vx_info(struct vx_info *vxi,
7367+ const char *_file, int _line, void *_here)
7368+{
7369+ if (!vxi)
7370+ return;
7371+
7372+ vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])",
7373+ vxi, vxi ? vxi->vx_id : 0,
7374+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7375+ _file, _line);
7376+ __vxh_put_vx_info(vxi, _here);
7377+
7378+ if (atomic_dec_and_test(&vxi->vx_usecnt))
7379+ free_vx_info(vxi);
7380+}
7381+
7382+
7383+#define init_vx_info(p, i) \
7384+ __init_vx_info(p, i, __FILE__, __LINE__, __HERE__)
7385+
7386+static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi,
7387+ const char *_file, int _line, void *_here)
7388+{
7389+ if (vxi) {
7390+ vxlprintk(VXD_CBIT(xid, 3),
7391+ "init_vx_info(%p[#%d.%d])",
7392+ vxi, vxi ? vxi->vx_id : 0,
7393+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7394+ _file, _line);
7395+ __vxh_init_vx_info(vxi, vxp, _here);
7396+
7397+ atomic_inc(&vxi->vx_usecnt);
7398+ }
7399+ *vxp = vxi;
7400+}
7401+
7402+
7403+#define set_vx_info(p, i) \
7404+ __set_vx_info(p, i, __FILE__, __LINE__, __HERE__)
7405+
7406+static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
7407+ const char *_file, int _line, void *_here)
7408+{
7409+ struct vx_info *vxo;
7410+
7411+ if (!vxi)
7412+ return;
7413+
7414+ vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])",
7415+ vxi, vxi ? vxi->vx_id : 0,
7416+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7417+ _file, _line);
7418+ __vxh_set_vx_info(vxi, vxp, _here);
7419+
7420+ atomic_inc(&vxi->vx_usecnt);
7421+ vxo = xchg(vxp, vxi);
7422+ BUG_ON(vxo);
7423+}
7424+
7425+
7426+#define clr_vx_info(p) __clr_vx_info(p, __FILE__, __LINE__, __HERE__)
7427+
7428+static inline void __clr_vx_info(struct vx_info **vxp,
7429+ const char *_file, int _line, void *_here)
7430+{
7431+ struct vx_info *vxo;
7432+
7433+ vxo = xchg(vxp, NULL);
7434+ if (!vxo)
7435+ return;
7436+
7437+ vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])",
7438+ vxo, vxo ? vxo->vx_id : 0,
7439+ vxo ? atomic_read(&vxo->vx_usecnt) : 0,
7440+ _file, _line);
7441+ __vxh_clr_vx_info(vxo, vxp, _here);
7442+
7443+ if (atomic_dec_and_test(&vxo->vx_usecnt))
7444+ free_vx_info(vxo);
7445+}
7446+
7447+
7448+#define claim_vx_info(v, p) \
7449+ __claim_vx_info(v, p, __FILE__, __LINE__, __HERE__)
7450+
7451+static inline void __claim_vx_info(struct vx_info *vxi,
7452+ struct task_struct *task,
7453+ const char *_file, int _line, void *_here)
7454+{
7455+ vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p",
7456+ vxi, vxi ? vxi->vx_id : 0,
7457+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7458+ vxi ? atomic_read(&vxi->vx_tasks) : 0,
7459+ task, _file, _line);
7460+ __vxh_claim_vx_info(vxi, task, _here);
7461+
7462+ atomic_inc(&vxi->vx_tasks);
7463+}
7464+
7465+
7466+extern void unhash_vx_info(struct vx_info *);
7467+
7468+#define release_vx_info(v, p) \
7469+ __release_vx_info(v, p, __FILE__, __LINE__, __HERE__)
7470+
7471+static inline void __release_vx_info(struct vx_info *vxi,
7472+ struct task_struct *task,
7473+ const char *_file, int _line, void *_here)
7474+{
7475+ vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p",
7476+ vxi, vxi ? vxi->vx_id : 0,
7477+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7478+ vxi ? atomic_read(&vxi->vx_tasks) : 0,
7479+ task, _file, _line);
7480+ __vxh_release_vx_info(vxi, task, _here);
7481+
7482+ might_sleep();
7483+
7484+ if (atomic_dec_and_test(&vxi->vx_tasks))
7485+ unhash_vx_info(vxi);
7486+}
7487+
7488+
7489+#define task_get_vx_info(p) \
7490+ __task_get_vx_info(p, __FILE__, __LINE__, __HERE__)
7491+
7492+static inline struct vx_info *__task_get_vx_info(struct task_struct *p,
7493+ const char *_file, int _line, void *_here)
7494+{
7495+ struct vx_info *vxi;
7496+
7497+ task_lock(p);
7498+ vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)",
7499+ p, _file, _line);
7500+ vxi = __get_vx_info(p->vx_info, _file, _line, _here);
7501+ task_unlock(p);
7502+ return vxi;
7503+}
7504+
7505+
7506+static inline void __wakeup_vx_info(struct vx_info *vxi)
7507+{
7508+ if (waitqueue_active(&vxi->vx_wait))
7509+ wake_up_interruptible(&vxi->vx_wait);
7510+}
7511+
7512+
7513+#define enter_vx_info(v, s) __enter_vx_info(v, s, __FILE__, __LINE__)
7514+
7515+static inline void __enter_vx_info(struct vx_info *vxi,
7516+ struct vx_info_save *vxis, const char *_file, int _line)
7517+{
7518+ vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]",
7519+ vxi, vxi ? vxi->vx_id : 0, vxis, current,
7520+ current->xid, current->vx_info, _file, _line);
7521+ vxis->vxi = xchg(&current->vx_info, vxi);
7522+ vxis->xid = current->xid;
7523+ current->xid = vxi ? vxi->vx_id : 0;
7524+}
7525+
7526+#define leave_vx_info(s) __leave_vx_info(s, __FILE__, __LINE__)
7527+
7528+static inline void __leave_vx_info(struct vx_info_save *vxis,
7529+ const char *_file, int _line)
7530+{
7531+ vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]",
7532+ vxis, vxis->xid, vxis->vxi, current,
7533+ current->xid, current->vx_info, _file, _line);
7534+ (void)xchg(&current->vx_info, vxis->vxi);
7535+ current->xid = vxis->xid;
7536+}
7537+
7538+
7539+static inline void __enter_vx_admin(struct vx_info_save *vxis)
7540+{
7541+ vxis->vxi = xchg(&current->vx_info, NULL);
61333608 7542+ vxis->xid = xchg(&current->xid, (vxid_t)0);
2380c486
JR
7543+}
7544+
7545+static inline void __leave_vx_admin(struct vx_info_save *vxis)
7546+{
7547+ (void)xchg(&current->xid, vxis->xid);
7548+ (void)xchg(&current->vx_info, vxis->vxi);
7549+}
7550+
4a036bed
AM
7551+#define task_is_init(p) \
7552+ __task_is_init(p, __FILE__, __LINE__, __HERE__)
7553+
7554+static inline int __task_is_init(struct task_struct *p,
7555+ const char *_file, int _line, void *_here)
7556+{
7557+ int is_init = is_global_init(p);
7558+
7559+ task_lock(p);
7560+ if (p->vx_info)
7561+ is_init = p->vx_info->vx_initpid == p->pid;
7562+ task_unlock(p);
7563+ return is_init;
7564+}
7565+
2380c486
JR
7566+extern void exit_vx_info(struct task_struct *, int);
7567+extern void exit_vx_info_early(struct task_struct *, int);
7568+
7569+
7570+#else
7571+#warning duplicate inclusion
7572+#endif
8931d859
AM
7573diff -NurpP --minimal linux-4.4.161/include/linux/vs_cowbl.h linux-4.4.161-vs2.3.9.8/include/linux/vs_cowbl.h
7574--- linux-4.4.161/include/linux/vs_cowbl.h 1970-01-01 00:00:00.000000000 +0000
7575+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_cowbl.h 2018-10-20 04:57:21.000000000 +0000
78865d5b 7576@@ -0,0 +1,48 @@
2380c486
JR
7577+#ifndef _VS_COWBL_H
7578+#define _VS_COWBL_H
7579+
7580+#include <linux/fs.h>
7581+#include <linux/dcache.h>
7582+#include <linux/namei.h>
78865d5b 7583+#include <linux/slab.h>
2380c486
JR
7584+
7585+extern struct dentry *cow_break_link(const char *pathname);
7586+
7587+static inline int cow_check_and_break(struct path *path)
7588+{
7589+ struct inode *inode = path->dentry->d_inode;
7590+ int error = 0;
7591+
7592+ /* do we need this check? */
7593+ if (IS_RDONLY(inode))
7594+ return -EROFS;
7595+
7596+ if (IS_COW(inode)) {
7597+ if (IS_COW_LINK(inode)) {
7598+ struct dentry *new_dentry, *old_dentry = path->dentry;
7599+ char *pp, *buf;
7600+
7601+ buf = kmalloc(PATH_MAX, GFP_KERNEL);
7602+ if (!buf) {
7603+ return -ENOMEM;
7604+ }
7605+ pp = d_path(path, buf, PATH_MAX);
7606+ new_dentry = cow_break_link(pp);
7607+ kfree(buf);
7608+ if (!IS_ERR(new_dentry)) {
7609+ path->dentry = new_dentry;
7610+ dput(old_dentry);
7611+ } else
7612+ error = PTR_ERR(new_dentry);
7613+ } else {
7614+ inode->i_flags &= ~(S_IXUNLINK | S_IMMUTABLE);
7615+ inode->i_ctime = CURRENT_TIME;
7616+ mark_inode_dirty(inode);
7617+ }
7618+ }
7619+ return error;
7620+}
7621+
7622+#else
7623+#warning duplicate inclusion
7624+#endif
8931d859
AM
7625diff -NurpP --minimal linux-4.4.161/include/linux/vs_cvirt.h linux-4.4.161-vs2.3.9.8/include/linux/vs_cvirt.h
7626--- linux-4.4.161/include/linux/vs_cvirt.h 1970-01-01 00:00:00.000000000 +0000
7627+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_cvirt.h 2018-10-20 04:57:21.000000000 +0000
2380c486
JR
7628@@ -0,0 +1,50 @@
7629+#ifndef _VS_CVIRT_H
7630+#define _VS_CVIRT_H
7631+
7632+#include "vserver/cvirt.h"
7633+#include "vserver/context.h"
7634+#include "vserver/base.h"
7635+#include "vserver/check.h"
7636+#include "vserver/debug.h"
7637+
7638+
7639+static inline void vx_activate_task(struct task_struct *p)
7640+{
7641+ struct vx_info *vxi;
7642+
7643+ if ((vxi = p->vx_info)) {
7644+ vx_update_load(vxi);
7645+ atomic_inc(&vxi->cvirt.nr_running);
7646+ }
7647+}
7648+
7649+static inline void vx_deactivate_task(struct task_struct *p)
7650+{
7651+ struct vx_info *vxi;
7652+
7653+ if ((vxi = p->vx_info)) {
7654+ vx_update_load(vxi);
7655+ atomic_dec(&vxi->cvirt.nr_running);
7656+ }
7657+}
7658+
7659+static inline void vx_uninterruptible_inc(struct task_struct *p)
7660+{
7661+ struct vx_info *vxi;
7662+
7663+ if ((vxi = p->vx_info))
7664+ atomic_inc(&vxi->cvirt.nr_uninterruptible);
7665+}
7666+
7667+static inline void vx_uninterruptible_dec(struct task_struct *p)
7668+{
7669+ struct vx_info *vxi;
7670+
7671+ if ((vxi = p->vx_info))
7672+ atomic_dec(&vxi->cvirt.nr_uninterruptible);
7673+}
7674+
7675+
7676+#else
7677+#warning duplicate inclusion
7678+#endif
8931d859
AM
7679diff -NurpP --minimal linux-4.4.161/include/linux/vs_device.h linux-4.4.161-vs2.3.9.8/include/linux/vs_device.h
7680--- linux-4.4.161/include/linux/vs_device.h 1970-01-01 00:00:00.000000000 +0000
7681+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_device.h 2018-10-20 04:57:21.000000000 +0000
2380c486
JR
7682@@ -0,0 +1,45 @@
7683+#ifndef _VS_DEVICE_H
7684+#define _VS_DEVICE_H
7685+
7686+#include "vserver/base.h"
7687+#include "vserver/device.h"
7688+#include "vserver/debug.h"
7689+
7690+
7691+#ifdef CONFIG_VSERVER_DEVICE
7692+
7693+int vs_map_device(struct vx_info *, dev_t, dev_t *, umode_t);
7694+
7695+#define vs_device_perm(v, d, m, p) \
7696+ ((vs_map_device(current_vx_info(), d, NULL, m) & (p)) == (p))
7697+
7698+#else
7699+
7700+static inline
7701+int vs_map_device(struct vx_info *vxi,
7702+ dev_t device, dev_t *target, umode_t mode)
7703+{
7704+ if (target)
7705+ *target = device;
7706+ return ~0;
7707+}
7708+
7709+#define vs_device_perm(v, d, m, p) ((p) == (p))
7710+
7711+#endif
7712+
7713+
7714+#define vs_map_chrdev(d, t, p) \
7715+ ((vs_map_device(current_vx_info(), d, t, S_IFCHR) & (p)) == (p))
7716+#define vs_map_blkdev(d, t, p) \
7717+ ((vs_map_device(current_vx_info(), d, t, S_IFBLK) & (p)) == (p))
7718+
7719+#define vs_chrdev_perm(d, p) \
7720+ vs_device_perm(current_vx_info(), d, S_IFCHR, p)
7721+#define vs_blkdev_perm(d, p) \
7722+ vs_device_perm(current_vx_info(), d, S_IFBLK, p)
7723+
7724+
7725+#else
7726+#warning duplicate inclusion
7727+#endif
8931d859
AM
7728diff -NurpP --minimal linux-4.4.161/include/linux/vs_dlimit.h linux-4.4.161-vs2.3.9.8/include/linux/vs_dlimit.h
7729--- linux-4.4.161/include/linux/vs_dlimit.h 1970-01-01 00:00:00.000000000 +0000
7730+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_dlimit.h 2018-10-20 04:57:21.000000000 +0000
2c8c5bc5 7731@@ -0,0 +1,215 @@
2380c486
JR
7732+#ifndef _VS_DLIMIT_H
7733+#define _VS_DLIMIT_H
7734+
7735+#include <linux/fs.h>
7736+
7737+#include "vserver/dlimit.h"
7738+#include "vserver/base.h"
7739+#include "vserver/debug.h"
7740+
7741+
7742+#define get_dl_info(i) __get_dl_info(i, __FILE__, __LINE__)
7743+
7744+static inline struct dl_info *__get_dl_info(struct dl_info *dli,
7745+ const char *_file, int _line)
7746+{
7747+ if (!dli)
7748+ return NULL;
7749+ vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])",
7750+ dli, dli ? dli->dl_tag : 0,
7751+ dli ? atomic_read(&dli->dl_usecnt) : 0,
7752+ _file, _line);
7753+ atomic_inc(&dli->dl_usecnt);
7754+ return dli;
7755+}
7756+
7757+
7758+#define free_dl_info(i) \
7759+ call_rcu(&(i)->dl_rcu, rcu_free_dl_info)
7760+
7761+#define put_dl_info(i) __put_dl_info(i, __FILE__, __LINE__)
7762+
7763+static inline void __put_dl_info(struct dl_info *dli,
7764+ const char *_file, int _line)
7765+{
7766+ if (!dli)
7767+ return;
7768+ vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])",
7769+ dli, dli ? dli->dl_tag : 0,
7770+ dli ? atomic_read(&dli->dl_usecnt) : 0,
7771+ _file, _line);
7772+ if (atomic_dec_and_test(&dli->dl_usecnt))
7773+ free_dl_info(dli);
7774+}
7775+
7776+
7777+#define __dlimit_char(d) ((d) ? '*' : ' ')
7778+
7779+static inline int __dl_alloc_space(struct super_block *sb,
61333608 7780+ vtag_t tag, dlsize_t nr, const char *file, int line)
2380c486
JR
7781+{
7782+ struct dl_info *dli = NULL;
7783+ int ret = 0;
7784+
7785+ if (nr == 0)
7786+ goto out;
7787+ dli = locate_dl_info(sb, tag);
7788+ if (!dli)
7789+ goto out;
7790+
7791+ spin_lock(&dli->dl_lock);
7792+ ret = (dli->dl_space_used + nr > dli->dl_space_total);
7793+ if (!ret)
7794+ dli->dl_space_used += nr;
7795+ spin_unlock(&dli->dl_lock);
7796+ put_dl_info(dli);
7797+out:
7798+ vxlprintk(VXD_CBIT(dlim, 1),
7799+ "ALLOC (%p,#%d)%c %lld bytes (%d)",
7800+ sb, tag, __dlimit_char(dli), (long long)nr,
7801+ ret, file, line);
76514441 7802+ return ret ? -ENOSPC : 0;
2380c486
JR
7803+}
7804+
7805+static inline void __dl_free_space(struct super_block *sb,
61333608 7806+ vtag_t tag, dlsize_t nr, const char *_file, int _line)
2380c486
JR
7807+{
7808+ struct dl_info *dli = NULL;
7809+
7810+ if (nr == 0)
7811+ goto out;
7812+ dli = locate_dl_info(sb, tag);
7813+ if (!dli)
7814+ goto out;
7815+
7816+ spin_lock(&dli->dl_lock);
7817+ if (dli->dl_space_used > nr)
7818+ dli->dl_space_used -= nr;
7819+ else
7820+ dli->dl_space_used = 0;
7821+ spin_unlock(&dli->dl_lock);
7822+ put_dl_info(dli);
7823+out:
7824+ vxlprintk(VXD_CBIT(dlim, 1),
7825+ "FREE (%p,#%d)%c %lld bytes",
7826+ sb, tag, __dlimit_char(dli), (long long)nr,
7827+ _file, _line);
7828+}
7829+
7830+static inline int __dl_alloc_inode(struct super_block *sb,
61333608 7831+ vtag_t tag, const char *_file, int _line)
2380c486
JR
7832+{
7833+ struct dl_info *dli;
7834+ int ret = 0;
d337f35e 7835+
2380c486
JR
7836+ dli = locate_dl_info(sb, tag);
7837+ if (!dli)
7838+ goto out;
d337f35e 7839+
2380c486 7840+ spin_lock(&dli->dl_lock);
2c8c5bc5
AM
7841+ dli->dl_inodes_used++;
7842+ ret = (dli->dl_inodes_used > dli->dl_inodes_total);
2380c486
JR
7843+ spin_unlock(&dli->dl_lock);
7844+ put_dl_info(dli);
7845+out:
7846+ vxlprintk(VXD_CBIT(dlim, 0),
7847+ "ALLOC (%p,#%d)%c inode (%d)",
7848+ sb, tag, __dlimit_char(dli), ret, _file, _line);
76514441 7849+ return ret ? -ENOSPC : 0;
2380c486 7850+}
d337f35e 7851+
2380c486 7852+static inline void __dl_free_inode(struct super_block *sb,
61333608 7853+ vtag_t tag, const char *_file, int _line)
d337f35e 7854+{
2380c486
JR
7855+ struct dl_info *dli;
7856+
7857+ dli = locate_dl_info(sb, tag);
7858+ if (!dli)
7859+ goto out;
7860+
7861+ spin_lock(&dli->dl_lock);
7862+ if (dli->dl_inodes_used > 1)
7863+ dli->dl_inodes_used--;
7864+ else
7865+ dli->dl_inodes_used = 0;
7866+ spin_unlock(&dli->dl_lock);
7867+ put_dl_info(dli);
7868+out:
7869+ vxlprintk(VXD_CBIT(dlim, 0),
7870+ "FREE (%p,#%d)%c inode",
7871+ sb, tag, __dlimit_char(dli), _file, _line);
d337f35e
JR
7872+}
7873+
61333608 7874+static inline void __dl_adjust_block(struct super_block *sb, vtag_t tag,
2380c486
JR
7875+ unsigned long long *free_blocks, unsigned long long *root_blocks,
7876+ const char *_file, int _line)
d337f35e 7877+{
2380c486
JR
7878+ struct dl_info *dli;
7879+ uint64_t broot, bfree;
7880+
7881+ dli = locate_dl_info(sb, tag);
7882+ if (!dli)
7883+ return;
7884+
7885+ spin_lock(&dli->dl_lock);
7886+ broot = (dli->dl_space_total -
7887+ (dli->dl_space_total >> 10) * dli->dl_nrlmult)
7888+ >> sb->s_blocksize_bits;
7889+ bfree = (dli->dl_space_total - dli->dl_space_used)
7890+ >> sb->s_blocksize_bits;
7891+ spin_unlock(&dli->dl_lock);
7892+
7893+ vxlprintk(VXD_CBIT(dlim, 2),
7894+ "ADJUST: %lld,%lld on %lld,%lld [mult=%d]",
7895+ (long long)bfree, (long long)broot,
7896+ *free_blocks, *root_blocks, dli->dl_nrlmult,
7897+ _file, _line);
7898+ if (free_blocks) {
7899+ if (*free_blocks > bfree)
7900+ *free_blocks = bfree;
7901+ }
7902+ if (root_blocks) {
7903+ if (*root_blocks > broot)
7904+ *root_blocks = broot;
7905+ }
7906+ put_dl_info(dli);
d337f35e
JR
7907+}
7908+
e22b5178 7909+#define dl_prealloc_space(in, bytes) \
537831f9 7910+ __dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
2380c486 7911+ __FILE__, __LINE__ )
d337f35e 7912+
e22b5178 7913+#define dl_alloc_space(in, bytes) \
537831f9 7914+ __dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
2380c486 7915+ __FILE__, __LINE__ )
d337f35e 7916+
e22b5178 7917+#define dl_reserve_space(in, bytes) \
537831f9 7918+ __dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
2380c486 7919+ __FILE__, __LINE__ )
d337f35e 7920+
e22b5178
AM
7921+#define dl_claim_space(in, bytes) (0)
7922+
7923+#define dl_release_space(in, bytes) \
537831f9 7924+ __dl_free_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
2380c486 7925+ __FILE__, __LINE__ )
d337f35e 7926+
e22b5178 7927+#define dl_free_space(in, bytes) \
537831f9 7928+ __dl_free_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
e22b5178
AM
7929+ __FILE__, __LINE__ )
7930+
7931+
d337f35e 7932+
e22b5178 7933+#define dl_alloc_inode(in) \
537831f9 7934+ __dl_alloc_inode((in)->i_sb, i_tag_read(in), __FILE__, __LINE__ )
d337f35e 7935+
e22b5178 7936+#define dl_free_inode(in) \
537831f9 7937+ __dl_free_inode((in)->i_sb, i_tag_read(in), __FILE__, __LINE__ )
d337f35e 7938+
d337f35e 7939+
e22b5178 7940+#define dl_adjust_block(sb, tag, fb, rb) \
2380c486 7941+ __dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__ )
d337f35e 7942+
d337f35e 7943+
2380c486
JR
7944+#else
7945+#warning duplicate inclusion
7946+#endif
8931d859
AM
7947diff -NurpP --minimal linux-4.4.161/include/linux/vs_inet.h linux-4.4.161-vs2.3.9.8/include/linux/vs_inet.h
7948--- linux-4.4.161/include/linux/vs_inet.h 1970-01-01 00:00:00.000000000 +0000
7949+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_inet.h 2018-10-20 04:57:21.000000000 +0000
5cb1760b 7950@@ -0,0 +1,364 @@
d33d7b00
AM
7951+#ifndef _VS_INET_H
7952+#define _VS_INET_H
d337f35e 7953+
d33d7b00
AM
7954+#include "vserver/base.h"
7955+#include "vserver/network.h"
7956+#include "vserver/debug.h"
d337f35e 7957+
d33d7b00 7958+#define IPI_LOOPBACK htonl(INADDR_LOOPBACK)
d337f35e 7959+
d33d7b00
AM
7960+#define NXAV4(a) NIPQUAD((a)->ip[0]), NIPQUAD((a)->ip[1]), \
7961+ NIPQUAD((a)->mask), (a)->type
7962+#define NXAV4_FMT "[" NIPQUAD_FMT "-" NIPQUAD_FMT "/" NIPQUAD_FMT ":%04x]"
d337f35e 7963+
d33d7b00
AM
7964+#define NIPQUAD(addr) \
7965+ ((unsigned char *)&addr)[0], \
7966+ ((unsigned char *)&addr)[1], \
7967+ ((unsigned char *)&addr)[2], \
7968+ ((unsigned char *)&addr)[3]
d337f35e 7969+
d33d7b00 7970+#define NIPQUAD_FMT "%u.%u.%u.%u"
d337f35e 7971+
d337f35e 7972+
d33d7b00
AM
7973+static inline
7974+int v4_addr_match(struct nx_addr_v4 *nxa, __be32 addr, uint16_t tmask)
7975+{
7976+ __be32 ip = nxa->ip[0].s_addr;
7977+ __be32 mask = nxa->mask.s_addr;
7978+ __be32 bcast = ip | ~mask;
7979+ int ret = 0;
d337f35e 7980+
d33d7b00
AM
7981+ switch (nxa->type & tmask) {
7982+ case NXA_TYPE_MASK:
7983+ ret = (ip == (addr & mask));
7984+ break;
7985+ case NXA_TYPE_ADDR:
7986+ ret = 3;
7987+ if (addr == ip)
7988+ break;
7989+ /* fall through to broadcast */
7990+ case NXA_MOD_BCAST:
7991+ ret = ((tmask & NXA_MOD_BCAST) && (addr == bcast));
7992+ break;
7993+ case NXA_TYPE_RANGE:
7994+ ret = ((nxa->ip[0].s_addr <= addr) &&
7995+ (nxa->ip[1].s_addr > addr));
7996+ break;
7997+ case NXA_TYPE_ANY:
7998+ ret = 2;
7999+ break;
8000+ }
d337f35e 8001+
d33d7b00
AM
8002+ vxdprintk(VXD_CBIT(net, 0),
8003+ "v4_addr_match(%p" NXAV4_FMT "," NIPQUAD_FMT ",%04x) = %d",
8004+ nxa, NXAV4(nxa), NIPQUAD(addr), tmask, ret);
8005+ return ret;
8006+}
d337f35e 8007+
d33d7b00
AM
8008+static inline
8009+int v4_addr_in_nx_info(struct nx_info *nxi, __be32 addr, uint16_t tmask)
8010+{
8011+ struct nx_addr_v4 *nxa;
7a9e40b8 8012+ unsigned long irqflags;
d33d7b00 8013+ int ret = 1;
d337f35e 8014+
d33d7b00
AM
8015+ if (!nxi)
8016+ goto out;
d337f35e 8017+
d33d7b00
AM
8018+ ret = 2;
8019+ /* allow 127.0.0.1 when remapping lback */
8020+ if ((tmask & NXA_LOOPBACK) &&
8021+ (addr == IPI_LOOPBACK) &&
8022+ nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
8023+ goto out;
8024+ ret = 3;
8025+ /* check for lback address */
8026+ if ((tmask & NXA_MOD_LBACK) &&
8027+ (nxi->v4_lback.s_addr == addr))
8028+ goto out;
8029+ ret = 4;
8030+ /* check for broadcast address */
8031+ if ((tmask & NXA_MOD_BCAST) &&
8032+ (nxi->v4_bcast.s_addr == addr))
8033+ goto out;
8034+ ret = 5;
4bf69007 8035+
d33d7b00 8036+ /* check for v4 addresses */
7a9e40b8 8037+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
d33d7b00
AM
8038+ for (nxa = &nxi->v4; nxa; nxa = nxa->next)
8039+ if (v4_addr_match(nxa, addr, tmask))
4bf69007 8040+ goto out_unlock;
d33d7b00 8041+ ret = 0;
4bf69007 8042+out_unlock:
7a9e40b8 8043+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
d33d7b00
AM
8044+out:
8045+ vxdprintk(VXD_CBIT(net, 0),
8046+ "v4_addr_in_nx_info(%p[#%u]," NIPQUAD_FMT ",%04x) = %d",
8047+ nxi, nxi ? nxi->nx_id : 0, NIPQUAD(addr), tmask, ret);
8048+ return ret;
8049+}
d337f35e 8050+
d33d7b00
AM
8051+static inline
8052+int v4_nx_addr_match(struct nx_addr_v4 *nxa, struct nx_addr_v4 *addr, uint16_t mask)
8053+{
8054+ /* FIXME: needs full range checks */
8055+ return v4_addr_match(nxa, addr->ip[0].s_addr, mask);
8056+}
d337f35e 8057+
d33d7b00
AM
8058+static inline
8059+int v4_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v4 *nxa, uint16_t mask)
8060+{
8061+ struct nx_addr_v4 *ptr;
7a9e40b8 8062+ unsigned long irqflags;
4bf69007 8063+ int ret = 1;
d337f35e 8064+
7a9e40b8 8065+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
d33d7b00
AM
8066+ for (ptr = &nxi->v4; ptr; ptr = ptr->next)
8067+ if (v4_nx_addr_match(ptr, nxa, mask))
4bf69007
AM
8068+ goto out_unlock;
8069+ ret = 0;
8070+out_unlock:
7a9e40b8 8071+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007 8072+ return ret;
d33d7b00 8073+}
d337f35e 8074+
d33d7b00 8075+#include <net/inet_sock.h>
d337f35e 8076+
d33d7b00
AM
8077+/*
8078+ * Check if a given address matches for a socket
8079+ *
8080+ * nxi: the socket's nx_info if any
8081+ * addr: to be verified address
8082+ */
8083+static inline
8084+int v4_sock_addr_match (
8085+ struct nx_info *nxi,
8086+ struct inet_sock *inet,
8087+ __be32 addr)
8088+{
8089+ __be32 saddr = inet->inet_rcv_saddr;
8090+ __be32 bcast = nxi ? nxi->v4_bcast.s_addr : INADDR_BROADCAST;
d337f35e 8091+
d33d7b00
AM
8092+ if (addr && (saddr == addr || bcast == addr))
8093+ return 1;
8094+ if (!saddr)
8095+ return v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND);
8096+ return 0;
8097+}
d337f35e 8098+
d337f35e 8099+
d33d7b00 8100+/* inet related checks and helpers */
d337f35e
JR
8101+
8102+
d33d7b00
AM
8103+struct in_ifaddr;
8104+struct net_device;
8105+struct sock;
d337f35e 8106+
d33d7b00 8107+#ifdef CONFIG_INET
d337f35e 8108+
d33d7b00
AM
8109+#include <linux/netdevice.h>
8110+#include <linux/inetdevice.h>
8111+#include <net/inet_sock.h>
8112+#include <net/inet_timewait_sock.h>
d337f35e 8113+
d337f35e 8114+
d33d7b00
AM
8115+int dev_in_nx_info(struct net_device *, struct nx_info *);
8116+int v4_dev_in_nx_info(struct net_device *, struct nx_info *);
8117+int nx_v4_addr_conflict(struct nx_info *, struct nx_info *);
d337f35e 8118+
d337f35e 8119+
d33d7b00
AM
8120+/*
8121+ * check if address is covered by socket
8122+ *
8123+ * sk: the socket to check against
8124+ * addr: the address in question (must be != 0)
8125+ */
d337f35e 8126+
d33d7b00
AM
8127+static inline
8128+int __v4_addr_match_socket(const struct sock *sk, struct nx_addr_v4 *nxa)
8129+{
8130+ struct nx_info *nxi = sk->sk_nx_info;
c2e5f7c8 8131+ __be32 saddr = sk->sk_rcv_saddr;
d337f35e 8132+
d33d7b00
AM
8133+ vxdprintk(VXD_CBIT(net, 5),
8134+ "__v4_addr_in_socket(%p," NXAV4_FMT ") %p:" NIPQUAD_FMT " %p;%lx",
8135+ sk, NXAV4(nxa), nxi, NIPQUAD(saddr), sk->sk_socket,
8136+ (sk->sk_socket?sk->sk_socket->flags:0));
d337f35e 8137+
d33d7b00
AM
8138+ if (saddr) { /* direct address match */
8139+ return v4_addr_match(nxa, saddr, -1);
8140+ } else if (nxi) { /* match against nx_info */
8141+ return v4_nx_addr_in_nx_info(nxi, nxa, -1);
8142+ } else { /* unrestricted any socket */
8143+ return 1;
8144+ }
8145+}
d337f35e
JR
8146+
8147+
d337f35e 8148+
d33d7b00
AM
8149+static inline
8150+int nx_dev_visible(struct nx_info *nxi, struct net_device *dev)
8151+{
8152+ vxdprintk(VXD_CBIT(net, 1),
8153+ "nx_dev_visible(%p[#%u],%p " VS_Q("%s") ") %d",
8154+ nxi, nxi ? nxi->nx_id : 0, dev, dev->name,
8155+ nxi ? dev_in_nx_info(dev, nxi) : 0);
d337f35e 8156+
d33d7b00
AM
8157+ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
8158+ return 1;
8159+ if (dev_in_nx_info(dev, nxi))
8160+ return 1;
8161+ return 0;
8162+}
d337f35e
JR
8163+
8164+
d33d7b00
AM
8165+static inline
8166+int v4_ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
8167+{
8168+ if (!nxi)
8169+ return 1;
8170+ if (!ifa)
8171+ return 0;
8172+ return v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW);
8173+}
d337f35e 8174+
d33d7b00
AM
8175+static inline
8176+int nx_v4_ifa_visible(struct nx_info *nxi, struct in_ifaddr *ifa)
8177+{
8178+ vxdprintk(VXD_CBIT(net, 1), "nx_v4_ifa_visible(%p[#%u],%p) %d",
8179+ nxi, nxi ? nxi->nx_id : 0, ifa,
8180+ nxi ? v4_ifa_in_nx_info(ifa, nxi) : 0);
d337f35e 8181+
d33d7b00
AM
8182+ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
8183+ return 1;
8184+ if (v4_ifa_in_nx_info(ifa, nxi))
8185+ return 1;
8186+ return 0;
8187+}
d337f35e 8188+
d337f35e 8189+
d33d7b00
AM
8190+struct nx_v4_sock_addr {
8191+ __be32 saddr; /* Address used for validation */
8192+ __be32 baddr; /* Address used for socket bind */
8193+};
d337f35e 8194+
d33d7b00
AM
8195+static inline
8196+int v4_map_sock_addr(struct inet_sock *inet, struct sockaddr_in *addr,
8197+ struct nx_v4_sock_addr *nsa)
8198+{
8199+ struct sock *sk = &inet->sk;
8200+ struct nx_info *nxi = sk->sk_nx_info;
8201+ __be32 saddr = addr->sin_addr.s_addr;
8202+ __be32 baddr = saddr;
d337f35e 8203+
d33d7b00
AM
8204+ vxdprintk(VXD_CBIT(net, 3),
8205+ "inet_bind(%p)* %p,%p;%lx " NIPQUAD_FMT,
8206+ sk, sk->sk_nx_info, sk->sk_socket,
8207+ (sk->sk_socket ? sk->sk_socket->flags : 0),
8208+ NIPQUAD(saddr));
d337f35e 8209+
d33d7b00
AM
8210+ if (nxi) {
8211+ if (saddr == INADDR_ANY) {
8212+ if (nx_info_flags(nxi, NXF_SINGLE_IP, 0))
8213+ baddr = nxi->v4.ip[0].s_addr;
8214+ } else if (saddr == IPI_LOOPBACK) {
8215+ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
8216+ baddr = nxi->v4_lback.s_addr;
9795bf04
AM
8217+ } else if (!ipv4_is_multicast(saddr) ||
8218+ !nx_info_ncaps(nxi, NXC_MULTICAST)) {
8219+ /* normal address bind */
d33d7b00
AM
8220+ if (!v4_addr_in_nx_info(nxi, saddr, NXA_MASK_BIND))
8221+ return -EADDRNOTAVAIL;
8222+ }
8223+ }
d337f35e 8224+
d33d7b00
AM
8225+ vxdprintk(VXD_CBIT(net, 3),
8226+ "inet_bind(%p) " NIPQUAD_FMT ", " NIPQUAD_FMT,
8227+ sk, NIPQUAD(saddr), NIPQUAD(baddr));
d337f35e 8228+
d33d7b00
AM
8229+ nsa->saddr = saddr;
8230+ nsa->baddr = baddr;
8231+ return 0;
8232+}
d337f35e 8233+
d33d7b00
AM
8234+static inline
8235+void v4_set_sock_addr(struct inet_sock *inet, struct nx_v4_sock_addr *nsa)
8236+{
8237+ inet->inet_saddr = nsa->baddr;
8238+ inet->inet_rcv_saddr = nsa->baddr;
8239+}
d337f35e 8240+
d337f35e 8241+
d33d7b00
AM
8242+/*
8243+ * helper to simplify inet_lookup_listener
8244+ *
8245+ * nxi: the socket's nx_info if any
8246+ * addr: to be verified address
8247+ * saddr: socket address
8248+ */
8249+static inline int v4_inet_addr_match (
8250+ struct nx_info *nxi,
8251+ __be32 addr,
8252+ __be32 saddr)
8253+{
8254+ if (addr && (saddr == addr))
8255+ return 1;
8256+ if (!saddr)
8257+ return nxi ? v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND) : 1;
8258+ return 0;
8259+}
d337f35e 8260+
d33d7b00
AM
8261+static inline __be32 nx_map_sock_lback(struct nx_info *nxi, __be32 addr)
8262+{
8263+ if (nx_info_flags(nxi, NXF_HIDE_LBACK, 0) &&
8264+ (addr == nxi->v4_lback.s_addr))
8265+ return IPI_LOOPBACK;
8266+ return addr;
8267+}
d337f35e 8268+
d33d7b00
AM
8269+static inline
8270+int nx_info_has_v4(struct nx_info *nxi)
8271+{
8272+ if (!nxi)
8273+ return 1;
8274+ if (NX_IPV4(nxi))
8275+ return 1;
8276+ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
8277+ return 1;
8278+ return 0;
8279+}
d337f35e 8280+
d33d7b00 8281+#else /* CONFIG_INET */
d337f35e 8282+
d33d7b00
AM
8283+static inline
8284+int nx_dev_visible(struct nx_info *n, struct net_device *d)
8285+{
8286+ return 1;
8287+}
d337f35e 8288+
d33d7b00
AM
8289+static inline
8290+int nx_v4_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
8291+{
8292+ return 1;
8293+}
d337f35e 8294+
d33d7b00
AM
8295+static inline
8296+int v4_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
8297+{
8298+ return 1;
8299+}
d337f35e 8300+
d33d7b00
AM
8301+static inline
8302+int nx_info_has_v4(struct nx_info *nxi)
8303+{
8304+ return 0;
8305+}
d337f35e 8306+
d33d7b00 8307+#endif /* CONFIG_INET */
d337f35e 8308+
d33d7b00
AM
8309+#define current_nx_info_has_v4() \
8310+ nx_info_has_v4(current_nx_info())
d337f35e 8311+
d33d7b00
AM
8312+#else
8313+// #warning duplicate inclusion
3bac966d 8314+#endif
8931d859
AM
8315diff -NurpP --minimal linux-4.4.161/include/linux/vs_inet6.h linux-4.4.161-vs2.3.9.8/include/linux/vs_inet6.h
8316--- linux-4.4.161/include/linux/vs_inet6.h 1970-01-01 00:00:00.000000000 +0000
8317+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_inet6.h 2018-10-20 04:57:21.000000000 +0000
5cb1760b 8318@@ -0,0 +1,257 @@
d33d7b00
AM
8319+#ifndef _VS_INET6_H
8320+#define _VS_INET6_H
4a036bed 8321+
d33d7b00
AM
8322+#include "vserver/base.h"
8323+#include "vserver/network.h"
8324+#include "vserver/debug.h"
d337f35e 8325+
d33d7b00 8326+#include <net/ipv6.h>
d337f35e 8327+
d33d7b00
AM
8328+#define NXAV6(a) &(a)->ip, &(a)->mask, (a)->prefix, (a)->type
8329+#define NXAV6_FMT "[%pI6/%pI6/%d:%04x]"
7e46296a 8330+
7e46296a 8331+
d33d7b00 8332+#ifdef CONFIG_IPV6
7e46296a 8333+
d33d7b00
AM
8334+static inline
8335+int v6_addr_match(struct nx_addr_v6 *nxa,
8336+ const struct in6_addr *addr, uint16_t mask)
8337+{
8338+ int ret = 0;
7e46296a 8339+
d33d7b00
AM
8340+ switch (nxa->type & mask) {
8341+ case NXA_TYPE_MASK:
8342+ ret = ipv6_masked_addr_cmp(&nxa->ip, &nxa->mask, addr);
8343+ break;
8344+ case NXA_TYPE_ADDR:
8345+ ret = ipv6_addr_equal(&nxa->ip, addr);
8346+ break;
8347+ case NXA_TYPE_ANY:
8348+ ret = 1;
8349+ break;
8350+ }
8351+ vxdprintk(VXD_CBIT(net, 0),
8352+ "v6_addr_match(%p" NXAV6_FMT ",%pI6,%04x) = %d",
8353+ nxa, NXAV6(nxa), addr, mask, ret);
8354+ return ret;
8355+}
7e46296a 8356+
d33d7b00
AM
8357+static inline
8358+int v6_addr_in_nx_info(struct nx_info *nxi,
8359+ const struct in6_addr *addr, uint16_t mask)
8360+{
8361+ struct nx_addr_v6 *nxa;
7a9e40b8 8362+ unsigned long irqflags;
d33d7b00 8363+ int ret = 1;
d337f35e 8364+
d33d7b00
AM
8365+ if (!nxi)
8366+ goto out;
4bf69007 8367+
7a9e40b8 8368+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
d33d7b00
AM
8369+ for (nxa = &nxi->v6; nxa; nxa = nxa->next)
8370+ if (v6_addr_match(nxa, addr, mask))
4bf69007 8371+ goto out_unlock;
d33d7b00 8372+ ret = 0;
4bf69007 8373+out_unlock:
7a9e40b8 8374+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
d33d7b00
AM
8375+out:
8376+ vxdprintk(VXD_CBIT(net, 0),
8377+ "v6_addr_in_nx_info(%p[#%u],%pI6,%04x) = %d",
8378+ nxi, nxi ? nxi->nx_id : 0, addr, mask, ret);
8379+ return ret;
8380+}
d337f35e 8381+
d33d7b00
AM
8382+static inline
8383+int v6_nx_addr_match(struct nx_addr_v6 *nxa, struct nx_addr_v6 *addr, uint16_t mask)
8384+{
8385+ /* FIXME: needs full range checks */
8386+ return v6_addr_match(nxa, &addr->ip, mask);
8387+}
d337f35e 8388+
d33d7b00
AM
8389+static inline
8390+int v6_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v6 *nxa, uint16_t mask)
8391+{
8392+ struct nx_addr_v6 *ptr;
7a9e40b8 8393+ unsigned long irqflags;
4bf69007 8394+ int ret = 1;
d337f35e 8395+
7a9e40b8 8396+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
d33d7b00
AM
8397+ for (ptr = &nxi->v6; ptr; ptr = ptr->next)
8398+ if (v6_nx_addr_match(ptr, nxa, mask))
4bf69007
AM
8399+ goto out_unlock;
8400+ ret = 0;
8401+out_unlock:
7a9e40b8 8402+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007 8403+ return ret;
d33d7b00 8404+}
d337f35e 8405+
d337f35e 8406+
d33d7b00
AM
8407+/*
8408+ * Check if a given address matches for a socket
8409+ *
8410+ * nxi: the socket's nx_info if any
8411+ * addr: to be verified address
8412+ */
8413+static inline
8414+int v6_sock_addr_match (
8415+ struct nx_info *nxi,
8416+ struct inet_sock *inet,
8417+ struct in6_addr *addr)
8418+{
8419+ struct sock *sk = &inet->sk;
c2e5f7c8 8420+ const struct in6_addr *saddr = inet6_rcv_saddr(sk);
d337f35e 8421+
d33d7b00
AM
8422+ if (!ipv6_addr_any(addr) &&
8423+ ipv6_addr_equal(saddr, addr))
8424+ return 1;
8425+ if (ipv6_addr_any(saddr))
8426+ return v6_addr_in_nx_info(nxi, addr, -1);
8427+ return 0;
8428+}
d337f35e 8429+
d33d7b00
AM
8430+/*
8431+ * check if address is covered by socket
8432+ *
8433+ * sk: the socket to check against
8434+ * addr: the address in question (must be != 0)
8435+ */
d337f35e 8436+
d33d7b00
AM
8437+static inline
8438+int __v6_addr_match_socket(const struct sock *sk, struct nx_addr_v6 *nxa)
8439+{
8440+ struct nx_info *nxi = sk->sk_nx_info;
c2e5f7c8 8441+ const struct in6_addr *saddr = inet6_rcv_saddr(sk);
d337f35e 8442+
d33d7b00
AM
8443+ vxdprintk(VXD_CBIT(net, 5),
8444+ "__v6_addr_in_socket(%p," NXAV6_FMT ") %p:%pI6 %p;%lx",
8445+ sk, NXAV6(nxa), nxi, saddr, sk->sk_socket,
8446+ (sk->sk_socket?sk->sk_socket->flags:0));
d337f35e 8447+
d33d7b00
AM
8448+ if (!ipv6_addr_any(saddr)) { /* direct address match */
8449+ return v6_addr_match(nxa, saddr, -1);
8450+ } else if (nxi) { /* match against nx_info */
8451+ return v6_nx_addr_in_nx_info(nxi, nxa, -1);
8452+ } else { /* unrestricted any socket */
8453+ return 1;
8454+ }
8455+}
d337f35e 8456+
d337f35e 8457+
d33d7b00 8458+/* inet related checks and helpers */
d337f35e 8459+
d337f35e 8460+
d33d7b00
AM
8461+struct in_ifaddr;
8462+struct net_device;
8463+struct sock;
d337f35e
JR
8464+
8465+
d33d7b00
AM
8466+#include <linux/netdevice.h>
8467+#include <linux/inetdevice.h>
8468+#include <net/inet_timewait_sock.h>
d337f35e 8469+
d337f35e 8470+
d33d7b00
AM
8471+int dev_in_nx_info(struct net_device *, struct nx_info *);
8472+int v6_dev_in_nx_info(struct net_device *, struct nx_info *);
8473+int nx_v6_addr_conflict(struct nx_info *, struct nx_info *);
d337f35e
JR
8474+
8475+
3bac966d 8476+
d33d7b00
AM
8477+static inline
8478+int v6_ifa_in_nx_info(struct inet6_ifaddr *ifa, struct nx_info *nxi)
adc1caaa 8479+{
d33d7b00
AM
8480+ if (!nxi)
8481+ return 1;
8482+ if (!ifa)
8483+ return 0;
8484+ return v6_addr_in_nx_info(nxi, &ifa->addr, -1);
8485+}
d337f35e 8486+
d33d7b00
AM
8487+static inline
8488+int nx_v6_ifa_visible(struct nx_info *nxi, struct inet6_ifaddr *ifa)
8489+{
8490+ vxdprintk(VXD_CBIT(net, 1), "nx_v6_ifa_visible(%p[#%u],%p) %d",
8491+ nxi, nxi ? nxi->nx_id : 0, ifa,
8492+ nxi ? v6_ifa_in_nx_info(ifa, nxi) : 0);
d337f35e 8493+
d33d7b00
AM
8494+ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
8495+ return 1;
8496+ if (v6_ifa_in_nx_info(ifa, nxi))
8497+ return 1;
8498+ return 0;
adc1caaa 8499+}
d337f35e 8500+
d337f35e 8501+
d33d7b00
AM
8502+struct nx_v6_sock_addr {
8503+ struct in6_addr saddr; /* Address used for validation */
8504+ struct in6_addr baddr; /* Address used for socket bind */
8505+};
8506+
8507+static inline
8508+int v6_map_sock_addr(struct inet_sock *inet, struct sockaddr_in6 *addr,
8509+ struct nx_v6_sock_addr *nsa)
8510+{
8511+ // struct sock *sk = &inet->sk;
8512+ // struct nx_info *nxi = sk->sk_nx_info;
8513+ struct in6_addr saddr = addr->sin6_addr;
8514+ struct in6_addr baddr = saddr;
3bac966d 8515+
d33d7b00
AM
8516+ nsa->saddr = saddr;
8517+ nsa->baddr = baddr;
8518+ return 0;
8519+}
3bac966d 8520+
d33d7b00
AM
8521+static inline
8522+void v6_set_sock_addr(struct inet_sock *inet, struct nx_v6_sock_addr *nsa)
8523+{
8524+ // struct sock *sk = &inet->sk;
8525+ // struct in6_addr *saddr = inet6_rcv_saddr(sk);
3bac966d 8526+
d33d7b00
AM
8527+ // *saddr = nsa->baddr;
8528+ // inet->inet_saddr = nsa->baddr;
8529+}
3bac966d 8530+
d33d7b00
AM
8531+static inline
8532+int nx_info_has_v6(struct nx_info *nxi)
8533+{
8534+ if (!nxi)
8535+ return 1;
8536+ if (NX_IPV6(nxi))
8537+ return 1;
8538+ return 0;
8539+}
3bac966d 8540+
d33d7b00 8541+#else /* CONFIG_IPV6 */
d337f35e 8542+
2380c486 8543+static inline
d33d7b00 8544+int nx_v6_dev_visible(struct nx_info *n, struct net_device *d)
2380c486 8545+{
d33d7b00 8546+ return 1;
d337f35e
JR
8547+}
8548+
3bac966d 8549+
adc1caaa 8550+static inline
d33d7b00 8551+int nx_v6_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
adc1caaa 8552+{
d33d7b00 8553+ return 1;
adc1caaa 8554+}
2380c486 8555+
d33d7b00
AM
8556+static inline
8557+int v6_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
8558+{
8559+ return 1;
8560+}
8561+
8562+static inline
8563+int nx_info_has_v6(struct nx_info *nxi)
8564+{
8565+ return 0;
8566+}
2380c486 8567+
d33d7b00 8568+#endif /* CONFIG_IPV6 */
d337f35e 8569+
d33d7b00
AM
8570+#define current_nx_info_has_v6() \
8571+ nx_info_has_v6(current_nx_info())
3bac966d 8572+
d337f35e 8573+#else
d33d7b00 8574+#warning duplicate inclusion
d337f35e 8575+#endif
8931d859
AM
8576diff -NurpP --minimal linux-4.4.161/include/linux/vs_limit.h linux-4.4.161-vs2.3.9.8/include/linux/vs_limit.h
8577--- linux-4.4.161/include/linux/vs_limit.h 1970-01-01 00:00:00.000000000 +0000
8578+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_limit.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00
AM
8579@@ -0,0 +1,140 @@
8580+#ifndef _VS_LIMIT_H
8581+#define _VS_LIMIT_H
d337f35e 8582+
d33d7b00
AM
8583+#include "vserver/limit.h"
8584+#include "vserver/base.h"
8585+#include "vserver/context.h"
8586+#include "vserver/debug.h"
8587+#include "vserver/context.h"
8588+#include "vserver/limit_int.h"
d337f35e
JR
8589+
8590+
d33d7b00
AM
8591+#define vx_acc_cres(v, d, p, r) \
8592+ __vx_acc_cres(v, r, d, p, __FILE__, __LINE__)
d337f35e 8593+
d33d7b00
AM
8594+#define vx_acc_cres_cond(x, d, p, r) \
8595+ __vx_acc_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
8596+ r, d, p, __FILE__, __LINE__)
d337f35e
JR
8597+
8598+
d33d7b00
AM
8599+#define vx_add_cres(v, a, p, r) \
8600+ __vx_add_cres(v, r, a, p, __FILE__, __LINE__)
8601+#define vx_sub_cres(v, a, p, r) vx_add_cres(v, -(a), p, r)
d337f35e 8602+
d33d7b00
AM
8603+#define vx_add_cres_cond(x, a, p, r) \
8604+ __vx_add_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
8605+ r, a, p, __FILE__, __LINE__)
8606+#define vx_sub_cres_cond(x, a, p, r) vx_add_cres_cond(x, -(a), p, r)
d337f35e 8607+
d337f35e 8608+
d33d7b00 8609+/* process and file limits */
d337f35e 8610+
d33d7b00
AM
8611+#define vx_nproc_inc(p) \
8612+ vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC)
d337f35e 8613+
d33d7b00
AM
8614+#define vx_nproc_dec(p) \
8615+ vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC)
d337f35e 8616+
d33d7b00
AM
8617+#define vx_files_inc(f) \
8618+ vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE)
d337f35e 8619+
d33d7b00
AM
8620+#define vx_files_dec(f) \
8621+ vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE)
d337f35e 8622+
d33d7b00
AM
8623+#define vx_locks_inc(l) \
8624+ vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS)
d337f35e 8625+
d33d7b00
AM
8626+#define vx_locks_dec(l) \
8627+ vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS)
d337f35e 8628+
d33d7b00
AM
8629+#define vx_openfd_inc(f) \
8630+ vx_acc_cres(current_vx_info(), 1, (void *)(long)(f), VLIMIT_OPENFD)
d337f35e 8631+
d33d7b00
AM
8632+#define vx_openfd_dec(f) \
8633+ vx_acc_cres(current_vx_info(),-1, (void *)(long)(f), VLIMIT_OPENFD)
d337f35e 8634+
d337f35e 8635+
d33d7b00
AM
8636+#define vx_cres_avail(v, n, r) \
8637+ __vx_cres_avail(v, r, n, __FILE__, __LINE__)
d337f35e 8638+
d337f35e 8639+
d33d7b00
AM
8640+#define vx_nproc_avail(n) \
8641+ vx_cres_avail(current_vx_info(), n, RLIMIT_NPROC)
d337f35e 8642+
d33d7b00
AM
8643+#define vx_files_avail(n) \
8644+ vx_cres_avail(current_vx_info(), n, RLIMIT_NOFILE)
d337f35e 8645+
d33d7b00
AM
8646+#define vx_locks_avail(n) \
8647+ vx_cres_avail(current_vx_info(), n, RLIMIT_LOCKS)
d337f35e 8648+
d33d7b00
AM
8649+#define vx_openfd_avail(n) \
8650+ vx_cres_avail(current_vx_info(), n, VLIMIT_OPENFD)
d337f35e 8651+
d337f35e 8652+
d33d7b00 8653+/* dentry limits */
d337f35e 8654+
d33d7b00 8655+#define vx_dentry_inc(d) do { \
c2e5f7c8 8656+ if (d_count(d) == 1) \
d33d7b00
AM
8657+ vx_acc_cres(current_vx_info(), 1, d, VLIMIT_DENTRY); \
8658+ } while (0)
d337f35e 8659+
d33d7b00 8660+#define vx_dentry_dec(d) do { \
c2e5f7c8 8661+ if (d_count(d) == 0) \
d33d7b00
AM
8662+ vx_acc_cres(current_vx_info(),-1, d, VLIMIT_DENTRY); \
8663+ } while (0)
d337f35e 8664+
d33d7b00
AM
8665+#define vx_dentry_avail(n) \
8666+ vx_cres_avail(current_vx_info(), n, VLIMIT_DENTRY)
d337f35e 8667+
d337f35e 8668+
d33d7b00 8669+/* socket limits */
d337f35e 8670+
d33d7b00
AM
8671+#define vx_sock_inc(s) \
8672+ vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK)
d337f35e 8673+
d33d7b00
AM
8674+#define vx_sock_dec(s) \
8675+ vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK)
d337f35e 8676+
d33d7b00
AM
8677+#define vx_sock_avail(n) \
8678+ vx_cres_avail(current_vx_info(), n, VLIMIT_NSOCK)
d337f35e 8679+
d337f35e 8680+
d33d7b00 8681+/* ipc resource limits */
d337f35e 8682+
d33d7b00
AM
8683+#define vx_ipcmsg_add(v, u, a) \
8684+ vx_add_cres(v, a, u, RLIMIT_MSGQUEUE)
d337f35e 8685+
d33d7b00
AM
8686+#define vx_ipcmsg_sub(v, u, a) \
8687+ vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE)
d337f35e 8688+
d33d7b00
AM
8689+#define vx_ipcmsg_avail(v, a) \
8690+ vx_cres_avail(v, a, RLIMIT_MSGQUEUE)
d337f35e 8691+
d337f35e 8692+
d33d7b00
AM
8693+#define vx_ipcshm_add(v, k, a) \
8694+ vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
d337f35e 8695+
d33d7b00
AM
8696+#define vx_ipcshm_sub(v, k, a) \
8697+ vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
d337f35e 8698+
d33d7b00
AM
8699+#define vx_ipcshm_avail(v, a) \
8700+ vx_cres_avail(v, a, VLIMIT_SHMEM)
d337f35e
JR
8701+
8702+
d33d7b00
AM
8703+#define vx_semary_inc(a) \
8704+ vx_acc_cres(current_vx_info(), 1, a, VLIMIT_SEMARY)
d337f35e 8705+
d33d7b00
AM
8706+#define vx_semary_dec(a) \
8707+ vx_acc_cres(current_vx_info(), -1, a, VLIMIT_SEMARY)
d337f35e 8708+
d337f35e 8709+
d33d7b00
AM
8710+#define vx_nsems_add(a,n) \
8711+ vx_add_cres(current_vx_info(), n, a, VLIMIT_NSEMS)
d337f35e 8712+
d33d7b00
AM
8713+#define vx_nsems_sub(a,n) \
8714+ vx_sub_cres(current_vx_info(), n, a, VLIMIT_NSEMS)
d337f35e
JR
8715+
8716+
d33d7b00
AM
8717+#else
8718+#warning duplicate inclusion
8719+#endif
8931d859
AM
8720diff -NurpP --minimal linux-4.4.161/include/linux/vs_network.h linux-4.4.161-vs2.3.9.8/include/linux/vs_network.h
8721--- linux-4.4.161/include/linux/vs_network.h 1970-01-01 00:00:00.000000000 +0000
8722+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_network.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00
AM
8723@@ -0,0 +1,169 @@
8724+#ifndef _NX_VS_NETWORK_H
8725+#define _NX_VS_NETWORK_H
7e46296a 8726+
d33d7b00
AM
8727+#include "vserver/context.h"
8728+#include "vserver/network.h"
8729+#include "vserver/base.h"
8730+#include "vserver/check.h"
8731+#include "vserver/debug.h"
2380c486 8732+
d33d7b00 8733+#include <linux/sched.h>
2380c486 8734+
2380c486 8735+
d33d7b00 8736+#define get_nx_info(i) __get_nx_info(i, __FILE__, __LINE__)
2380c486 8737+
d33d7b00
AM
8738+static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
8739+ const char *_file, int _line)
8740+{
8741+ if (!nxi)
8742+ return NULL;
d337f35e 8743+
d33d7b00
AM
8744+ vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])",
8745+ nxi, nxi ? nxi->nx_id : 0,
8746+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8747+ _file, _line);
d337f35e 8748+
d33d7b00
AM
8749+ atomic_inc(&nxi->nx_usecnt);
8750+ return nxi;
8751+}
d337f35e
JR
8752+
8753+
d33d7b00 8754+extern void free_nx_info(struct nx_info *);
d337f35e 8755+
d33d7b00 8756+#define put_nx_info(i) __put_nx_info(i, __FILE__, __LINE__)
d337f35e 8757+
d33d7b00
AM
8758+static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
8759+{
8760+ if (!nxi)
8761+ return;
d337f35e 8762+
d33d7b00
AM
8763+ vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])",
8764+ nxi, nxi ? nxi->nx_id : 0,
8765+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8766+ _file, _line);
d337f35e 8767+
d33d7b00
AM
8768+ if (atomic_dec_and_test(&nxi->nx_usecnt))
8769+ free_nx_info(nxi);
8770+}
d337f35e 8771+
d337f35e 8772+
d33d7b00 8773+#define init_nx_info(p, i) __init_nx_info(p, i, __FILE__, __LINE__)
d337f35e 8774+
d33d7b00
AM
8775+static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi,
8776+ const char *_file, int _line)
8777+{
8778+ if (nxi) {
8779+ vxlprintk(VXD_CBIT(nid, 3),
8780+ "init_nx_info(%p[#%d.%d])",
8781+ nxi, nxi ? nxi->nx_id : 0,
8782+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8783+ _file, _line);
d337f35e 8784+
d33d7b00
AM
8785+ atomic_inc(&nxi->nx_usecnt);
8786+ }
8787+ *nxp = nxi;
8788+}
d337f35e 8789+
d337f35e 8790+
d33d7b00 8791+#define set_nx_info(p, i) __set_nx_info(p, i, __FILE__, __LINE__)
d337f35e 8792+
d33d7b00
AM
8793+static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
8794+ const char *_file, int _line)
8795+{
8796+ struct nx_info *nxo;
d337f35e 8797+
d33d7b00
AM
8798+ if (!nxi)
8799+ return;
d337f35e 8800+
d33d7b00
AM
8801+ vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])",
8802+ nxi, nxi ? nxi->nx_id : 0,
8803+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8804+ _file, _line);
d337f35e 8805+
d33d7b00
AM
8806+ atomic_inc(&nxi->nx_usecnt);
8807+ nxo = xchg(nxp, nxi);
8808+ BUG_ON(nxo);
8809+}
d337f35e 8810+
d33d7b00 8811+#define clr_nx_info(p) __clr_nx_info(p, __FILE__, __LINE__)
d337f35e 8812+
d33d7b00
AM
8813+static inline void __clr_nx_info(struct nx_info **nxp,
8814+ const char *_file, int _line)
8815+{
8816+ struct nx_info *nxo;
d337f35e 8817+
d33d7b00
AM
8818+ nxo = xchg(nxp, NULL);
8819+ if (!nxo)
8820+ return;
d337f35e 8821+
d33d7b00
AM
8822+ vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])",
8823+ nxo, nxo ? nxo->nx_id : 0,
8824+ nxo ? atomic_read(&nxo->nx_usecnt) : 0,
8825+ _file, _line);
d337f35e 8826+
d33d7b00
AM
8827+ if (atomic_dec_and_test(&nxo->nx_usecnt))
8828+ free_nx_info(nxo);
8829+}
d337f35e
JR
8830+
8831+
d33d7b00 8832+#define claim_nx_info(v, p) __claim_nx_info(v, p, __FILE__, __LINE__)
d337f35e 8833+
d33d7b00
AM
8834+static inline void __claim_nx_info(struct nx_info *nxi,
8835+ struct task_struct *task, const char *_file, int _line)
8836+{
8837+ vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p",
8838+ nxi, nxi ? nxi->nx_id : 0,
8839+ nxi?atomic_read(&nxi->nx_usecnt):0,
8840+ nxi?atomic_read(&nxi->nx_tasks):0,
8841+ task, _file, _line);
d337f35e 8842+
d33d7b00
AM
8843+ atomic_inc(&nxi->nx_tasks);
8844+}
d337f35e 8845+
d337f35e 8846+
d33d7b00 8847+extern void unhash_nx_info(struct nx_info *);
d337f35e 8848+
d33d7b00 8849+#define release_nx_info(v, p) __release_nx_info(v, p, __FILE__, __LINE__)
d337f35e 8850+
d33d7b00
AM
8851+static inline void __release_nx_info(struct nx_info *nxi,
8852+ struct task_struct *task, const char *_file, int _line)
8853+{
8854+ vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p",
8855+ nxi, nxi ? nxi->nx_id : 0,
8856+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8857+ nxi ? atomic_read(&nxi->nx_tasks) : 0,
8858+ task, _file, _line);
ab30d09f 8859+
d33d7b00 8860+ might_sleep();
d337f35e 8861+
d33d7b00
AM
8862+ if (atomic_dec_and_test(&nxi->nx_tasks))
8863+ unhash_nx_info(nxi);
8864+}
d337f35e
JR
8865+
8866+
d33d7b00 8867+#define task_get_nx_info(i) __task_get_nx_info(i, __FILE__, __LINE__)
d337f35e 8868+
d33d7b00
AM
8869+static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
8870+ const char *_file, int _line)
8871+{
8872+ struct nx_info *nxi;
d337f35e 8873+
d33d7b00
AM
8874+ task_lock(p);
8875+ vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)",
8876+ p, _file, _line);
8877+ nxi = __get_nx_info(p->nx_info, _file, _line);
8878+ task_unlock(p);
8879+ return nxi;
8880+}
d337f35e 8881+
d337f35e 8882+
d33d7b00
AM
8883+static inline void exit_nx_info(struct task_struct *p)
8884+{
8885+ if (p->nx_info)
8886+ release_nx_info(p->nx_info, p);
8887+}
adc1caaa 8888+
d337f35e 8889+
2380c486 8890+#else
d33d7b00 8891+#warning duplicate inclusion
2380c486 8892+#endif
8931d859
AM
8893diff -NurpP --minimal linux-4.4.161/include/linux/vs_pid.h linux-4.4.161-vs2.3.9.8/include/linux/vs_pid.h
8894--- linux-4.4.161/include/linux/vs_pid.h 1970-01-01 00:00:00.000000000 +0000
8895+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_pid.h 2018-10-20 04:57:21.000000000 +0000
b3b0d4fd 8896@@ -0,0 +1,50 @@
d33d7b00
AM
8897+#ifndef _VS_PID_H
8898+#define _VS_PID_H
d337f35e 8899+
d33d7b00
AM
8900+#include "vserver/base.h"
8901+#include "vserver/check.h"
8902+#include "vserver/context.h"
8903+#include "vserver/debug.h"
8904+#include "vserver/pid.h"
8905+#include <linux/pid_namespace.h>
d337f35e 8906+
d337f35e 8907+
d33d7b00 8908+#define VXF_FAKE_INIT (VXF_INFO_INIT | VXF_STATE_INIT)
d337f35e 8909+
d33d7b00
AM
8910+static inline
8911+int vx_proc_task_visible(struct task_struct *task)
8912+{
8913+ if ((task->pid == 1) &&
8914+ !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT))
8915+ /* show a blend through init */
8916+ goto visible;
8917+ if (vx_check(vx_task_xid(task), VS_WATCH | VS_IDENT))
8918+ goto visible;
8919+ return 0;
8920+visible:
8921+ return 1;
8922+}
d337f35e 8923+
d33d7b00 8924+#define find_task_by_real_pid(pid) find_task_by_pid_ns(pid, &init_pid_ns)
d337f35e 8925+
d337f35e 8926+
d33d7b00
AM
8927+static inline
8928+struct task_struct *vx_get_proc_task(struct inode *inode, struct pid *pid)
8929+{
8930+ struct task_struct *task = get_pid_task(pid, PIDTYPE_PID);
d337f35e 8931+
d33d7b00
AM
8932+ if (task && !vx_proc_task_visible(task)) {
8933+ vxdprintk(VXD_CBIT(misc, 6),
8934+ "dropping task (get) %p[#%u,%u] for %p[#%u,%u]",
8935+ task, task->xid, task->pid,
8936+ current, current->xid, current->pid);
8937+ put_task_struct(task);
8938+ task = NULL;
8939+ }
8940+ return task;
8941+}
d337f35e 8942+
d337f35e 8943+
d33d7b00
AM
8944+#else
8945+#warning duplicate inclusion
8946+#endif
8931d859
AM
8947diff -NurpP --minimal linux-4.4.161/include/linux/vs_sched.h linux-4.4.161-vs2.3.9.8/include/linux/vs_sched.h
8948--- linux-4.4.161/include/linux/vs_sched.h 1970-01-01 00:00:00.000000000 +0000
8949+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_sched.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00
AM
8950@@ -0,0 +1,40 @@
8951+#ifndef _VS_SCHED_H
8952+#define _VS_SCHED_H
d337f35e 8953+
d33d7b00
AM
8954+#include "vserver/base.h"
8955+#include "vserver/context.h"
8956+#include "vserver/sched.h"
d337f35e
JR
8957+
8958+
d33d7b00
AM
8959+#define MAX_PRIO_BIAS 20
8960+#define MIN_PRIO_BIAS -20
d337f35e 8961+
d33d7b00
AM
8962+static inline
8963+int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
8964+{
8965+ struct vx_info *vxi = p->vx_info;
d337f35e 8966+
d33d7b00
AM
8967+ if (vxi)
8968+ prio += vx_cpu(vxi, sched_pc).prio_bias;
8969+ return prio;
8970+}
d337f35e 8971+
d33d7b00
AM
8972+static inline void vx_account_user(struct vx_info *vxi,
8973+ cputime_t cputime, int nice)
8974+{
8975+ if (!vxi)
8976+ return;
8977+ vx_cpu(vxi, sched_pc).user_ticks += cputime;
8978+}
d337f35e 8979+
d33d7b00
AM
8980+static inline void vx_account_system(struct vx_info *vxi,
8981+ cputime_t cputime, int idle)
8982+{
8983+ if (!vxi)
8984+ return;
8985+ vx_cpu(vxi, sched_pc).sys_ticks += cputime;
8986+}
d337f35e 8987+
d33d7b00
AM
8988+#else
8989+#warning duplicate inclusion
8990+#endif
8931d859
AM
8991diff -NurpP --minimal linux-4.4.161/include/linux/vs_socket.h linux-4.4.161-vs2.3.9.8/include/linux/vs_socket.h
8992--- linux-4.4.161/include/linux/vs_socket.h 1970-01-01 00:00:00.000000000 +0000
8993+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_socket.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00
AM
8994@@ -0,0 +1,67 @@
8995+#ifndef _VS_SOCKET_H
8996+#define _VS_SOCKET_H
d337f35e 8997+
d33d7b00
AM
8998+#include "vserver/debug.h"
8999+#include "vserver/base.h"
9000+#include "vserver/cacct.h"
9001+#include "vserver/context.h"
9002+#include "vserver/tag.h"
d337f35e 9003+
d337f35e 9004+
d33d7b00 9005+/* socket accounting */
d337f35e 9006+
d33d7b00 9007+#include <linux/socket.h>
d337f35e 9008+
d33d7b00
AM
9009+static inline int vx_sock_type(int family)
9010+{
9011+ switch (family) {
9012+ case PF_UNSPEC:
9013+ return VXA_SOCK_UNSPEC;
9014+ case PF_UNIX:
9015+ return VXA_SOCK_UNIX;
9016+ case PF_INET:
9017+ return VXA_SOCK_INET;
9018+ case PF_INET6:
9019+ return VXA_SOCK_INET6;
9020+ case PF_PACKET:
9021+ return VXA_SOCK_PACKET;
9022+ default:
9023+ return VXA_SOCK_OTHER;
9024+ }
9025+}
d337f35e 9026+
d33d7b00
AM
9027+#define vx_acc_sock(v, f, p, s) \
9028+ __vx_acc_sock(v, f, p, s, __FILE__, __LINE__)
d337f35e 9029+
d33d7b00
AM
9030+static inline void __vx_acc_sock(struct vx_info *vxi,
9031+ int family, int pos, int size, char *file, int line)
9032+{
9033+ if (vxi) {
9034+ int type = vx_sock_type(family);
d337f35e 9035+
d33d7b00
AM
9036+ atomic_long_inc(&vxi->cacct.sock[type][pos].count);
9037+ atomic_long_add(size, &vxi->cacct.sock[type][pos].total);
9038+ }
9039+}
d337f35e 9040+
d33d7b00
AM
9041+#define vx_sock_recv(sk, s) \
9042+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, s)
9043+#define vx_sock_send(sk, s) \
9044+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, s)
9045+#define vx_sock_fail(sk, s) \
9046+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, s)
d337f35e 9047+
d337f35e 9048+
d33d7b00
AM
9049+#define sock_vx_init(s) do { \
9050+ (s)->sk_xid = 0; \
9051+ (s)->sk_vx_info = NULL; \
9052+ } while (0)
d337f35e 9053+
d33d7b00
AM
9054+#define sock_nx_init(s) do { \
9055+ (s)->sk_nid = 0; \
9056+ (s)->sk_nx_info = NULL; \
9057+ } while (0)
d337f35e 9058+
d33d7b00
AM
9059+#else
9060+#warning duplicate inclusion
9061+#endif
8931d859
AM
9062diff -NurpP --minimal linux-4.4.161/include/linux/vs_tag.h linux-4.4.161-vs2.3.9.8/include/linux/vs_tag.h
9063--- linux-4.4.161/include/linux/vs_tag.h 1970-01-01 00:00:00.000000000 +0000
9064+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_tag.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00
AM
9065@@ -0,0 +1,47 @@
9066+#ifndef _VS_TAG_H
9067+#define _VS_TAG_H
d337f35e 9068+
d33d7b00 9069+#include <linux/vserver/tag.h>
d337f35e 9070+
d33d7b00 9071+/* check conditions */
d337f35e 9072+
d33d7b00
AM
9073+#define DX_ADMIN 0x0001
9074+#define DX_WATCH 0x0002
9075+#define DX_HOSTID 0x0008
d337f35e 9076+
d33d7b00 9077+#define DX_IDENT 0x0010
d337f35e 9078+
d33d7b00 9079+#define DX_ARG_MASK 0x0010
d337f35e 9080+
d337f35e 9081+
d33d7b00 9082+#define dx_task_tag(t) ((t)->tag)
d337f35e 9083+
d33d7b00 9084+#define dx_current_tag() dx_task_tag(current)
d337f35e 9085+
d33d7b00 9086+#define dx_check(c, m) __dx_check(dx_current_tag(), c, m)
d337f35e 9087+
d33d7b00 9088+#define dx_weak_check(c, m) ((m) ? dx_check(c, m) : 1)
d337f35e
JR
9089+
9090+
d33d7b00
AM
9091+/*
9092+ * check current context for ADMIN/WATCH and
9093+ * optionally against supplied argument
9094+ */
61333608 9095+static inline int __dx_check(vtag_t cid, vtag_t id, unsigned int mode)
d33d7b00
AM
9096+{
9097+ if (mode & DX_ARG_MASK) {
9098+ if ((mode & DX_IDENT) && (id == cid))
9099+ return 1;
9100+ }
9101+ return (((mode & DX_ADMIN) && (cid == 0)) ||
9102+ ((mode & DX_WATCH) && (cid == 1)) ||
9103+ ((mode & DX_HOSTID) && (id == 0)));
9104+}
d337f35e 9105+
d33d7b00
AM
9106+struct inode;
9107+int dx_permission(const struct inode *inode, int mask);
d337f35e 9108+
d337f35e 9109+
d33d7b00
AM
9110+#else
9111+#warning duplicate inclusion
9112+#endif
8931d859
AM
9113diff -NurpP --minimal linux-4.4.161/include/linux/vs_time.h linux-4.4.161-vs2.3.9.8/include/linux/vs_time.h
9114--- linux-4.4.161/include/linux/vs_time.h 1970-01-01 00:00:00.000000000 +0000
9115+++ linux-4.4.161-vs2.3.9.8/include/linux/vs_time.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00
AM
9116@@ -0,0 +1,19 @@
9117+#ifndef _VS_TIME_H
9118+#define _VS_TIME_H
d337f35e 9119+
d337f35e 9120+
d33d7b00 9121+/* time faking stuff */
d337f35e 9122+
d33d7b00 9123+#ifdef CONFIG_VSERVER_VTIME
d337f35e 9124+
d33d7b00 9125+extern void vx_adjust_timespec(struct timespec *ts);
763640ca 9126+extern int vx_settimeofday(const struct timespec *ts);
d337f35e 9127+
d33d7b00
AM
9128+#else
9129+#define vx_adjust_timespec(t) do { } while (0)
9130+#define vx_settimeofday(t) do_settimeofday(t)
9131+#endif
d337f35e 9132+
d33d7b00
AM
9133+#else
9134+#warning duplicate inclusion
9135+#endif
8931d859
AM
9136diff -NurpP --minimal linux-4.4.161/include/linux/vserver/base.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/base.h
9137--- linux-4.4.161/include/linux/vserver/base.h 1970-01-01 00:00:00.000000000 +0000
9138+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/base.h 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8 9139@@ -0,0 +1,184 @@
4bf69007
AM
9140+#ifndef _VSERVER_BASE_H
9141+#define _VSERVER_BASE_H
d337f35e 9142+
d337f35e 9143+
d33d7b00 9144+/* context state changes */
d337f35e 9145+
d33d7b00
AM
9146+enum {
9147+ VSC_STARTUP = 1,
9148+ VSC_SHUTDOWN,
d337f35e 9149+
d33d7b00
AM
9150+ VSC_NETUP,
9151+ VSC_NETDOWN,
3bac966d 9152+};
d337f35e 9153+
d337f35e
JR
9154+
9155+
d33d7b00 9156+#define vx_task_xid(t) ((t)->xid)
d337f35e 9157+
d33d7b00 9158+#define vx_current_xid() vx_task_xid(current)
d337f35e 9159+
d33d7b00 9160+#define current_vx_info() (current->vx_info)
d337f35e 9161+
ba86f833 9162+
d33d7b00 9163+#define nx_task_nid(t) ((t)->nid)
ba86f833 9164+
d33d7b00 9165+#define nx_current_nid() nx_task_nid(current)
d337f35e 9166+
d33d7b00 9167+#define current_nx_info() (current->nx_info)
d337f35e 9168+
d337f35e 9169+
d33d7b00 9170+/* generic flag merging */
d337f35e 9171+
d33d7b00 9172+#define vs_check_flags(v, m, f) (((v) & (m)) ^ (f))
d337f35e 9173+
d33d7b00 9174+#define vs_mask_flags(v, f, m) (((v) & ~(m)) | ((f) & (m)))
d337f35e 9175+
d33d7b00 9176+#define vs_mask_mask(v, f, m) (((v) & ~(m)) | ((v) & (f) & (m)))
d337f35e 9177+
d33d7b00 9178+#define vs_check_bit(v, n) ((v) & (1LL << (n)))
d337f35e 9179+
d337f35e 9180+
d33d7b00 9181+/* context flags */
d337f35e 9182+
d33d7b00 9183+#define __vx_flags(v) ((v) ? (v)->vx_flags : 0)
d337f35e 9184+
d33d7b00 9185+#define vx_current_flags() __vx_flags(current_vx_info())
d337f35e 9186+
d33d7b00
AM
9187+#define vx_info_flags(v, m, f) \
9188+ vs_check_flags(__vx_flags(v), m, f)
d337f35e 9189+
d33d7b00
AM
9190+#define task_vx_flags(t, m, f) \
9191+ ((t) && vx_info_flags((t)->vx_info, m, f))
d337f35e 9192+
d33d7b00 9193+#define vx_flags(m, f) vx_info_flags(current_vx_info(), m, f)
d337f35e
JR
9194+
9195+
d33d7b00 9196+/* context caps */
d337f35e 9197+
d33d7b00 9198+#define __vx_ccaps(v) ((v) ? (v)->vx_ccaps : 0)
d337f35e 9199+
d33d7b00 9200+#define vx_current_ccaps() __vx_ccaps(current_vx_info())
d337f35e 9201+
d33d7b00 9202+#define vx_info_ccaps(v, c) (__vx_ccaps(v) & (c))
d337f35e 9203+
d33d7b00 9204+#define vx_ccaps(c) vx_info_ccaps(current_vx_info(), (c))
d337f35e 9205+
d337f35e
JR
9206+
9207+
d33d7b00 9208+/* network flags */
2380c486 9209+
d33d7b00 9210+#define __nx_flags(n) ((n) ? (n)->nx_flags : 0)
d337f35e 9211+
d33d7b00 9212+#define nx_current_flags() __nx_flags(current_nx_info())
d337f35e 9213+
d33d7b00
AM
9214+#define nx_info_flags(n, m, f) \
9215+ vs_check_flags(__nx_flags(n), m, f)
d337f35e 9216+
d33d7b00
AM
9217+#define task_nx_flags(t, m, f) \
9218+ ((t) && nx_info_flags((t)->nx_info, m, f))
d337f35e 9219+
d33d7b00 9220+#define nx_flags(m, f) nx_info_flags(current_nx_info(), m, f)
d337f35e 9221+
d337f35e 9222+
d33d7b00 9223+/* network caps */
d337f35e 9224+
d33d7b00 9225+#define __nx_ncaps(n) ((n) ? (n)->nx_ncaps : 0)
d337f35e 9226+
d33d7b00 9227+#define nx_current_ncaps() __nx_ncaps(current_nx_info())
d337f35e 9228+
d33d7b00 9229+#define nx_info_ncaps(n, c) (__nx_ncaps(n) & (c))
d337f35e 9230+
d33d7b00 9231+#define nx_ncaps(c) nx_info_ncaps(current_nx_info(), c)
d337f35e 9232+
d337f35e 9233+
d33d7b00 9234+/* context mask capabilities */
d337f35e 9235+
d33d7b00 9236+#define __vx_mcaps(v) ((v) ? (v)->vx_ccaps >> 32UL : ~0 )
d337f35e 9237+
d33d7b00 9238+#define vx_info_mcaps(v, c) (__vx_mcaps(v) & (c))
d337f35e 9239+
d33d7b00 9240+#define vx_mcaps(c) vx_info_mcaps(current_vx_info(), c)
d337f35e
JR
9241+
9242+
d33d7b00 9243+/* context bcap mask */
d337f35e 9244+
d33d7b00 9245+#define __vx_bcaps(v) ((v)->vx_bcaps)
d337f35e 9246+
d33d7b00 9247+#define vx_current_bcaps() __vx_bcaps(current_vx_info())
d337f35e 9248+
d337f35e 9249+
d33d7b00 9250+/* mask given bcaps */
adc1caaa 9251+
d33d7b00 9252+#define vx_info_mbcaps(v, c) ((v) ? cap_intersect(__vx_bcaps(v), c) : c)
2380c486 9253+
d33d7b00 9254+#define vx_mbcaps(c) vx_info_mbcaps(current_vx_info(), c)
d337f35e
JR
9255+
9256+
d33d7b00 9257+/* masked cap_bset */
2380c486 9258+
d33d7b00 9259+#define vx_info_cap_bset(v) vx_info_mbcaps(v, current->cap_bset)
2380c486 9260+
d33d7b00 9261+#define vx_current_cap_bset() vx_info_cap_bset(current_vx_info())
d337f35e 9262+
d33d7b00
AM
9263+#if 0
9264+#define vx_info_mbcap(v, b) \
9265+ (!vx_info_flags(v, VXF_STATE_SETUP, 0) ? \
9266+ vx_info_bcaps(v, b) : (b))
d337f35e 9267+
d33d7b00
AM
9268+#define task_vx_mbcap(t, b) \
9269+ vx_info_mbcap((t)->vx_info, (t)->b)
9270+
9271+#define vx_mbcap(b) task_vx_mbcap(current, b)
3bac966d 9272+#endif
d337f35e 9273+
d33d7b00 9274+#define vx_cap_raised(v, c, f) cap_raised(vx_info_mbcaps(v, c), f)
d337f35e 9275+
d33d7b00
AM
9276+#define vx_capable(b, c) (capable(b) || \
9277+ (cap_raised(current_cap(), b) && vx_ccaps(c)))
d337f35e 9278+
763640ca
JR
9279+#define vx_ns_capable(n, b, c) (ns_capable(n, b) || \
9280+ (cap_raised(current_cap(), b) && vx_ccaps(c)))
9281+
d33d7b00
AM
9282+#define nx_capable(b, c) (capable(b) || \
9283+ (cap_raised(current_cap(), b) && nx_ncaps(c)))
d337f35e 9284+
c2e5f7c8
JR
9285+#define nx_ns_capable(n, b, c) (ns_capable(n, b) || \
9286+ (cap_raised(current_cap(), b) && nx_ncaps(c)))
9287+
d33d7b00
AM
9288+#define vx_task_initpid(t, n) \
9289+ ((t)->vx_info && \
9290+ ((t)->vx_info->vx_initpid == (n)))
d337f35e 9291+
d33d7b00 9292+#define vx_current_initpid(n) vx_task_initpid(current, n)
d337f35e 9293+
d337f35e 9294+
d33d7b00 9295+/* context unshare mask */
d337f35e 9296+
d33d7b00 9297+#define __vx_umask(v) ((v)->vx_umask)
7e46296a 9298+
d33d7b00 9299+#define vx_current_umask() __vx_umask(current_vx_info())
7e46296a 9300+
d33d7b00
AM
9301+#define vx_can_unshare(b, f) (capable(b) || \
9302+ (cap_raised(current_cap(), b) && \
9303+ !((f) & ~vx_current_umask())))
7e46296a 9304+
b00e13aa
AM
9305+#define vx_ns_can_unshare(n, b, f) (ns_capable(n, b) || \
9306+ (cap_raised(current_cap(), b) && \
9307+ !((f) & ~vx_current_umask())))
7e46296a 9308+
265d6dcc
JR
9309+#define __vx_wmask(v) ((v)->vx_wmask)
9310+
9311+#define vx_current_wmask() __vx_wmask(current_vx_info())
9312+
9313+
d33d7b00 9314+#define __vx_state(v) ((v) ? ((v)->vx_state) : 0)
7e46296a 9315+
d33d7b00 9316+#define vx_info_state(v, m) (__vx_state(v) & (m))
d337f35e 9317+
d337f35e 9318+
d33d7b00 9319+#define __nx_state(n) ((n) ? ((n)->nx_state) : 0)
d337f35e 9320+
d33d7b00 9321+#define nx_info_state(n, m) (__nx_state(n) & (m))
d337f35e 9322+
d33d7b00 9323+#endif
8931d859
AM
9324diff -NurpP --minimal linux-4.4.161/include/linux/vserver/cacct.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/cacct.h
9325--- linux-4.4.161/include/linux/vserver/cacct.h 1970-01-01 00:00:00.000000000 +0000
9326+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/cacct.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00 9327@@ -0,0 +1,15 @@
4bf69007
AM
9328+#ifndef _VSERVER_CACCT_H
9329+#define _VSERVER_CACCT_H
d337f35e 9330+
d337f35e 9331+
d33d7b00
AM
9332+enum sock_acc_field {
9333+ VXA_SOCK_UNSPEC = 0,
9334+ VXA_SOCK_UNIX,
9335+ VXA_SOCK_INET,
9336+ VXA_SOCK_INET6,
9337+ VXA_SOCK_PACKET,
9338+ VXA_SOCK_OTHER,
9339+ VXA_SOCK_SIZE /* array size */
9340+};
d337f35e 9341+
4bf69007 9342+#endif /* _VSERVER_CACCT_H */
8931d859
AM
9343diff -NurpP --minimal linux-4.4.161/include/linux/vserver/cacct_cmd.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/cacct_cmd.h
9344--- linux-4.4.161/include/linux/vserver/cacct_cmd.h 1970-01-01 00:00:00.000000000 +0000
9345+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/cacct_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
9346@@ -0,0 +1,10 @@
9347+#ifndef _VSERVER_CACCT_CMD_H
9348+#define _VSERVER_CACCT_CMD_H
d337f35e 9349+
d337f35e 9350+
3bac966d 9351+#include <linux/compiler.h>
4bf69007 9352+#include <uapi/vserver/cacct_cmd.h>
d337f35e 9353+
d33d7b00 9354+extern int vc_sock_stat(struct vx_info *, void __user *);
d337f35e 9355+
4bf69007 9356+#endif /* _VSERVER_CACCT_CMD_H */
8931d859
AM
9357diff -NurpP --minimal linux-4.4.161/include/linux/vserver/cacct_def.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/cacct_def.h
9358--- linux-4.4.161/include/linux/vserver/cacct_def.h 1970-01-01 00:00:00.000000000 +0000
9359+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/cacct_def.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00 9360@@ -0,0 +1,43 @@
4bf69007
AM
9361+#ifndef _VSERVER_CACCT_DEF_H
9362+#define _VSERVER_CACCT_DEF_H
d337f35e 9363+
d33d7b00
AM
9364+#include <asm/atomic.h>
9365+#include <linux/vserver/cacct.h>
d337f35e
JR
9366+
9367+
d33d7b00
AM
9368+struct _vx_sock_acc {
9369+ atomic_long_t count;
9370+ atomic_long_t total;
9371+};
d337f35e 9372+
d33d7b00 9373+/* context sub struct */
d337f35e 9374+
d33d7b00
AM
9375+struct _vx_cacct {
9376+ struct _vx_sock_acc sock[VXA_SOCK_SIZE][3];
9377+ atomic_t slab[8];
9378+ atomic_t page[6][8];
9379+};
d337f35e 9380+
d33d7b00 9381+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 9382+/* debug helper: printk the count/total pair of every cacct->sock[i][j] */
d33d7b00
AM
9383+static inline void __dump_vx_cacct(struct _vx_cacct *cacct)
9384+{
9385+ int i, j;
d337f35e 9386+
d33d7b00
AM
9387+ printk("\t_vx_cacct:\n");
9388+ for (i = 0; i < VXA_SOCK_SIZE; i++) { /* first dimension of sock[] */
9389+ struct _vx_sock_acc *ptr = cacct->sock[i];
d337f35e 9390+
d33d7b00
AM
9391+ printk("\t [%d] =", i);
9392+ for (j = 0; j < 3; j++) { /* second dimension of sock[] */
9393+ printk(" [%d] = %8lu, %8lu", j,
9394+ atomic_long_read(&ptr[j].count),
9395+ atomic_long_read(&ptr[j].total));
9396+ }
9397+ printk("\n");
9398+ }
9399+}
2380c486 9400+
d33d7b00 9401+#endif
d337f35e 9402+
4bf69007 9403+#endif /* _VSERVER_CACCT_DEF_H */
8931d859
AM
9404diff -NurpP --minimal linux-4.4.161/include/linux/vserver/cacct_int.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/cacct_int.h
9405--- linux-4.4.161/include/linux/vserver/cacct_int.h 1970-01-01 00:00:00.000000000 +0000
9406+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/cacct_int.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
9407@@ -0,0 +1,17 @@
9408+#ifndef _VSERVER_CACCT_INT_H
9409+#define _VSERVER_CACCT_INT_H
d337f35e 9410+/* read the 'count' counter of accounting slot sock[type][pos] */
d33d7b00
AM
9411+static inline
9412+unsigned long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
9413+{
9414+ struct _vx_sock_acc *slot = &cacct->sock[type][pos];
9415+ return atomic_long_read(&slot->count);
d337f35e 9416+}
d337f35e 9417+/* read the 'total' counter of accounting slot sock[type][pos] */
d33d7b00
AM
9418+static inline
9419+unsigned long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
9420+{
9421+ struct _vx_sock_acc *slot = &cacct->sock[type][pos];
9422+ return atomic_long_read(&slot->total);
d337f35e 9423+}
4bf69007 9424+#endif /* _VSERVER_CACCT_INT_H */
8931d859
AM
9425diff -NurpP --minimal linux-4.4.161/include/linux/vserver/check.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/check.h
9426--- linux-4.4.161/include/linux/vserver/check.h 1970-01-01 00:00:00.000000000 +0000
9427+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/check.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00 9428@@ -0,0 +1,89 @@
4bf69007
AM
9429+#ifndef _VSERVER_CHECK_H
9430+#define _VSERVER_CHECK_H
d337f35e 9431+
d337f35e 9432+
d33d7b00 9433+#define MAX_S_CONTEXT 65535 /* Arbitrary limit */
d337f35e 9434+
d33d7b00
AM
9435+#ifdef CONFIG_VSERVER_DYNAMIC_IDS
9436+#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */
9437+#else
9438+#define MIN_D_CONTEXT 65536
9439+#endif
d337f35e 9440+
d33d7b00 9441+/* check conditions */
d337f35e 9442+
d33d7b00
AM
9443+#define VS_ADMIN 0x0001
9444+#define VS_WATCH 0x0002
9445+#define VS_HIDE 0x0004
9446+#define VS_HOSTID 0x0008
d337f35e 9447+
d33d7b00
AM
9448+#define VS_IDENT 0x0010
9449+#define VS_EQUIV 0x0020
9450+#define VS_PARENT 0x0040
9451+#define VS_CHILD 0x0080
d337f35e 9452+
d33d7b00 9453+#define VS_ARG_MASK 0x00F0
d337f35e 9454+
d33d7b00
AM
9455+#define VS_DYNAMIC 0x0100
9456+#define VS_STATIC 0x0200
d337f35e 9457+
d33d7b00 9458+#define VS_ATR_MASK 0x0F00
d337f35e 9459+
d33d7b00
AM
9460+#ifdef CONFIG_VSERVER_PRIVACY
9461+#define VS_ADMIN_P (0)
9462+#define VS_WATCH_P (0)
9463+#else
9464+#define VS_ADMIN_P VS_ADMIN
9465+#define VS_WATCH_P VS_WATCH
9466+#endif
d337f35e 9467+
d33d7b00
AM
9468+#define VS_HARDIRQ 0x1000
9469+#define VS_SOFTIRQ 0x2000
9470+#define VS_IRQ 0x4000
d337f35e 9471+
d33d7b00 9472+#define VS_IRQ_MASK 0xF000
d337f35e 9473+
d33d7b00 9474+#include <linux/hardirq.h>
d337f35e 9475+
d33d7b00
AM
9476+/*
9477+ * returns 1 as soon as one test selected in 'mode' passes for the
9478+ * caller id 'cid' and the supplied id 'id', otherwise 0
9479+ */
9480+static inline int __vs_check(int cid, int id, unsigned int mode)
9481+{
9482+ /* VS_IDENT: the supplied id is the caller's own id */
9483+ if ((mode & VS_ARG_MASK) && (mode & VS_IDENT) && (cid == id))
9484+ return 1;
9485+
9486+ if (mode & VS_ATR_MASK) {
9487+ int dyn = (id >= MIN_D_CONTEXT) && (id <= MAX_S_CONTEXT);
9488+ int sta = (id > 1) && (id < MIN_D_CONTEXT);
9489+
9490+ if (((mode & VS_DYNAMIC) && dyn) || ((mode & VS_STATIC) && sta))
9491+ return 1;
9492+ }
9493+
9494+ if (mode & VS_IRQ_MASK) {
9495+ if ((mode & VS_IRQ) && unlikely(in_interrupt()))
9496+ return 1;
9497+ if ((mode & VS_HARDIRQ) && unlikely(in_irq()))
9498+ return 1;
9499+ if ((mode & VS_SOFTIRQ) && unlikely(in_softirq()))
9500+ return 1;
9501+ }
9502+
9503+ return (((mode & VS_ADMIN) && (cid == 0)) ||
9504+ ((mode & VS_WATCH) && (cid == 1)) ||
9505+ ((mode & VS_HOSTID) && (id == 0)));
9506+}
d337f35e 9507+
d33d7b00 9508+#define vx_check(c, m) __vs_check(vx_current_xid(), c, (m) | VS_IRQ)
d337f35e 9509+
d33d7b00 9510+#define vx_weak_check(c, m) ((m) ? vx_check(c, m) : 1)
2380c486 9511+
d337f35e 9512+
d33d7b00 9513+#define nx_check(c, m) __vs_check(nx_current_nid(), c, m)
d337f35e 9514+
d33d7b00 9515+#define nx_weak_check(c, m) ((m) ? nx_check(c, m) : 1)
d337f35e 9516+
d33d7b00 9517+#endif
8931d859
AM
9518diff -NurpP --minimal linux-4.4.161/include/linux/vserver/context.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/context.h
9519--- linux-4.4.161/include/linux/vserver/context.h 1970-01-01 00:00:00.000000000 +0000
9520+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/context.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
9521@@ -0,0 +1,110 @@
9522+#ifndef _VSERVER_CONTEXT_H
9523+#define _VSERVER_CONTEXT_H
d337f35e
JR
9524+
9525+
d33d7b00
AM
9526+#include <linux/list.h>
9527+#include <linux/spinlock.h>
9528+#include <linux/rcupdate.h>
4bf69007 9529+#include <uapi/vserver/context.h>
d337f35e 9530+
d33d7b00
AM
9531+#include "limit_def.h"
9532+#include "sched_def.h"
9533+#include "cvirt_def.h"
9534+#include "cacct_def.h"
9535+#include "device_def.h"
d337f35e 9536+
d33d7b00 9537+#define VX_SPACES 2
d337f35e 9538+
d33d7b00
AM
9539+struct _vx_info_pc {
9540+ struct _vx_sched_pc sched_pc;
9541+ struct _vx_cvirt_pc cvirt_pc;
9542+};
d337f35e 9543+
d33d7b00
AM
9544+struct _vx_space {
9545+ unsigned long vx_nsmask; /* assignment mask */
9546+ struct nsproxy *vx_nsproxy; /* private namespaces */
9547+ struct fs_struct *vx_fs; /* private namespace fs */
9548+ const struct cred *vx_cred; /* task credentials */
9549+};
d337f35e 9550+
d33d7b00
AM
9551+struct vx_info {
9552+ struct hlist_node vx_hlist; /* linked list of contexts */
61333608 9553+ vxid_t vx_id; /* context id */
d33d7b00
AM
9554+ atomic_t vx_usecnt; /* usage count */
9555+ atomic_t vx_tasks; /* tasks count */
9556+ struct vx_info *vx_parent; /* parent context */
9557+ int vx_state; /* context state */
d337f35e 9558+
d33d7b00 9559+ struct _vx_space space[VX_SPACES]; /* namespace store */
d337f35e 9560+
d33d7b00
AM
9561+ uint64_t vx_flags; /* context flags */
9562+ uint64_t vx_ccaps; /* context caps (vserver) */
763640ca 9563+ uint64_t vx_umask; /* unshare mask (guest) */
265d6dcc 9564+ uint64_t vx_wmask; /* warn mask (guest) */
d33d7b00 9565+ kernel_cap_t vx_bcaps; /* bounding caps (system) */
d337f35e 9566+
d33d7b00
AM
9567+ struct task_struct *vx_reaper; /* guest reaper process */
9568+ pid_t vx_initpid; /* PID of guest init */
9569+ int64_t vx_badness_bias; /* OOM points bias */
d337f35e 9570+
d33d7b00
AM
9571+ struct _vx_limit limit; /* vserver limits */
9572+ struct _vx_sched sched; /* vserver scheduler */
9573+ struct _vx_cvirt cvirt; /* virtual/bias stuff */
9574+ struct _vx_cacct cacct; /* context accounting */
d337f35e 9575+
d33d7b00 9576+ struct _vx_device dmap; /* default device map targets */
d337f35e 9577+
d33d7b00
AM
9578+#ifndef CONFIG_SMP
9579+ struct _vx_info_pc info_pc; /* per cpu data */
9580+#else
9581+ struct _vx_info_pc *ptr_pc; /* per cpu array */
9582+#endif
d337f35e 9583+
d33d7b00
AM
9584+ wait_queue_head_t vx_wait; /* context exit waitqueue */
9585+ int reboot_cmd; /* last sys_reboot() cmd */
9586+ int exit_code; /* last process exit code */
d337f35e 9587+
d33d7b00
AM
9588+ char vx_name[65]; /* vserver name */
9589+};
d337f35e 9590+
d33d7b00
AM
9591+#ifndef CONFIG_SMP
9592+#define vx_ptr_pc(vxi) (&(vxi)->info_pc)
9593+#define vx_per_cpu(vxi, v, id) vx_ptr_pc(vxi)->v
9594+#else
9595+#define vx_ptr_pc(vxi) ((vxi)->ptr_pc)
9596+#define vx_per_cpu(vxi, v, id) per_cpu_ptr(vx_ptr_pc(vxi), id)->v
9597+#endif
d337f35e 9598+
d33d7b00 9599+#define vx_cpu(vxi, v) vx_per_cpu(vxi, v, smp_processor_id())
d337f35e 9600+
d337f35e 9601+
d33d7b00
AM
9602+struct vx_info_save {
9603+ struct vx_info *vxi;
61333608 9604+ vxid_t xid;
d33d7b00 9605+};
d337f35e
JR
9606+
9607+
d33d7b00 9608+/* status flags */
d337f35e 9609+
d33d7b00
AM
9610+#define VXS_HASHED 0x0001
9611+#define VXS_PAUSED 0x0010
9612+#define VXS_SHUTDOWN 0x0100
9613+#define VXS_HELPER 0x1000
9614+#define VXS_RELEASED 0x8000
d337f35e 9615+
d337f35e 9616+
d33d7b00
AM
9617+extern void claim_vx_info(struct vx_info *, struct task_struct *);
9618+extern void release_vx_info(struct vx_info *, struct task_struct *);
adc1caaa 9619+
d33d7b00
AM
9620+extern struct vx_info *lookup_vx_info(int);
9621+extern struct vx_info *lookup_or_create_vx_info(int);
d337f35e 9622+
d33d7b00 9623+extern int get_xid_list(int, unsigned int *, int);
61333608 9624+extern int xid_is_hashed(vxid_t);
d337f35e 9625+
d33d7b00 9626+extern int vx_migrate_task(struct task_struct *, struct vx_info *, int);
d337f35e 9627+
d33d7b00 9628+extern long vs_state_change(struct vx_info *, unsigned int);
d337f35e 9629+
d337f35e 9630+
4bf69007 9631+#endif /* _VSERVER_CONTEXT_H */
8931d859
AM
9632diff -NurpP --minimal linux-4.4.161/include/linux/vserver/context_cmd.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/context_cmd.h
9633--- linux-4.4.161/include/linux/vserver/context_cmd.h 1970-01-01 00:00:00.000000000 +0000
9634+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/context_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
9635@@ -0,0 +1,33 @@
9636+#ifndef _VSERVER_CONTEXT_CMD_H
9637+#define _VSERVER_CONTEXT_CMD_H
d337f35e 9638+
4bf69007 9639+#include <uapi/vserver/context_cmd.h>
d337f35e 9640+
d33d7b00 9641+extern int vc_task_xid(uint32_t);
d337f35e 9642+
d33d7b00 9643+extern int vc_vx_info(struct vx_info *, void __user *);
d337f35e 9644+
d33d7b00 9645+extern int vc_ctx_stat(struct vx_info *, void __user *);
d337f35e 9646+
4bf69007
AM
9647+extern int vc_ctx_create(uint32_t, void __user *);
9648+extern int vc_ctx_migrate(struct vx_info *, void __user *);
d337f35e 9649+
4bf69007
AM
9650+extern int vc_get_cflags(struct vx_info *, void __user *);
9651+extern int vc_set_cflags(struct vx_info *, void __user *);
d337f35e 9652+
4bf69007
AM
9653+extern int vc_get_ccaps(struct vx_info *, void __user *);
9654+extern int vc_set_ccaps(struct vx_info *, void __user *);
d337f35e 9655+
4bf69007
AM
9656+extern int vc_get_bcaps(struct vx_info *, void __user *);
9657+extern int vc_set_bcaps(struct vx_info *, void __user *);
d337f35e 9658+
4bf69007
AM
9659+extern int vc_get_umask(struct vx_info *, void __user *);
9660+extern int vc_set_umask(struct vx_info *, void __user *);
d33d7b00 9661+
4bf69007
AM
9662+extern int vc_get_wmask(struct vx_info *, void __user *);
9663+extern int vc_set_wmask(struct vx_info *, void __user *);
d33d7b00 9664+
4bf69007
AM
9665+extern int vc_get_badness(struct vx_info *, void __user *);
9666+extern int vc_set_badness(struct vx_info *, void __user *);
d337f35e 9667+
4bf69007 9668+#endif /* _VSERVER_CONTEXT_CMD_H */
8931d859
AM
9669diff -NurpP --minimal linux-4.4.161/include/linux/vserver/cvirt.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/cvirt.h
9670--- linux-4.4.161/include/linux/vserver/cvirt.h 1970-01-01 00:00:00.000000000 +0000
9671+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/cvirt.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
9672@@ -0,0 +1,18 @@
9673+#ifndef _VSERVER_CVIRT_H
9674+#define _VSERVER_CVIRT_H
d337f35e 9675+
4bf69007 9676+struct timespec;
d337f35e 9677+
4bf69007 9678+void vx_vsi_boottime(struct timespec *);
d337f35e 9679+
4bf69007 9680+void vx_vsi_uptime(struct timespec *, struct timespec *);
d337f35e 9681+
d337f35e 9682+
4bf69007 9683+struct vx_info;
d337f35e 9684+
4bf69007 9685+void vx_update_load(struct vx_info *);
d337f35e 9686+
d337f35e 9687+
4bf69007 9688+int vx_do_syslog(int, char __user *, int);
d337f35e 9689+
4bf69007 9690+#endif /* _VSERVER_CVIRT_H */
8931d859
AM
9691diff -NurpP --minimal linux-4.4.161/include/linux/vserver/cvirt_cmd.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/cvirt_cmd.h
9692--- linux-4.4.161/include/linux/vserver/cvirt_cmd.h 1970-01-01 00:00:00.000000000 +0000
9693+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/cvirt_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
9694@@ -0,0 +1,13 @@
9695+#ifndef _VSERVER_CVIRT_CMD_H
9696+#define _VSERVER_CVIRT_CMD_H
d337f35e 9697+
d337f35e 9698+
4bf69007
AM
9699+#include <linux/compiler.h>
9700+#include <uapi/vserver/cvirt_cmd.h>
d337f35e 9701+
4bf69007
AM
9702+extern int vc_set_vhi_name(struct vx_info *, void __user *);
9703+extern int vc_get_vhi_name(struct vx_info *, void __user *);
d337f35e 9704+
4bf69007 9705+extern int vc_virt_stat(struct vx_info *, void __user *);
d337f35e 9706+
4bf69007 9707+#endif /* _VSERVER_CVIRT_CMD_H */
8931d859
AM
9708diff -NurpP --minimal linux-4.4.161/include/linux/vserver/cvirt_def.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/cvirt_def.h
9709--- linux-4.4.161/include/linux/vserver/cvirt_def.h 1970-01-01 00:00:00.000000000 +0000
9710+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/cvirt_def.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
9711@@ -0,0 +1,80 @@
9712+#ifndef _VSERVER_CVIRT_DEF_H
9713+#define _VSERVER_CVIRT_DEF_H
d337f35e 9714+
d33d7b00
AM
9715+#include <linux/jiffies.h>
9716+#include <linux/spinlock.h>
9717+#include <linux/wait.h>
9718+#include <linux/time.h>
9719+#include <asm/atomic.h>
d337f35e 9720+
d337f35e 9721+
d33d7b00
AM
9722+struct _vx_usage_stat {
9723+ uint64_t user;
9724+ uint64_t nice;
9725+ uint64_t system;
9726+ uint64_t softirq;
9727+ uint64_t irq;
9728+ uint64_t idle;
9729+ uint64_t iowait;
9730+};
d337f35e 9731+
d33d7b00
AM
9732+struct _vx_syslog {
9733+ wait_queue_head_t log_wait;
9734+ spinlock_t logbuf_lock; /* lock for the log buffer */
d337f35e 9735+
d33d7b00
AM
9736+ unsigned long log_start; /* next char to be read by syslog() */
9737+ unsigned long con_start; /* next char to be sent to consoles */
9738+ unsigned long log_end; /* most-recently-written-char + 1 */
9739+ unsigned long logged_chars; /* #chars since last read+clear operation */
d337f35e 9740+
d33d7b00
AM
9741+ char log_buf[1024];
9742+};
d337f35e 9743+
d337f35e 9744+
d33d7b00 9745+/* context sub struct */
d337f35e 9746+
d33d7b00
AM
9747+struct _vx_cvirt {
9748+ atomic_t nr_threads; /* number of current threads */
9749+ atomic_t nr_running; /* number of running threads */
9750+ atomic_t nr_uninterruptible; /* number of uninterruptible threads */
d337f35e 9751+
d33d7b00
AM
9752+ atomic_t nr_onhold; /* processes on hold */
9753+ uint32_t onhold_last; /* jiffies when put on hold */
d337f35e 9754+
d33d7b00
AM
9755+ struct timespec bias_ts; /* time offset to the host */
9756+ struct timespec bias_idle;
9757+ struct timespec bias_uptime; /* context creation point */
9758+ uint64_t bias_clock; /* offset in clock_t */
3bac966d 9759+
d33d7b00
AM
9760+ spinlock_t load_lock; /* lock for the load averages */
9761+ atomic_t load_updates; /* nr of load updates done so far */
9762+ uint32_t load_last; /* last time load was calculated */
9763+ uint32_t load[3]; /* load averages 1,5,15 */
d337f35e 9764+
d33d7b00 9765+ atomic_t total_forks; /* number of forks so far */
d337f35e 9766+
d33d7b00
AM
9767+ struct _vx_syslog syslog;
9768+};
d337f35e 9769+
d33d7b00
AM
9770+struct _vx_cvirt_pc {
9771+ struct _vx_usage_stat cpustat;
9772+};
3bac966d 9773+
d337f35e 9774+
d33d7b00 9775+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 9776+/* debug helper: printk the four thread counters and total_forks of 'cvirt' */
d33d7b00 9777+static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt)
3bac966d 9778+{
d33d7b00
AM
9779+ printk("\t_vx_cvirt:\n");
9780+ printk("\t threads: %4d, %4d, %4d, %4d\n",
9781+ atomic_read(&cvirt->nr_threads),
9782+ atomic_read(&cvirt->nr_running),
9783+ atomic_read(&cvirt->nr_uninterruptible),
9784+ atomic_read(&cvirt->nr_onhold));
9785+ /* TODO: the remaining _vx_cvirt fields are not dumped yet */
9786+ printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks));
3bac966d 9787+}
d337f35e 9788+
d33d7b00 9789+#endif
d337f35e 9790+
4bf69007 9791+#endif /* _VSERVER_CVIRT_DEF_H */
8931d859
AM
9792diff -NurpP --minimal linux-4.4.161/include/linux/vserver/debug.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/debug.h
9793--- linux-4.4.161/include/linux/vserver/debug.h 1970-01-01 00:00:00.000000000 +0000
9794+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/debug.h 2018-10-20 04:57:21.000000000 +0000
a4a22af8 9795@@ -0,0 +1,146 @@
4bf69007
AM
9796+#ifndef _VSERVER_DEBUG_H
9797+#define _VSERVER_DEBUG_H
d337f35e 9798+
d337f35e 9799+/* VXD_CBIT tests bit 'm' of vs_debug_<n>; 1U keeps the shift defined for m == 31 */
dd5f3080 9800+#define VXD_CBIT(n, m) (vs_debug_ ## n & (1U << (m)))
9801+#define VXD_CMIN(n, m) (vs_debug_ ## n > (m))
9802+#define VXD_MASK(n, m) (vs_debug_ ## n & (m))
d337f35e 9803+
d33d7b00
AM
9804+#define VXD_DEV(d) (d), (d)->bd_inode->i_ino, \
9805+ imajor((d)->bd_inode), iminor((d)->bd_inode)
9806+#define VXF_DEV "%p[%lu,%d:%d]"
d337f35e 9807+
d33d7b00
AM
9808+#if defined(CONFIG_QUOTES_UTF8)
9809+#define VS_Q_LQM "\xc2\xbb"
9810+#define VS_Q_RQM "\xc2\xab"
9811+#elif defined(CONFIG_QUOTES_ASCII)
9812+#define VS_Q_LQM "\x27"
9813+#define VS_Q_RQM "\x27"
9814+#else
9815+#define VS_Q_LQM "\xbb"
9816+#define VS_Q_RQM "\xab"
9817+#endif
d337f35e 9818+
d33d7b00 9819+#define VS_Q(f) VS_Q_LQM f VS_Q_RQM
d337f35e
JR
9820+
9821+
d33d7b00
AM
9822+#define vxd_path(p) \
9823+ ({ static char _buffer[PATH_MAX]; \
9824+ d_path(p, _buffer, sizeof(_buffer)); })
d337f35e 9825+
d33d7b00
AM
9826+#define vxd_cond_path(n) \
9827+ ((n) ? vxd_path(&(n)->path) : "<null>" )
d337f35e 9828+
d337f35e 9829+
d33d7b00 9830+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 9831+
dd5f3080 9832+extern unsigned int vs_debug_switch;
9833+extern unsigned int vs_debug_xid;
9834+extern unsigned int vs_debug_nid;
9835+extern unsigned int vs_debug_tag;
9836+extern unsigned int vs_debug_net;
9837+extern unsigned int vs_debug_limit;
9838+extern unsigned int vs_debug_cres;
9839+extern unsigned int vs_debug_dlim;
9840+extern unsigned int vs_debug_quota;
9841+extern unsigned int vs_debug_cvirt;
9842+extern unsigned int vs_debug_space;
9843+extern unsigned int vs_debug_perm;
9844+extern unsigned int vs_debug_misc;
d337f35e 9845+
d337f35e 9846+
d33d7b00
AM
9847+#define VX_LOGLEVEL "vxD: "
9848+#define VX_PROC_FMT "%p: "
9849+#define VX_PROCESS current
d337f35e 9850+
d33d7b00
AM
9851+#define vxdprintk(c, f, x...) \
9852+ do { \
9853+ if (c) \
9854+ printk(VX_LOGLEVEL VX_PROC_FMT f "\n", \
9855+ VX_PROCESS , ##x); \
9856+ } while (0)
d337f35e 9857+
d33d7b00
AM
9858+#define vxlprintk(c, f, x...) \
9859+ do { \
9860+ if (c) \
9861+ printk(VX_LOGLEVEL f " @%s:%d\n", x); \
9862+ } while (0)
d337f35e 9863+
d33d7b00
AM
9864+#define vxfprintk(c, f, x...) \
9865+ do { \
9866+ if (c) \
9867+ printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \
9868+ } while (0)
d337f35e 9869+
d337f35e 9870+
d33d7b00 9871+struct vx_info;
d337f35e 9872+
d33d7b00
AM
9873+void dump_vx_info(struct vx_info *, int);
9874+void dump_vx_info_inactive(int);
d337f35e 9875+
d33d7b00 9876+#else /* CONFIG_VSERVER_DEBUG */
d337f35e 9877+
dd5f3080 9878+#define vs_debug_switch 0
9879+#define vs_debug_xid 0
9880+#define vs_debug_nid 0
9881+#define vs_debug_tag 0
9882+#define vs_debug_net 0
9883+#define vs_debug_limit 0
9884+#define vs_debug_cres 0
9885+#define vs_debug_dlim 0
9886+#define vs_debug_quota 0
9887+#define vs_debug_cvirt 0
9888+#define vs_debug_space 0
9889+#define vs_debug_perm 0
9890+#define vs_debug_misc 0
d337f35e 9891+
d33d7b00
AM
9892+#define vxdprintk(x...) do { } while (0)
9893+#define vxlprintk(x...) do { } while (0)
9894+#define vxfprintk(x...) do { } while (0)
2380c486 9895+
d33d7b00 9896+#endif /* CONFIG_VSERVER_DEBUG */
2380c486 9897+
d337f35e 9898+
d33d7b00 9899+#ifdef CONFIG_VSERVER_WARN
d337f35e 9900+
d33d7b00
AM
9901+#define VX_WARNLEVEL KERN_WARNING "vxW: "
9902+#define VX_WARN_TASK "[" VS_Q("%s") ",%u:#%u|%u|%u] "
9903+#define VX_WARN_XID "[xid #%u] "
9904+#define VX_WARN_NID "[nid #%u] "
9905+#define VX_WARN_TAG "[tag #%u] "
d337f35e 9906+
d33d7b00
AM
9907+#define vxwprintk(c, f, x...) \
9908+ do { \
9909+ if (c) \
9910+ printk(VX_WARNLEVEL f "\n", ##x); \
9911+ } while (0)
d337f35e 9912+
d33d7b00 9913+#else /* CONFIG_VSERVER_WARN */
d337f35e 9914+
d33d7b00 9915+#define vxwprintk(x...) do { } while (0)
d337f35e 9916+
d33d7b00 9917+#endif /* CONFIG_VSERVER_WARN */
d337f35e 9918+
d33d7b00
AM
9919+#define vxwprintk_task(c, f, x...) \
9920+ vxwprintk(c, VX_WARN_TASK f, \
9921+ current->comm, current->pid, \
a4a22af8
AM
9922+ current->xid, current->nid, \
9923+ current->tag, ##x)
d33d7b00
AM
9924+#define vxwprintk_xid(c, f, x...) \
9925+ vxwprintk(c, VX_WARN_XID f, current->xid, ##x)
9926+#define vxwprintk_nid(c, f, x...) \
9927+ vxwprintk(c, VX_WARN_NID f, current->nid, ##x)
9928+#define vxwprintk_tag(c, f, x...) \
9929+ vxwprintk(c, VX_WARN_TAG f, current->tag, ##x)
d337f35e 9930+/* ##x drops the preceding comma when no extra arguments are passed */
d33d7b00
AM
9931+#ifdef CONFIG_VSERVER_DEBUG
9932+#define vxd_assert_lock(l) assert_spin_locked(l)
9933+#define vxd_assert(c, f, x...) vxlprintk(!(c), \
9934+ "assertion [" f "] failed.", ##x, __FILE__, __LINE__)
9935+#else
9936+#define vxd_assert_lock(l) do { } while (0)
9937+#define vxd_assert(c, f, x...) do { } while (0)
9938+#endif
d337f35e 9939+
d337f35e 9940+
4bf69007 9941+#endif /* _VSERVER_DEBUG_H */
8931d859
AM
9942diff -NurpP --minimal linux-4.4.161/include/linux/vserver/debug_cmd.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/debug_cmd.h
9943--- linux-4.4.161/include/linux/vserver/debug_cmd.h 1970-01-01 00:00:00.000000000 +0000
9944+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/debug_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
9945@@ -0,0 +1,37 @@
9946+#ifndef _VSERVER_DEBUG_CMD_H
9947+#define _VSERVER_DEBUG_CMD_H
d337f35e 9948+
4bf69007 9949+#include <uapi/vserver/debug_cmd.h>
d337f35e
JR
9950+
9951+
d33d7b00 9952+#ifdef CONFIG_COMPAT
d337f35e 9953+
d33d7b00 9954+#include <asm/compat.h>
d337f35e 9955+
d33d7b00
AM
9956+struct vcmd_read_history_v0_x32 {
9957+ uint32_t index;
9958+ uint32_t count;
9959+ compat_uptr_t data_ptr;
3bac966d 9960+};
d337f35e 9961+
d33d7b00
AM
9962+struct vcmd_read_monitor_v0_x32 {
9963+ uint32_t index;
9964+ uint32_t count;
9965+ compat_uptr_t data_ptr;
3bac966d 9966+};
d337f35e 9967+
d33d7b00 9968+#endif /* CONFIG_COMPAT */
d337f35e 9969+
d33d7b00 9970+extern int vc_dump_history(uint32_t);
d337f35e 9971+
d33d7b00
AM
9972+extern int vc_read_history(uint32_t, void __user *);
9973+extern int vc_read_monitor(uint32_t, void __user *);
d337f35e 9974+
d33d7b00 9975+#ifdef CONFIG_COMPAT
d337f35e 9976+
d33d7b00
AM
9977+extern int vc_read_history_x32(uint32_t, void __user *);
9978+extern int vc_read_monitor_x32(uint32_t, void __user *);
d337f35e 9979+
d33d7b00 9980+#endif /* CONFIG_COMPAT */
d337f35e 9981+
4bf69007 9982+#endif /* _VSERVER_DEBUG_CMD_H */
8931d859
AM
9983diff -NurpP --minimal linux-4.4.161/include/linux/vserver/device.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/device.h
9984--- linux-4.4.161/include/linux/vserver/device.h 1970-01-01 00:00:00.000000000 +0000
9985+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/device.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
9986@@ -0,0 +1,9 @@
9987+#ifndef _VSERVER_DEVICE_H
9988+#define _VSERVER_DEVICE_H
d337f35e 9989+
d337f35e 9990+
4bf69007 9991+#include <uapi/vserver/device.h>
d337f35e 9992+
4bf69007 9993+#else /* _VSERVER_DEVICE_H */
d33d7b00 9994+#warning duplicate inclusion
4bf69007 9995+#endif /* _VSERVER_DEVICE_H */
8931d859
AM
9996diff -NurpP --minimal linux-4.4.161/include/linux/vserver/device_cmd.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/device_cmd.h
9997--- linux-4.4.161/include/linux/vserver/device_cmd.h 1970-01-01 00:00:00.000000000 +0000
9998+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/device_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
9999@@ -0,0 +1,31 @@
10000+#ifndef _VSERVER_DEVICE_CMD_H
10001+#define _VSERVER_DEVICE_CMD_H
d337f35e 10002+
4bf69007 10003+#include <uapi/vserver/device_cmd.h>
d337f35e 10004+
d337f35e 10005+
d33d7b00 10006+#ifdef CONFIG_COMPAT
d337f35e 10007+
d33d7b00 10008+#include <asm/compat.h>
3bac966d 10009+
d33d7b00
AM
10010+struct vcmd_set_mapping_v0_x32 {
10011+ compat_uptr_t device_ptr;
10012+ compat_uptr_t target_ptr;
10013+ uint32_t flags;
d337f35e
JR
10014+};
10015+
d33d7b00 10016+#endif /* CONFIG_COMPAT */
d337f35e 10017+
d33d7b00 10018+#include <linux/compiler.h>
d337f35e 10019+
d33d7b00
AM
10020+extern int vc_set_mapping(struct vx_info *, void __user *);
10021+extern int vc_unset_mapping(struct vx_info *, void __user *);
d337f35e 10022+
d33d7b00 10023+#ifdef CONFIG_COMPAT
d337f35e 10024+
d33d7b00
AM
10025+extern int vc_set_mapping_x32(struct vx_info *, void __user *);
10026+extern int vc_unset_mapping_x32(struct vx_info *, void __user *);
d337f35e 10027+
d33d7b00 10028+#endif /* CONFIG_COMPAT */
d337f35e 10029+
4bf69007 10030+#endif /* _VSERVER_DEVICE_CMD_H */
8931d859
AM
10031diff -NurpP --minimal linux-4.4.161/include/linux/vserver/device_def.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/device_def.h
10032--- linux-4.4.161/include/linux/vserver/device_def.h 1970-01-01 00:00:00.000000000 +0000
10033+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/device_def.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00 10034@@ -0,0 +1,17 @@
4bf69007
AM
10035+#ifndef _VSERVER_DEVICE_DEF_H
10036+#define _VSERVER_DEVICE_DEF_H
d337f35e 10037+
d33d7b00 10038+#include <linux/types.h>
d337f35e 10039+
d33d7b00
AM
10040+struct vx_dmap_target {
10041+ dev_t target;
10042+ uint32_t flags;
10043+};
d337f35e 10044+
d33d7b00
AM
10045+struct _vx_device {
10046+#ifdef CONFIG_VSERVER_DEVICE
10047+ struct vx_dmap_target targets[2];
10048+#endif
10049+};
d337f35e 10050+
4bf69007 10051+#endif /* _VSERVER_DEVICE_DEF_H */
8931d859
AM
10052diff -NurpP --minimal linux-4.4.161/include/linux/vserver/dlimit.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/dlimit.h
10053--- linux-4.4.161/include/linux/vserver/dlimit.h 1970-01-01 00:00:00.000000000 +0000
10054+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/dlimit.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00 10055@@ -0,0 +1,54 @@
4bf69007
AM
10056+#ifndef _VSERVER_DLIMIT_H
10057+#define _VSERVER_DLIMIT_H
d337f35e 10058+
d33d7b00 10059+#include "switch.h"
3bac966d 10060+
d337f35e 10061+
3bac966d 10062+#ifdef __KERNEL__
d337f35e 10063+
d33d7b00 10064+/* keep in sync with CDLIM_INFINITY */
d337f35e 10065+
d33d7b00 10066+#define DLIM_INFINITY (~0ULL)
d337f35e 10067+
d33d7b00
AM
10068+#include <linux/spinlock.h>
10069+#include <linux/rcupdate.h>
d337f35e 10070+
d33d7b00 10071+struct super_block;
d337f35e 10072+
d33d7b00
AM
10073+struct dl_info {
10074+ struct hlist_node dl_hlist; /* linked list of contexts */
10075+ struct rcu_head dl_rcu; /* the rcu head */
61333608 10076+ vtag_t dl_tag; /* context tag */
d33d7b00
AM
10077+ atomic_t dl_usecnt; /* usage count */
10078+ atomic_t dl_refcnt; /* reference count */
d337f35e 10079+
d33d7b00 10080+ struct super_block *dl_sb; /* associated superblock */
d337f35e 10081+
d33d7b00 10082+ spinlock_t dl_lock; /* protect the values */
d337f35e 10083+
d33d7b00
AM
10084+ unsigned long long dl_space_used; /* used space in bytes */
10085+ unsigned long long dl_space_total; /* maximum space in bytes */
10086+ unsigned long dl_inodes_used; /* used inodes */
10087+ unsigned long dl_inodes_total; /* maximum inodes */
d337f35e 10088+
d33d7b00
AM
10089+ unsigned int dl_nrlmult; /* non root limit mult */
10090+};
d337f35e 10091+
d33d7b00 10092+struct rcu_head;
d337f35e 10093+
d33d7b00
AM
10094+extern void rcu_free_dl_info(struct rcu_head *);
10095+extern void unhash_dl_info(struct dl_info *);
d337f35e 10096+
61333608 10097+extern struct dl_info *locate_dl_info(struct super_block *, vtag_t);
d337f35e 10098+
d337f35e 10099+
d33d7b00 10100+struct kstatfs;
d337f35e 10101+
d33d7b00 10102+extern void vx_vsi_statfs(struct super_block *, struct kstatfs *);
d337f35e 10103+
d33d7b00 10104+typedef uint64_t dlsize_t;
d337f35e 10105+
d33d7b00 10106+#endif /* __KERNEL__ */
4bf69007 10107+#else /* _VSERVER_DLIMIT_H */
d33d7b00 10108+#warning duplicate inclusion
4bf69007 10109+#endif /* _VSERVER_DLIMIT_H */
8931d859
AM
10110diff -NurpP --minimal linux-4.4.161/include/linux/vserver/dlimit_cmd.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/dlimit_cmd.h
10111--- linux-4.4.161/include/linux/vserver/dlimit_cmd.h 1970-01-01 00:00:00.000000000 +0000
10112+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/dlimit_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
10113@@ -0,0 +1,46 @@
10114+#ifndef _VSERVER_DLIMIT_CMD_H
10115+#define _VSERVER_DLIMIT_CMD_H
d337f35e 10116+
4bf69007 10117+#include <uapi/vserver/dlimit_cmd.h>
d337f35e 10118+
d337f35e 10119+
4bf69007 10120+#ifdef CONFIG_COMPAT
d337f35e 10121+
4bf69007 10122+#include <asm/compat.h>
2380c486 10123+
4bf69007
AM
10124+struct vcmd_ctx_dlimit_base_v0_x32 {
10125+ compat_uptr_t name_ptr;
d33d7b00
AM
10126+ uint32_t flags;
10127+};
adc1caaa 10128+
4bf69007
AM
10129+struct vcmd_ctx_dlimit_v0_x32 {
10130+ compat_uptr_t name_ptr;
d33d7b00
AM
10131+ uint32_t space_used; /* used space in kbytes */
10132+ uint32_t space_total; /* maximum space in kbytes */
10133+ uint32_t inodes_used; /* used inodes */
10134+ uint32_t inodes_total; /* maximum inodes */
10135+ uint32_t reserved; /* reserved for root in % */
10136+ uint32_t flags;
10137+};
d337f35e 10138+
4bf69007 10139+#endif /* CONFIG_COMPAT */
d337f35e 10140+
4bf69007 10141+#include <linux/compiler.h>
d337f35e 10142+
4bf69007
AM
10143+extern int vc_add_dlimit(uint32_t, void __user *);
10144+extern int vc_rem_dlimit(uint32_t, void __user *);
d337f35e 10145+
4bf69007
AM
10146+extern int vc_set_dlimit(uint32_t, void __user *);
10147+extern int vc_get_dlimit(uint32_t, void __user *);
d337f35e 10148+
4bf69007 10149+#ifdef CONFIG_COMPAT
d337f35e 10150+
4bf69007
AM
10151+extern int vc_add_dlimit_x32(uint32_t, void __user *);
10152+extern int vc_rem_dlimit_x32(uint32_t, void __user *);
2380c486 10153+
d33d7b00
AM
10154+extern int vc_set_dlimit_x32(uint32_t, void __user *);
10155+extern int vc_get_dlimit_x32(uint32_t, void __user *);
d337f35e 10156+
d33d7b00 10157+#endif /* CONFIG_COMPAT */
d337f35e 10158+
4bf69007 10159+#endif /* _VSERVER_DLIMIT_CMD_H */
8931d859
AM
10160diff -NurpP --minimal linux-4.4.161/include/linux/vserver/global.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/global.h
10161--- linux-4.4.161/include/linux/vserver/global.h 1970-01-01 00:00:00.000000000 +0000
10162+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/global.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00 10163@@ -0,0 +1,19 @@
4bf69007
AM
10164+#ifndef _VSERVER_GLOBAL_H
10165+#define _VSERVER_GLOBAL_H
d337f35e 10166+
d337f35e 10167+
d33d7b00
AM
10168+extern atomic_t vx_global_ctotal;
10169+extern atomic_t vx_global_cactive;
d337f35e 10170+
d33d7b00
AM
10171+extern atomic_t nx_global_ctotal;
10172+extern atomic_t nx_global_cactive;
d337f35e 10173+
d33d7b00
AM
10174+extern atomic_t vs_global_nsproxy;
10175+extern atomic_t vs_global_fs;
10176+extern atomic_t vs_global_mnt_ns;
10177+extern atomic_t vs_global_uts_ns;
10178+extern atomic_t vs_global_user_ns;
10179+extern atomic_t vs_global_pid_ns;
d337f35e
JR
10180+
10181+
4bf69007 10182+#endif /* _VSERVER_GLOBAL_H */
8931d859
AM
10183diff -NurpP --minimal linux-4.4.161/include/linux/vserver/history.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/history.h
10184--- linux-4.4.161/include/linux/vserver/history.h 1970-01-01 00:00:00.000000000 +0000
10185+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/history.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00 10186@@ -0,0 +1,197 @@
4bf69007
AM
10187+#ifndef _VSERVER_HISTORY_H
10188+#define _VSERVER_HISTORY_H
d337f35e 10189+
d337f35e 10190+
d33d7b00
AM
10191+enum {
10192+ VXH_UNUSED = 0,
10193+ VXH_THROW_OOPS = 1,
d337f35e 10194+
d33d7b00
AM
10195+ VXH_GET_VX_INFO,
10196+ VXH_PUT_VX_INFO,
10197+ VXH_INIT_VX_INFO,
10198+ VXH_SET_VX_INFO,
10199+ VXH_CLR_VX_INFO,
10200+ VXH_CLAIM_VX_INFO,
10201+ VXH_RELEASE_VX_INFO,
10202+ VXH_ALLOC_VX_INFO,
10203+ VXH_DEALLOC_VX_INFO,
10204+ VXH_HASH_VX_INFO,
10205+ VXH_UNHASH_VX_INFO,
10206+ VXH_LOC_VX_INFO,
10207+ VXH_LOOKUP_VX_INFO,
10208+ VXH_CREATE_VX_INFO,
10209+};
d337f35e 10210+
d33d7b00
AM
10211+struct _vxhe_vxi {
10212+ struct vx_info *ptr;
10213+ unsigned xid;
10214+ unsigned usecnt;
10215+ unsigned tasks;
10216+};
d337f35e 10217+
d33d7b00
AM
10218+struct _vxhe_set_clr {
10219+ void *data;
10220+};
d337f35e 10221+
d33d7b00
AM
10222+struct _vxhe_loc_lookup {
10223+ unsigned arg;
10224+};
d337f35e 10225+
d33d7b00
AM
10226+struct _vx_hist_entry {
10227+ void *loc;
10228+ unsigned short seq;
10229+ unsigned short type;
10230+ struct _vxhe_vxi vxi;
10231+ union {
10232+ struct _vxhe_set_clr sc;
10233+ struct _vxhe_loc_lookup ll;
10234+ };
3bac966d 10235+};
d337f35e 10236+
d33d7b00 10237+#ifdef CONFIG_VSERVER_HISTORY
d337f35e 10238+
d33d7b00 10239+extern unsigned volatile int vxh_active;
d337f35e 10240+
d33d7b00 10241+struct _vx_hist_entry *vxh_advance(void *loc);
d337f35e 10242+
d337f35e 10243+
d33d7b00
AM
10244+static inline
10245+void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi)
10246+{
10247+ entry->vxi.ptr = vxi;
10248+ if (vxi) {
10249+ entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt);
10250+ entry->vxi.tasks = atomic_read(&vxi->vx_tasks);
10251+ entry->vxi.xid = vxi->vx_id;
10252+ }
10253+}
d337f35e 10254+
d337f35e 10255+
d33d7b00 10256+#define __HERE__ current_text_addr()
d337f35e 10257+
d33d7b00
AM
/*
 * Shared body of the history recording helpers below.
 *
 * Declares a local "entry", then, with preemption disabled, obtains an
 * entry from vxh_advance(__here), executes the statement(s) passed as
 * __data (which refer to "entry"), and finally stores __type in the entry.
 */
#define __VXH_BODY(__type, __data, __here)	\
	struct _vx_hist_entry *entry;		\
						\
	preempt_disable();			\
	entry = vxh_advance(__here);		\
	__data;					\
	entry->type = __type;			\
	preempt_enable();
d337f35e 10266+
d337f35e 10267+
d33d7b00 10268+ /* pass vxi only */
d337f35e 10269+
d33d7b00
AM
10270+#define __VXH_SMPL \
10271+ __vxh_copy_vxi(entry, vxi)
d337f35e 10272+
d33d7b00
AM
10273+static inline
10274+void __vxh_smpl(struct vx_info *vxi, int __type, void *__here)
10275+{
10276+ __VXH_BODY(__type, __VXH_SMPL, __here)
10277+}
d337f35e 10278+
d33d7b00 10279+ /* pass vxi and data (void *) */
d337f35e 10280+
d33d7b00
AM
10281+#define __VXH_DATA \
10282+ __vxh_copy_vxi(entry, vxi); \
10283+ entry->sc.data = data
d337f35e 10284+
d33d7b00
AM
10285+static inline
10286+void __vxh_data(struct vx_info *vxi, void *data,
10287+ int __type, void *__here)
3bac966d 10288+{
d33d7b00 10289+ __VXH_BODY(__type, __VXH_DATA, __here)
3bac966d 10290+}
d337f35e 10291+
d33d7b00 10292+ /* pass vxi and arg (long) */
d337f35e 10293+
d33d7b00
AM
10294+#define __VXH_LONG \
10295+ __vxh_copy_vxi(entry, vxi); \
10296+ entry->ll.arg = arg
d337f35e 10297+
d33d7b00
AM
10298+static inline
10299+void __vxh_long(struct vx_info *vxi, long arg,
10300+ int __type, void *__here)
10301+{
10302+ __VXH_BODY(__type, __VXH_LONG, __here)
10303+}
d337f35e 10304+
d337f35e 10305+
d33d7b00
AM
10306+static inline
10307+void __vxh_throw_oops(void *__here)
10308+{
10309+ __VXH_BODY(VXH_THROW_OOPS, {}, __here);
10310+ /* prevent further acquisition */
10311+ vxh_active = 0;
10312+}
d337f35e 10313+
d337f35e 10314+
/*
 * History recording wrappers (CONFIG_VSERVER_HISTORY enabled).
 *
 * Each macro forwards to one of the __vxh_*() helpers with the matching
 * VXH_* event type.  The double-underscore variants take the code location
 * (h) from the caller; the others use __HERE__.
 *
 * The expansions carry no trailing semicolon: the caller supplies it, just
 * as with the "do { } while (0)" stubs used when history is disabled.  A
 * semicolon inside the macro would turn "if (x) vxh_foo(v); else ..." into
 * a syntax error in history-enabled builds only.
 */
#define vxh_throw_oops()	__vxh_throw_oops(__HERE__)

#define __vxh_get_vx_info(v, h)	__vxh_smpl(v, VXH_GET_VX_INFO, h)
#define __vxh_put_vx_info(v, h)	__vxh_smpl(v, VXH_PUT_VX_INFO, h)

#define __vxh_init_vx_info(v, d, h) \
	__vxh_data(v, d, VXH_INIT_VX_INFO, h)
#define __vxh_set_vx_info(v, d, h) \
	__vxh_data(v, d, VXH_SET_VX_INFO, h)
#define __vxh_clr_vx_info(v, d, h) \
	__vxh_data(v, d, VXH_CLR_VX_INFO, h)

#define __vxh_claim_vx_info(v, d, h) \
	__vxh_data(v, d, VXH_CLAIM_VX_INFO, h)
#define __vxh_release_vx_info(v, d, h) \
	__vxh_data(v, d, VXH_RELEASE_VX_INFO, h)

#define vxh_alloc_vx_info(v) \
	__vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__)
#define vxh_dealloc_vx_info(v) \
	__vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__)

#define vxh_hash_vx_info(v) \
	__vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__)
#define vxh_unhash_vx_info(v) \
	__vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__)

#define vxh_loc_vx_info(v, l) \
	__vxh_long(v, l, VXH_LOC_VX_INFO, __HERE__)
#define vxh_lookup_vx_info(v, l) \
	__vxh_long(v, l, VXH_LOOKUP_VX_INFO, __HERE__)
#define vxh_create_vx_info(v, l) \
	__vxh_long(v, l, VXH_CREATE_VX_INFO, __HERE__)
d337f35e 10348+
d33d7b00 10349+extern void vxh_dump_history(void);
d337f35e 10350+
d337f35e 10351+
d33d7b00 10352+#else /* CONFIG_VSERVER_HISTORY */
2380c486 10353+
d33d7b00 10354+#define __HERE__ 0
d337f35e 10355+
d33d7b00 10356+#define vxh_throw_oops() do { } while (0)
d337f35e 10357+
d33d7b00
AM
10358+#define __vxh_get_vx_info(v, h) do { } while (0)
10359+#define __vxh_put_vx_info(v, h) do { } while (0)
d337f35e 10360+
d33d7b00
AM
10361+#define __vxh_init_vx_info(v, d, h) do { } while (0)
10362+#define __vxh_set_vx_info(v, d, h) do { } while (0)
10363+#define __vxh_clr_vx_info(v, d, h) do { } while (0)
d337f35e 10364+
d33d7b00
AM
10365+#define __vxh_claim_vx_info(v, d, h) do { } while (0)
10366+#define __vxh_release_vx_info(v, d, h) do { } while (0)
3bac966d 10367+
d33d7b00
AM
10368+#define vxh_alloc_vx_info(v) do { } while (0)
10369+#define vxh_dealloc_vx_info(v) do { } while (0)
d337f35e 10370+
d33d7b00
AM
10371+#define vxh_hash_vx_info(v) do { } while (0)
10372+#define vxh_unhash_vx_info(v) do { } while (0)
d337f35e 10373+
d33d7b00
AM
10374+#define vxh_loc_vx_info(v, l) do { } while (0)
10375+#define vxh_lookup_vx_info(v, l) do { } while (0)
10376+#define vxh_create_vx_info(v, l) do { } while (0)
d337f35e 10377+
d33d7b00 10378+#define vxh_dump_history() do { } while (0)
d337f35e 10379+
d337f35e 10380+
d33d7b00 10381+#endif /* CONFIG_VSERVER_HISTORY */
d337f35e 10382+
4bf69007 10383+#endif /* _VSERVER_HISTORY_H */
8931d859
AM
10384diff -NurpP --minimal linux-4.4.161/include/linux/vserver/inode.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/inode.h
10385--- linux-4.4.161/include/linux/vserver/inode.h 1970-01-01 00:00:00.000000000 +0000
10386+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/inode.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
10387@@ -0,0 +1,19 @@
10388+#ifndef _VSERVER_INODE_H
10389+#define _VSERVER_INODE_H
d337f35e 10390+
4bf69007 10391+#include <uapi/vserver/inode.h>
d337f35e 10392+
d337f35e 10393+
d33d7b00
AM
10394+#ifdef CONFIG_VSERVER_PROC_SECURE
10395+#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE )
10396+#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
10397+#else
10398+#define IATTR_PROC_DEFAULT ( IATTR_ADMIN )
10399+#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
10400+#endif
d337f35e 10401+
d33d7b00 10402+#define vx_hide_check(c, m) (((m) & IATTR_HIDE) ? vx_check(c, m) : 1)
d337f35e 10403+
4bf69007 10404+#else /* _VSERVER_INODE_H */
3bac966d 10405+#warning duplicate inclusion
4bf69007 10406+#endif /* _VSERVER_INODE_H */
8931d859
AM
10407diff -NurpP --minimal linux-4.4.161/include/linux/vserver/inode_cmd.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/inode_cmd.h
10408--- linux-4.4.161/include/linux/vserver/inode_cmd.h 1970-01-01 00:00:00.000000000 +0000
10409+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/inode_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
10410@@ -0,0 +1,36 @@
10411+#ifndef _VSERVER_INODE_CMD_H
10412+#define _VSERVER_INODE_CMD_H
d337f35e 10413+
4bf69007 10414+#include <uapi/vserver/inode_cmd.h>
d337f35e 10415+
d337f35e
JR
10416+
10417+
d33d7b00 10418+#ifdef CONFIG_COMPAT
d337f35e 10419+
d33d7b00 10420+#include <asm/compat.h>
d337f35e 10421+
d33d7b00
AM
10422+struct vcmd_ctx_iattr_v1_x32 {
10423+ compat_uptr_t name_ptr;
10424+ uint32_t tag;
10425+ uint32_t flags;
10426+ uint32_t mask;
10427+};
d337f35e 10428+
d33d7b00 10429+#endif /* CONFIG_COMPAT */
d337f35e 10430+
d33d7b00 10431+#include <linux/compiler.h>
d337f35e 10432+
d33d7b00
AM
10433+extern int vc_get_iattr(void __user *);
10434+extern int vc_set_iattr(void __user *);
d337f35e 10435+
d33d7b00
AM
10436+extern int vc_fget_iattr(uint32_t, void __user *);
10437+extern int vc_fset_iattr(uint32_t, void __user *);
d337f35e 10438+
d33d7b00 10439+#ifdef CONFIG_COMPAT
d337f35e 10440+
d33d7b00
AM
10441+extern int vc_get_iattr_x32(void __user *);
10442+extern int vc_set_iattr_x32(void __user *);
d337f35e 10443+
d33d7b00 10444+#endif /* CONFIG_COMPAT */
d337f35e 10445+
4bf69007 10446+#endif /* _VSERVER_INODE_CMD_H */
8931d859
AM
10447diff -NurpP --minimal linux-4.4.161/include/linux/vserver/limit.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/limit.h
10448--- linux-4.4.161/include/linux/vserver/limit.h 1970-01-01 00:00:00.000000000 +0000
10449+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/limit.h 2018-10-20 04:57:21.000000000 +0000
927ca606 10450@@ -0,0 +1,67 @@
4bf69007
AM
10451+#ifndef _VSERVER_LIMIT_H
10452+#define _VSERVER_LIMIT_H
d337f35e 10453+
4bf69007 10454+#include <uapi/vserver/limit.h>
d337f35e 10455+
d337f35e 10456+
d33d7b00 10457+#define VLIM_NOCHECK ((1L << VLIMIT_DENTRY) | (1L << RLIMIT_RSS))
d337f35e 10458+
d33d7b00 10459+/* keep in sync with CRLIM_INFINITY */
d337f35e 10460+
d33d7b00 10461+#define VLIM_INFINITY (~0ULL)
d337f35e 10462+
d33d7b00
AM
10463+#include <asm/atomic.h>
10464+#include <asm/resource.h>
d337f35e 10465+
d33d7b00
AM
10466+#ifndef RLIM_INFINITY
10467+#warning RLIM_INFINITY is undefined
10468+#endif
d337f35e 10469+
d33d7b00 10470+#define __rlim_val(l, r, v) ((l)->res[r].v)
d337f35e 10471+
d33d7b00
AM
10472+#define __rlim_soft(l, r) __rlim_val(l, r, soft)
10473+#define __rlim_hard(l, r) __rlim_val(l, r, hard)
d337f35e 10474+
d33d7b00
AM
10475+#define __rlim_rcur(l, r) __rlim_val(l, r, rcur)
10476+#define __rlim_rmin(l, r) __rlim_val(l, r, rmin)
10477+#define __rlim_rmax(l, r) __rlim_val(l, r, rmax)
d337f35e 10478+
d33d7b00
AM
10479+#define __rlim_lhit(l, r) __rlim_val(l, r, lhit)
10480+#define __rlim_hit(l, r) atomic_inc(&__rlim_lhit(l, r))
d337f35e 10481+
d33d7b00
AM
10482+typedef atomic_long_t rlim_atomic_t;
10483+typedef unsigned long rlim_t;
d337f35e 10484+
d33d7b00
AM
10485+#define __rlim_get(l, r) atomic_long_read(&__rlim_rcur(l, r))
10486+#define __rlim_set(l, r, v) atomic_long_set(&__rlim_rcur(l, r), v)
10487+#define __rlim_inc(l, r) atomic_long_inc(&__rlim_rcur(l, r))
10488+#define __rlim_dec(l, r) atomic_long_dec(&__rlim_rcur(l, r))
10489+#define __rlim_add(l, r, v) atomic_long_add(v, &__rlim_rcur(l, r))
10490+#define __rlim_sub(l, r, v) atomic_long_sub(v, &__rlim_rcur(l, r))
d337f35e 10491+
d337f35e 10492+
d33d7b00
AM
10493+#if (RLIM_INFINITY == VLIM_INFINITY)
10494+#define VX_VLIM(r) ((long long)(long)(r))
10495+#define VX_RLIM(v) ((rlim_t)(v))
3bac966d 10496+#else
d33d7b00
AM
10497+#define VX_VLIM(r) (((r) == RLIM_INFINITY) \
10498+ ? VLIM_INFINITY : (long long)(r))
10499+#define VX_RLIM(v) (((v) == VLIM_INFINITY) \
10500+ ? RLIM_INFINITY : (rlim_t)(v))
3bac966d 10501+#endif
d337f35e 10502+
d33d7b00 10503+struct sysinfo;
d337f35e 10504+
927ca606 10505+#ifdef CONFIG_MEMCG
d33d7b00
AM
10506+void vx_vsi_meminfo(struct sysinfo *);
10507+void vx_vsi_swapinfo(struct sysinfo *);
10508+long vx_vsi_cached(struct sysinfo *);
927ca606
AM
10509+#else /* !CONFIG_MEMCG */
10510+#define vx_vsi_meminfo(s) do { } while (0)
10511+#define vx_vsi_swapinfo(s) do { } while (0)
10512+#define vx_vsi_cached(s) (0L)
10513+#endif /* !CONFIG_MEMCG */
d337f35e 10514+
d33d7b00 10515+#define NUM_LIMITS 24
d337f35e 10516+
4bf69007 10517+#endif /* _VSERVER_LIMIT_H */
8931d859
AM
10518diff -NurpP --minimal linux-4.4.161/include/linux/vserver/limit_cmd.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/limit_cmd.h
10519--- linux-4.4.161/include/linux/vserver/limit_cmd.h 1970-01-01 00:00:00.000000000 +0000
10520+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/limit_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
10521@@ -0,0 +1,35 @@
10522+#ifndef _VSERVER_LIMIT_CMD_H
10523+#define _VSERVER_LIMIT_CMD_H
d337f35e 10524+
4bf69007 10525+#include <uapi/vserver/limit_cmd.h>
d337f35e 10526+
d337f35e 10527+
d33d7b00 10528+#ifdef CONFIG_IA32_EMULATION
d337f35e 10529+
d33d7b00
AM
10530+struct vcmd_ctx_rlimit_v0_x32 {
10531+ uint32_t id;
10532+ uint64_t minimum;
10533+ uint64_t softlimit;
10534+ uint64_t maximum;
10535+} __attribute__ ((packed));
d337f35e 10536+
d33d7b00 10537+#endif /* CONFIG_IA32_EMULATION */
d337f35e 10538+
d33d7b00 10539+#include <linux/compiler.h>
d337f35e 10540+
d33d7b00
AM
10541+extern int vc_get_rlimit_mask(uint32_t, void __user *);
10542+extern int vc_get_rlimit(struct vx_info *, void __user *);
10543+extern int vc_set_rlimit(struct vx_info *, void __user *);
10544+extern int vc_reset_hits(struct vx_info *, void __user *);
10545+extern int vc_reset_minmax(struct vx_info *, void __user *);
d337f35e 10546+
d33d7b00 10547+extern int vc_rlimit_stat(struct vx_info *, void __user *);
d337f35e 10548+
d33d7b00 10549+#ifdef CONFIG_IA32_EMULATION
d337f35e 10550+
d33d7b00
AM
10551+extern int vc_get_rlimit_x32(struct vx_info *, void __user *);
10552+extern int vc_set_rlimit_x32(struct vx_info *, void __user *);
adc1caaa 10553+
d33d7b00 10554+#endif /* CONFIG_IA32_EMULATION */
d337f35e 10555+
4bf69007 10556+#endif /* _VSERVER_LIMIT_CMD_H */
8931d859
AM
10557diff -NurpP --minimal linux-4.4.161/include/linux/vserver/limit_def.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/limit_def.h
10558--- linux-4.4.161/include/linux/vserver/limit_def.h 1970-01-01 00:00:00.000000000 +0000
10559+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/limit_def.h 2018-10-20 04:57:21.000000000 +0000
d33d7b00 10560@@ -0,0 +1,47 @@
4bf69007
AM
10561+#ifndef _VSERVER_LIMIT_DEF_H
10562+#define _VSERVER_LIMIT_DEF_H
d337f35e 10563+
d33d7b00
AM
10564+#include <asm/atomic.h>
10565+#include <asm/resource.h>
d337f35e 10566+
d33d7b00 10567+#include "limit.h"
d337f35e 10568+
d337f35e 10569+
d33d7b00
AM
10570+struct _vx_res_limit {
10571+ rlim_t soft; /* Context soft limit */
10572+ rlim_t hard; /* Context hard limit */
d337f35e 10573+
d33d7b00
AM
10574+ rlim_atomic_t rcur; /* Current value */
10575+ rlim_t rmin; /* Context minimum */
10576+ rlim_t rmax; /* Context maximum */
d337f35e 10577+
d33d7b00
AM
10578+ atomic_t lhit; /* Limit hits */
10579+};
d337f35e 10580+
d33d7b00 10581+/* context sub struct */
2380c486 10582+
d33d7b00
AM
10583+struct _vx_limit {
10584+ struct _vx_res_limit res[NUM_LIMITS];
10585+};
adc1caaa 10586+
d33d7b00 10587+#ifdef CONFIG_VSERVER_DEBUG
adc1caaa 10588+
d33d7b00 10589+static inline void __dump_vx_limit(struct _vx_limit *limit)
3bac966d 10590+{
d33d7b00 10591+ int i;
d337f35e 10592+
d33d7b00
AM
10593+ printk("\t_vx_limit:");
10594+ for (i = 0; i < NUM_LIMITS; i++) {
10595+ printk("\t [%2d] = %8lu %8lu/%8lu, %8ld/%8ld, %8d\n",
10596+ i, (unsigned long)__rlim_get(limit, i),
10597+ (unsigned long)__rlim_rmin(limit, i),
10598+ (unsigned long)__rlim_rmax(limit, i),
10599+ (long)__rlim_soft(limit, i),
10600+ (long)__rlim_hard(limit, i),
10601+ atomic_read(&__rlim_lhit(limit, i)));
10602+ }
3bac966d 10603+}
d337f35e 10604+
d33d7b00 10605+#endif
d337f35e 10606+
4bf69007 10607+#endif /* _VSERVER_LIMIT_DEF_H */
8931d859
AM
10608diff -NurpP --minimal linux-4.4.161/include/linux/vserver/limit_int.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/limit_int.h
10609--- linux-4.4.161/include/linux/vserver/limit_int.h 1970-01-01 00:00:00.000000000 +0000
10610+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/limit_int.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
10611@@ -0,0 +1,193 @@
10612+#ifndef _VSERVER_LIMIT_INT_H
10613+#define _VSERVER_LIMIT_INT_H
d337f35e 10614+
d33d7b00
AM
10615+#define VXD_RCRES_COND(r) VXD_CBIT(cres, r)
10616+#define VXD_RLIMIT_COND(r) VXD_CBIT(limit, r)
d337f35e 10617+
d33d7b00 10618+extern const char *vlimit_name[NUM_LIMITS];
2380c486 10619+
d33d7b00
AM
10620+static inline void __vx_acc_cres(struct vx_info *vxi,
10621+ int res, int dir, void *_data, char *_file, int _line)
10622+{
10623+ if (VXD_RCRES_COND(res))
10624+ vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5ld%s (%p)",
10625+ (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10626+ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10627+ (dir > 0) ? "++" : "--", _data, _file, _line);
10628+ if (!vxi)
10629+ return;
d337f35e 10630+
d33d7b00
AM
10631+ if (dir > 0)
10632+ __rlim_inc(&vxi->limit, res);
10633+ else
10634+ __rlim_dec(&vxi->limit, res);
10635+}
d337f35e 10636+
d33d7b00
AM
10637+static inline void __vx_add_cres(struct vx_info *vxi,
10638+ int res, int amount, void *_data, char *_file, int _line)
10639+{
10640+ if (VXD_RCRES_COND(res))
10641+ vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5ld += %5d (%p)",
10642+ (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10643+ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10644+ amount, _data, _file, _line);
10645+ if (amount == 0)
10646+ return;
10647+ if (!vxi)
10648+ return;
10649+ __rlim_add(&vxi->limit, res, amount);
10650+}
d337f35e 10651+
3bac966d 10652+static inline
d33d7b00 10653+int __vx_cres_adjust_max(struct _vx_limit *limit, int res, rlim_t value)
3bac966d 10654+{
d33d7b00 10655+ int cond = (value > __rlim_rmax(limit, res));
d337f35e 10656+
d33d7b00
AM
10657+ if (cond)
10658+ __rlim_rmax(limit, res) = value;
10659+ return cond;
3bac966d 10660+}
d337f35e 10661+
3bac966d 10662+static inline
d33d7b00 10663+int __vx_cres_adjust_min(struct _vx_limit *limit, int res, rlim_t value)
3bac966d 10664+{
d33d7b00 10665+ int cond = (value < __rlim_rmin(limit, res));
d337f35e 10666+
d33d7b00
AM
10667+ if (cond)
10668+ __rlim_rmin(limit, res) = value;
10669+ return cond;
3bac966d 10670+}
d337f35e 10671+
3bac966d 10672+static inline
d33d7b00 10673+void __vx_cres_fixup(struct _vx_limit *limit, int res, rlim_t value)
3bac966d 10674+{
d33d7b00
AM
10675+ if (!__vx_cres_adjust_max(limit, res, value))
10676+ __vx_cres_adjust_min(limit, res, value);
3bac966d 10677+}
d337f35e 10678+
2380c486 10679+
d33d7b00
AM
10680+/* return values:
10681+ +1 ... no limit hit
10682+ -1 ... over soft limit
10683+ 0 ... over hard limit */
d337f35e 10684+
d33d7b00
AM
10685+static inline int __vx_cres_avail(struct vx_info *vxi,
10686+ int res, int num, char *_file, int _line)
3bac966d 10687+{
d33d7b00
AM
10688+ struct _vx_limit *limit;
10689+ rlim_t value;
d337f35e 10690+
d33d7b00
AM
10691+ if (VXD_RLIMIT_COND(res))
10692+ vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld/%5ld > %5ld + %5d",
10693+ (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10694+ (vxi ? (long)__rlim_soft(&vxi->limit, res) : -1),
10695+ (vxi ? (long)__rlim_hard(&vxi->limit, res) : -1),
10696+ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10697+ num, _file, _line);
10698+ if (!vxi)
3bac966d 10699+ return 1;
d337f35e 10700+
d33d7b00
AM
10701+ limit = &vxi->limit;
10702+ value = __rlim_get(limit, res);
d337f35e 10703+
d33d7b00
AM
10704+ if (!__vx_cres_adjust_max(limit, res, value))
10705+ __vx_cres_adjust_min(limit, res, value);
d337f35e 10706+
d33d7b00 10707+ if (num == 0)
3bac966d 10708+ return 1;
d337f35e 10709+
d33d7b00
AM
10710+ if (__rlim_soft(limit, res) == RLIM_INFINITY)
10711+ return -1;
10712+ if (value + num <= __rlim_soft(limit, res))
10713+ return -1;
d337f35e 10714+
d33d7b00 10715+ if (__rlim_hard(limit, res) == RLIM_INFINITY)
3bac966d 10716+ return 1;
d33d7b00 10717+ if (value + num <= __rlim_hard(limit, res))
3bac966d 10718+ return 1;
d33d7b00
AM
10719+
10720+ __rlim_hit(limit, res);
3bac966d
AM
10721+ return 0;
10722+}
d337f35e 10723+
d337f35e 10724+
d33d7b00 10725+static const int VLA_RSS[] = { RLIMIT_RSS, VLIMIT_ANON, VLIMIT_MAPPED, 0 };
d337f35e 10726+
3bac966d 10727+static inline
d33d7b00 10728+rlim_t __vx_cres_array_sum(struct _vx_limit *limit, const int *array)
3bac966d 10729+{
d33d7b00
AM
10730+ rlim_t value, sum = 0;
10731+ int res;
d337f35e 10732+
d33d7b00
AM
10733+ while ((res = *array++)) {
10734+ value = __rlim_get(limit, res);
10735+ __vx_cres_fixup(limit, res, value);
10736+ sum += value;
10737+ }
10738+ return sum;
3bac966d 10739+}
d337f35e 10740+
3bac966d 10741+static inline
d33d7b00 10742+rlim_t __vx_cres_array_fixup(struct _vx_limit *limit, const int *array)
3bac966d 10743+{
d33d7b00
AM
10744+ rlim_t value = __vx_cres_array_sum(limit, array + 1);
10745+ int res = *array;
d337f35e 10746+
d33d7b00
AM
10747+ if (value == __rlim_get(limit, res))
10748+ return value;
10749+
10750+ __rlim_set(limit, res, value);
10751+ /* now adjust min/max */
10752+ if (!__vx_cres_adjust_max(limit, res, value))
10753+ __vx_cres_adjust_min(limit, res, value);
10754+
10755+ return value;
3bac966d 10756+}
d337f35e 10757+
d33d7b00
AM
10758+static inline int __vx_cres_array_avail(struct vx_info *vxi,
10759+ const int *array, int num, char *_file, int _line)
3bac966d 10760+{
d33d7b00
AM
10761+ struct _vx_limit *limit;
10762+ rlim_t value = 0;
10763+ int res;
10764+
10765+ if (num == 0)
3bac966d 10766+ return 1;
d33d7b00 10767+ if (!vxi)
3bac966d 10768+ return 1;
d337f35e 10769+
d33d7b00
AM
10770+ limit = &vxi->limit;
10771+ res = *array;
10772+ value = __vx_cres_array_sum(limit, array + 1);
d337f35e 10773+
d33d7b00
AM
10774+ __rlim_set(limit, res, value);
10775+ __vx_cres_fixup(limit, res, value);
10776+
10777+ return __vx_cres_avail(vxi, res, num, _file, _line);
3bac966d 10778+}
d337f35e 10779+
d337f35e 10780+
d33d7b00 10781+static inline void vx_limit_fixup(struct _vx_limit *limit, int id)
3bac966d 10782+{
d33d7b00
AM
10783+ rlim_t value;
10784+ int res;
d337f35e 10785+
d33d7b00
AM
10786+ /* complex resources first */
10787+ if ((id < 0) || (id == RLIMIT_RSS))
10788+ __vx_cres_array_fixup(limit, VLA_RSS);
d337f35e 10789+
d33d7b00
AM
10790+ for (res = 0; res < NUM_LIMITS; res++) {
10791+ if ((id > 0) && (res != id))
10792+ continue;
10793+
10794+ value = __rlim_get(limit, res);
10795+ __vx_cres_fixup(limit, res, value);
10796+
10797+ /* not supposed to happen, maybe warn? */
10798+ if (__rlim_rmax(limit, res) > __rlim_hard(limit, res))
10799+ __rlim_rmax(limit, res) = __rlim_hard(limit, res);
10800+ }
3bac966d 10801+}
d337f35e
JR
10802+
10803+
4bf69007 10804+#endif /* _VSERVER_LIMIT_INT_H */
8931d859
AM
10805diff -NurpP --minimal linux-4.4.161/include/linux/vserver/monitor.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/monitor.h
10806--- linux-4.4.161/include/linux/vserver/monitor.h 1970-01-01 00:00:00.000000000 +0000
10807+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/monitor.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
10808@@ -0,0 +1,6 @@
10809+#ifndef _VSERVER_MONITOR_H
10810+#define _VSERVER_MONITOR_H
d337f35e 10811+
4bf69007 10812+#include <uapi/vserver/monitor.h>
d337f35e 10813+
4bf69007 10814+#endif /* _VSERVER_MONITOR_H */
8931d859
AM
10815diff -NurpP --minimal linux-4.4.161/include/linux/vserver/network.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/network.h
10816--- linux-4.4.161/include/linux/vserver/network.h 1970-01-01 00:00:00.000000000 +0000
10817+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/network.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
10818@@ -0,0 +1,76 @@
10819+#ifndef _VSERVER_NETWORK_H
10820+#define _VSERVER_NETWORK_H
d337f35e 10821+
d337f35e 10822+
4bf69007
AM
10823+#include <linux/list.h>
10824+#include <linux/spinlock.h>
10825+#include <linux/rcupdate.h>
10826+#include <linux/in.h>
10827+#include <linux/in6.h>
10828+#include <asm/atomic.h>
10829+#include <uapi/vserver/network.h>
d337f35e 10830+
4bf69007
AM
10831+struct nx_addr_v4 {
10832+ struct nx_addr_v4 *next;
10833+ struct in_addr ip[2];
10834+ struct in_addr mask;
10835+ uint16_t type;
10836+ uint16_t flags;
10837+};
d337f35e 10838+
4bf69007
AM
10839+struct nx_addr_v6 {
10840+ struct nx_addr_v6 *next;
10841+ struct in6_addr ip;
10842+ struct in6_addr mask;
10843+ uint32_t prefix;
10844+ uint16_t type;
10845+ uint16_t flags;
10846+};
d337f35e 10847+
4bf69007
AM
10848+struct nx_info {
10849+ struct hlist_node nx_hlist; /* linked list of nxinfos */
61333608 10850+ vnid_t nx_id; /* vnet id */
4bf69007
AM
10851+ atomic_t nx_usecnt; /* usage count */
10852+ atomic_t nx_tasks; /* tasks count */
10853+ int nx_state; /* context state */
d337f35e 10854+
4bf69007
AM
10855+ uint64_t nx_flags; /* network flag word */
10856+ uint64_t nx_ncaps; /* network capabilities */
d337f35e 10857+
4bf69007
AM
10858+ spinlock_t addr_lock; /* protect address changes */
10859+ struct in_addr v4_lback; /* Loopback address */
10860+ struct in_addr v4_bcast; /* Broadcast address */
10861+ struct nx_addr_v4 v4; /* First/Single ipv4 address */
10862+#ifdef CONFIG_IPV6
10863+ struct nx_addr_v6 v6; /* First/Single ipv6 address */
10864+#endif
10865+ char nx_name[65]; /* network context name */
d33d7b00 10866+};
d337f35e 10867+
d337f35e 10868+
4bf69007 10869+/* status flags */
d337f35e 10870+
4bf69007
AM
10871+#define NXS_HASHED 0x0001
10872+#define NXS_SHUTDOWN 0x0100
10873+#define NXS_RELEASED 0x8000
d337f35e 10874+
4bf69007 10875+extern struct nx_info *lookup_nx_info(int);
d337f35e 10876+
4bf69007 10877+extern int get_nid_list(int, unsigned int *, int);
61333608 10878+extern int nid_is_hashed(vnid_t);
d337f35e 10879+
4bf69007 10880+extern int nx_migrate_task(struct task_struct *, struct nx_info *);
d337f35e 10881+
4bf69007 10882+extern long vs_net_change(struct nx_info *, unsigned int);
d337f35e 10883+
4bf69007 10884+struct sock;
d337f35e 10885+
d337f35e 10886+
4bf69007
AM
10887+#define NX_IPV4(n) ((n)->v4.type != NXA_TYPE_NONE)
10888+#ifdef CONFIG_IPV6
10889+#define NX_IPV6(n) ((n)->v6.type != NXA_TYPE_NONE)
10890+#else
10891+#define NX_IPV6(n) (0)
10892+#endif
d337f35e 10893+
4bf69007 10894+#endif /* _VSERVER_NETWORK_H */
8931d859
AM
10895diff -NurpP --minimal linux-4.4.161/include/linux/vserver/network_cmd.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/network_cmd.h
10896--- linux-4.4.161/include/linux/vserver/network_cmd.h 1970-01-01 00:00:00.000000000 +0000
10897+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/network_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
10898@@ -0,0 +1,37 @@
10899+#ifndef _VSERVER_NETWORK_CMD_H
10900+#define _VSERVER_NETWORK_CMD_H
d337f35e 10901+
4bf69007 10902+#include <uapi/vserver/network_cmd.h>
d337f35e 10903+
4bf69007 10904+extern int vc_task_nid(uint32_t);
d337f35e 10905+
4bf69007 10906+extern int vc_nx_info(struct nx_info *, void __user *);
d337f35e 10907+
4bf69007
AM
10908+extern int vc_net_create(uint32_t, void __user *);
10909+extern int vc_net_migrate(struct nx_info *, void __user *);
d337f35e 10910+
4bf69007
AM
10911+extern int vc_net_add(struct nx_info *, void __user *);
10912+extern int vc_net_remove(struct nx_info *, void __user *);
d337f35e 10913+
4bf69007
AM
10914+extern int vc_net_add_ipv4_v1(struct nx_info *, void __user *);
10915+extern int vc_net_add_ipv4(struct nx_info *, void __user *);
d337f35e 10916+
4bf69007
AM
10917+extern int vc_net_rem_ipv4_v1(struct nx_info *, void __user *);
10918+extern int vc_net_rem_ipv4(struct nx_info *, void __user *);
d337f35e 10919+
4bf69007
AM
10920+extern int vc_net_add_ipv6(struct nx_info *, void __user *);
10921+extern int vc_net_remove_ipv6(struct nx_info *, void __user *);
d337f35e 10922+
4bf69007
AM
10923+extern int vc_add_match_ipv4(struct nx_info *, void __user *);
10924+extern int vc_get_match_ipv4(struct nx_info *, void __user *);
d33d7b00 10925+
4bf69007
AM
10926+extern int vc_add_match_ipv6(struct nx_info *, void __user *);
10927+extern int vc_get_match_ipv6(struct nx_info *, void __user *);
d337f35e 10928+
4bf69007
AM
10929+extern int vc_get_nflags(struct nx_info *, void __user *);
10930+extern int vc_set_nflags(struct nx_info *, void __user *);
d337f35e 10931+
4bf69007
AM
10932+extern int vc_get_ncaps(struct nx_info *, void __user *);
10933+extern int vc_set_ncaps(struct nx_info *, void __user *);
d337f35e 10934+
4bf69007 10935+#endif /* _VSERVER_NETWORK_CMD_H */
8931d859
AM
10936diff -NurpP --minimal linux-4.4.161/include/linux/vserver/percpu.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/percpu.h
10937--- linux-4.4.161/include/linux/vserver/percpu.h 1970-01-01 00:00:00.000000000 +0000
10938+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/percpu.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
10939@@ -0,0 +1,14 @@
10940+#ifndef _VSERVER_PERCPU_H
10941+#define _VSERVER_PERCPU_H
d337f35e 10942+
4bf69007
AM
10943+#include "cvirt_def.h"
10944+#include "sched_def.h"
d337f35e 10945+
4bf69007
AM
10946+struct _vx_percpu {
10947+ struct _vx_cvirt_pc cvirt;
10948+ struct _vx_sched_pc sched;
10949+};
9795bf04 10950+
4bf69007 10951+#define PERCPU_PERCTX (sizeof(struct _vx_percpu))
d337f35e 10952+
4bf69007 10953+#endif /* _VSERVER_PERCPU_H */
8931d859
AM
10954diff -NurpP --minimal linux-4.4.161/include/linux/vserver/pid.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/pid.h
10955--- linux-4.4.161/include/linux/vserver/pid.h 1970-01-01 00:00:00.000000000 +0000
10956+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/pid.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
10957@@ -0,0 +1,51 @@
10958+#ifndef _VSERVER_PID_H
10959+#define _VSERVER_PID_H
d337f35e 10960+
4bf69007 10961+/* pid faking stuff */
d337f35e 10962+
4bf69007
AM
10963+#define vx_info_map_pid(v, p) \
10964+ __vx_info_map_pid((v), (p), __func__, __FILE__, __LINE__)
10965+#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p)
10966+#define vx_map_pid(p) vx_info_map_pid(current_vx_info(), p)
10967+#define vx_map_tgid(p) vx_map_pid(p)
d337f35e 10968+
4bf69007
AM
10969+static inline int __vx_info_map_pid(struct vx_info *vxi, int pid,
10970+ const char *func, const char *file, int line)
10971+{
10972+ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
10973+ vxfprintk(VXD_CBIT(cvirt, 2),
10974+ "vx_map_tgid: %p/%llx: %d -> %d",
10975+ vxi, (long long)vxi->vx_flags, pid,
10976+ (pid && pid == vxi->vx_initpid) ? 1 : pid,
10977+ func, file, line);
10978+ if (pid == 0)
10979+ return 0;
10980+ if (pid == vxi->vx_initpid)
10981+ return 1;
10982+ }
10983+ return pid;
10984+}
d337f35e 10985+
4bf69007
AM
10986+#define vx_info_rmap_pid(v, p) \
10987+ __vx_info_rmap_pid((v), (p), __func__, __FILE__, __LINE__)
10988+#define vx_rmap_pid(p) vx_info_rmap_pid(current_vx_info(), p)
10989+#define vx_rmap_tgid(p) vx_rmap_pid(p)
d337f35e 10990+
4bf69007
AM
10991+static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid,
10992+ const char *func, const char *file, int line)
10993+{
10994+ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
10995+ vxfprintk(VXD_CBIT(cvirt, 2),
10996+ "vx_rmap_tgid: %p/%llx: %d -> %d",
10997+ vxi, (long long)vxi->vx_flags, pid,
10998+ (pid == 1) ? vxi->vx_initpid : pid,
10999+ func, file, line);
11000+ if ((pid == 1) && vxi->vx_initpid)
11001+ return vxi->vx_initpid;
11002+ if (pid == vxi->vx_initpid)
11003+ return ~0U;
11004+ }
11005+ return pid;
11006+}
d337f35e 11007+
4bf69007 11008+#endif /* _VSERVER_PID_H */
8931d859
AM
11009diff -NurpP --minimal linux-4.4.161/include/linux/vserver/sched.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/sched.h
11010--- linux-4.4.161/include/linux/vserver/sched.h 1970-01-01 00:00:00.000000000 +0000
11011+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/sched.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11012@@ -0,0 +1,23 @@
11013+#ifndef _VSERVER_SCHED_H
11014+#define _VSERVER_SCHED_H
d337f35e 11015+
d337f35e 11016+
d33d7b00 11017+#ifdef __KERNEL__
d337f35e 11018+
4bf69007 11019+struct timespec;
d337f35e 11020+
4bf69007 11021+void vx_vsi_uptime(struct timespec *, struct timespec *);
d337f35e
JR
11022+
11023+
4bf69007 11024+struct vx_info;
d337f35e 11025+
4bf69007 11026+void vx_update_load(struct vx_info *);
d337f35e 11027+
d337f35e 11028+
4bf69007
AM
11029+void vx_update_sched_param(struct _vx_sched *sched,
11030+ struct _vx_sched_pc *sched_pc);
d337f35e 11031+
4bf69007
AM
11032+#endif /* __KERNEL__ */
11033+#else /* _VSERVER_SCHED_H */
11034+#warning duplicate inclusion
11035+#endif /* _VSERVER_SCHED_H */
8931d859
AM
11036diff -NurpP --minimal linux-4.4.161/include/linux/vserver/sched_cmd.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/sched_cmd.h
11037--- linux-4.4.161/include/linux/vserver/sched_cmd.h 1970-01-01 00:00:00.000000000 +0000
11038+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/sched_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11039@@ -0,0 +1,11 @@
11040+#ifndef _VSERVER_SCHED_CMD_H
11041+#define _VSERVER_SCHED_CMD_H
2380c486 11042+
2380c486 11043+
4bf69007
AM
11044+#include <linux/compiler.h>
11045+#include <uapi/vserver/sched_cmd.h>
d337f35e 11046+
4bf69007
AM
11047+extern int vc_set_prio_bias(struct vx_info *, void __user *);
11048+extern int vc_get_prio_bias(struct vx_info *, void __user *);
d337f35e 11049+
4bf69007 11050+#endif /* _VSERVER_SCHED_CMD_H */
8931d859
AM
11051diff -NurpP --minimal linux-4.4.161/include/linux/vserver/sched_def.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/sched_def.h
11052--- linux-4.4.161/include/linux/vserver/sched_def.h 1970-01-01 00:00:00.000000000 +0000
11053+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/sched_def.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11054@@ -0,0 +1,38 @@
11055+#ifndef _VSERVER_SCHED_DEF_H
11056+#define _VSERVER_SCHED_DEF_H
d33d7b00 11057+
4bf69007
AM
11058+#include <linux/spinlock.h>
11059+#include <linux/jiffies.h>
11060+#include <linux/cpumask.h>
11061+#include <asm/atomic.h>
11062+#include <asm/param.h>
d33d7b00 11063+
d337f35e 11064+
4bf69007 11065+/* context sub struct */
d337f35e 11066+
4bf69007
AM
11067+struct _vx_sched {
11068+ int prio_bias; /* bias offset for priority */
d337f35e 11069+
4bf69007
AM
11070+ cpumask_t update; /* CPUs which should update */
11071+};
d337f35e 11072+
4bf69007
AM
11073+struct _vx_sched_pc {
11074+ int prio_bias; /* bias offset for priority */
d337f35e 11075+
4bf69007
AM
11076+ uint64_t user_ticks; /* token tick events */
11077+ uint64_t sys_ticks; /* token tick events */
11078+ uint64_t hold_ticks; /* token ticks paused */
11079+};
d337f35e 11080+
d337f35e 11081+
4bf69007 11082+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 11083+
4bf69007
AM
11084+static inline void __dump_vx_sched(struct _vx_sched *sched)
11085+{
11086+ printk("\t_vx_sched:\n");
11087+ printk("\t priority = %4d\n", sched->prio_bias);
11088+}
d337f35e 11089+
4bf69007
AM
11090+#endif
11091+
11092+#endif /* _VSERVER_SCHED_DEF_H */
8931d859
AM
11093diff -NurpP --minimal linux-4.4.161/include/linux/vserver/signal.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/signal.h
11094--- linux-4.4.161/include/linux/vserver/signal.h 1970-01-01 00:00:00.000000000 +0000
11095+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/signal.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11096@@ -0,0 +1,14 @@
11097+#ifndef _VSERVER_SIGNAL_H
11098+#define _VSERVER_SIGNAL_H
d337f35e 11099+
d337f35e 11100+
d33d7b00 11101+#ifdef __KERNEL__
4bf69007
AM
11102+
11103+struct vx_info;
11104+
11105+int vx_info_kill(struct vx_info *, int, int);
d337f35e 11106+
d33d7b00 11107+#endif /* __KERNEL__ */
4bf69007
AM
11108+#else /* _VSERVER_SIGNAL_H */
11109+#warning duplicate inclusion
11110+#endif /* _VSERVER_SIGNAL_H */
8931d859
AM
11111diff -NurpP --minimal linux-4.4.161/include/linux/vserver/signal_cmd.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/signal_cmd.h
11112--- linux-4.4.161/include/linux/vserver/signal_cmd.h 1970-01-01 00:00:00.000000000 +0000
11113+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/signal_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11114@@ -0,0 +1,14 @@
11115+#ifndef _VSERVER_SIGNAL_CMD_H
11116+#define _VSERVER_SIGNAL_CMD_H
d337f35e 11117+
4bf69007 11118+#include <uapi/vserver/signal_cmd.h>
d337f35e 11119+
d337f35e 11120+
4bf69007
AM
11121+extern int vc_ctx_kill(struct vx_info *, void __user *);
11122+extern int vc_wait_exit(struct vx_info *, void __user *);
d337f35e
JR
11123+
11124+
4bf69007
AM
11125+extern int vc_get_pflags(uint32_t pid, void __user *);
11126+extern int vc_set_pflags(uint32_t pid, void __user *);
adc1caaa 11127+
4bf69007 11128+#endif /* _VSERVER_SIGNAL_CMD_H */
8931d859
AM
11129diff -NurpP --minimal linux-4.4.161/include/linux/vserver/space.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/space.h
11130--- linux-4.4.161/include/linux/vserver/space.h 1970-01-01 00:00:00.000000000 +0000
11131+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/space.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11132@@ -0,0 +1,12 @@
11133+#ifndef _VSERVER_SPACE_H
11134+#define _VSERVER_SPACE_H
d337f35e 11135+
4bf69007 11136+#include <linux/types.h>
d337f35e 11137+
4bf69007 11138+struct vx_info;
d337f35e 11139+
4bf69007 11140+int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index);
9f7054f1 11141+
4bf69007
AM
11142+#else /* _VSERVER_SPACE_H */
11143+#warning duplicate inclusion
11144+#endif /* _VSERVER_SPACE_H */
8931d859
AM
11145diff -NurpP --minimal linux-4.4.161/include/linux/vserver/space_cmd.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/space_cmd.h
11146--- linux-4.4.161/include/linux/vserver/space_cmd.h 1970-01-01 00:00:00.000000000 +0000
11147+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/space_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11148@@ -0,0 +1,13 @@
11149+#ifndef _VSERVER_SPACE_CMD_H
11150+#define _VSERVER_SPACE_CMD_H
9f7054f1 11151+
4bf69007 11152+#include <uapi/vserver/space_cmd.h>
d337f35e 11153+
d337f35e 11154+
4bf69007
AM
11155+extern int vc_enter_space_v1(struct vx_info *, void __user *);
11156+extern int vc_set_space_v1(struct vx_info *, void __user *);
11157+extern int vc_enter_space(struct vx_info *, void __user *);
11158+extern int vc_set_space(struct vx_info *, void __user *);
11159+extern int vc_get_space_mask(void __user *, int);
d337f35e 11160+
4bf69007 11161+#endif /* _VSERVER_SPACE_CMD_H */
8931d859
AM
11162diff -NurpP --minimal linux-4.4.161/include/linux/vserver/switch.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/switch.h
11163--- linux-4.4.161/include/linux/vserver/switch.h 1970-01-01 00:00:00.000000000 +0000
11164+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/switch.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11165@@ -0,0 +1,8 @@
11166+#ifndef _VSERVER_SWITCH_H
11167+#define _VSERVER_SWITCH_H
d337f35e 11168+
d337f35e 11169+
4bf69007
AM
11170+#include <linux/errno.h>
11171+#include <uapi/vserver/switch.h>
2380c486 11172+
4bf69007 11173+#endif /* _VSERVER_SWITCH_H */
8931d859
AM
11174diff -NurpP --minimal linux-4.4.161/include/linux/vserver/tag.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/tag.h
11175--- linux-4.4.161/include/linux/vserver/tag.h 1970-01-01 00:00:00.000000000 +0000
11176+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/tag.h 2018-10-20 04:57:21.000000000 +0000
a4a22af8 11177@@ -0,0 +1,160 @@
4bf69007
AM
11178+#ifndef _DX_TAG_H
11179+#define _DX_TAG_H
d337f35e 11180+
4bf69007 11181+#include <linux/types.h>
a4a22af8 11182+#include <linux/uidgid.h>
d337f35e 11183+
d337f35e 11184+
4bf69007 11185+#define DX_TAG(in) (IS_TAGGED(in))
9f7054f1 11186+
d337f35e 11187+
4bf69007
AM
11188+#ifdef CONFIG_TAG_NFSD
11189+#define DX_TAG_NFSD 1
11190+#else
11191+#define DX_TAG_NFSD 0
11192+#endif
2380c486 11193+
2380c486 11194+
4bf69007 11195+#ifdef CONFIG_TAGGING_NONE
d337f35e 11196+
4bf69007
AM
11197+#define MAX_UID 0xFFFFFFFF
11198+#define MAX_GID 0xFFFFFFFF
d337f35e 11199+
4bf69007 11200+#define INOTAG_TAG(cond, uid, gid, tag) (0)
d337f35e 11201+
4bf69007
AM
11202+#define TAGINO_UID(cond, uid, tag) (uid)
11203+#define TAGINO_GID(cond, gid, tag) (gid)
d337f35e 11204+
4bf69007 11205+#endif
d337f35e 11206+
d337f35e 11207+
4bf69007 11208+#ifdef CONFIG_TAGGING_GID16
d337f35e 11209+
4bf69007
AM
11210+#define MAX_UID 0xFFFFFFFF
11211+#define MAX_GID 0x0000FFFF
d337f35e 11212+
4bf69007
AM
11213+#define INOTAG_TAG(cond, uid, gid, tag) \
11214+ ((cond) ? (((gid) >> 16) & 0xFFFF) : 0)
d337f35e 11215+
4bf69007
AM
11216+#define TAGINO_UID(cond, uid, tag) (uid)
11217+#define TAGINO_GID(cond, gid, tag) \
11218+ ((cond) ? (((gid) & 0xFFFF) | ((tag) << 16)) : (gid))
d337f35e 11219+
4bf69007 11220+#endif
d337f35e 11221+
d337f35e 11222+
4bf69007 11223+#ifdef CONFIG_TAGGING_ID24
d337f35e 11224+
4bf69007
AM
11225+#define MAX_UID 0x00FFFFFF
11226+#define MAX_GID 0x00FFFFFF
d337f35e 11227+
4bf69007
AM
11228+#define INOTAG_TAG(cond, uid, gid, tag) \
11229+ ((cond) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0)
d337f35e 11230+
4bf69007
AM
11231+#define TAGINO_UID(cond, uid, tag) \
11232+ ((cond) ? (((uid) & 0xFFFFFF) | (((tag) & 0xFF00) << 16)) : (uid))
11233+#define TAGINO_GID(cond, gid, tag) \
11234+ ((cond) ? (((gid) & 0xFFFFFF) | (((tag) & 0x00FF) << 24)) : (gid))
d337f35e 11235+
4bf69007 11236+#endif
d337f35e 11237+
d337f35e 11238+
4bf69007 11239+#ifdef CONFIG_TAGGING_UID16
d337f35e 11240+
4bf69007
AM
11241+#define MAX_UID 0x0000FFFF
11242+#define MAX_GID 0xFFFFFFFF
3bac966d 11243+
4bf69007
AM
11244+#define INOTAG_TAG(cond, uid, gid, tag) \
11245+ ((cond) ? (((uid) >> 16) & 0xFFFF) : 0)
d337f35e 11246+
4bf69007
AM
11247+#define TAGINO_UID(cond, uid, tag) \
11248+ ((cond) ? (((uid) & 0xFFFF) | ((tag) << 16)) : (uid))
11249+#define TAGINO_GID(cond, gid, tag) (gid)
d337f35e 11250+
d33d7b00 11251+#endif
d337f35e
JR
11252+
11253+
4bf69007 11254+#ifdef CONFIG_TAGGING_INTERN
d337f35e 11255+
4bf69007
AM
11256+#define MAX_UID 0xFFFFFFFF
11257+#define MAX_GID 0xFFFFFFFF
d337f35e 11258+
4bf69007
AM
11259+#define INOTAG_TAG(cond, uid, gid, tag) \
11260+ ((cond) ? (tag) : 0)
d337f35e 11261+
4bf69007
AM
11262+#define TAGINO_UID(cond, uid, tag) (uid)
11263+#define TAGINO_GID(cond, gid, tag) (gid)
d337f35e 11264+
4bf69007 11265+#endif
d337f35e 11266+
d337f35e 11267+
4bf69007
AM
11268+#ifndef CONFIG_TAGGING_NONE
11269+#define dx_current_fstag(sb) \
11270+ ((sb)->s_flags & MS_TAGGED ? dx_current_tag() : 0)
11271+#else
11272+#define dx_current_fstag(sb) (0)
11273+#endif
d337f35e 11274+
4bf69007
AM
11275+#ifndef CONFIG_TAGGING_INTERN
11276+#define TAGINO_TAG(cond, tag) (0)
11277+#else
11278+#define TAGINO_TAG(cond, tag) ((cond) ? (tag) : 0)
11279+#endif
d337f35e 11280+
a4a22af8
AM
11281+#define TAGINO_KUID(cond, kuid, ktag) \
11282+ KUIDT_INIT(TAGINO_UID(cond, __kuid_val(kuid), __ktag_val(ktag)))
11283+#define TAGINO_KGID(cond, kgid, ktag) \
11284+ KGIDT_INIT(TAGINO_GID(cond, __kgid_val(kgid), __ktag_val(ktag)))
11285+#define TAGINO_KTAG(cond, ktag) \
11286+ KTAGT_INIT(TAGINO_TAG(cond, __ktag_val(ktag)))
11287+
11288+
4bf69007
AM
11289+#define INOTAG_UID(cond, uid, gid) \
11290+ ((cond) ? ((uid) & MAX_UID) : (uid))
11291+#define INOTAG_GID(cond, uid, gid) \
11292+ ((cond) ? ((gid) & MAX_GID) : (gid))
d337f35e 11293+
a4a22af8
AM
11294+#define INOTAG_KUID(cond, kuid, kgid) \
11295+ KUIDT_INIT(INOTAG_UID(cond, __kuid_val(kuid), __kgid_val(kgid)))
11296+#define INOTAG_KGID(cond, kuid, kgid) \
11297+ KGIDT_INIT(INOTAG_GID(cond, __kuid_val(kuid), __kgid_val(kgid)))
11298+#define INOTAG_KTAG(cond, kuid, kgid, ktag) \
11299+ KTAGT_INIT(INOTAG_TAG(cond, \
11300+ __kuid_val(kuid), __kgid_val(kgid), __ktag_val(ktag)))
11301+
d337f35e 11302+
4bf69007 11303+static inline uid_t dx_map_uid(uid_t uid)
3bac966d 11304+{
4bf69007
AM
11305+ if ((uid > MAX_UID) && (uid != -1))
11306+ uid = -2;
11307+ return (uid & MAX_UID);
d33d7b00 11308+}
d337f35e 11309+
4bf69007
AM
11310+static inline gid_t dx_map_gid(gid_t gid)
11311+{
11312+ if ((gid > MAX_GID) && (gid != -1))
11313+ gid = -2;
11314+ return (gid & MAX_GID);
11315+}
d337f35e 11316+
4bf69007
AM
11317+struct peer_tag {
11318+ int32_t xid;
11319+ int32_t nid;
d33d7b00 11320+};
d337f35e 11321+
4bf69007 11322+#define dx_notagcheck(sb) ((sb) && ((sb)->s_flags & MS_NOTAGCHECK))
2380c486 11323+
61333608 11324+int dx_parse_tag(char *string, vtag_t *tag, int remove, int *mnt_flags,
4bf69007 11325+ unsigned long *flags);
d337f35e 11326+
4bf69007 11327+#ifdef CONFIG_PROPAGATE
d337f35e 11328+
4bf69007 11329+void __dx_propagate_tag(struct nameidata *nd, struct inode *inode);
d337f35e 11330+
4bf69007 11331+#define dx_propagate_tag(n, i) __dx_propagate_tag(n, i)
d337f35e 11332+
4bf69007
AM
11333+#else
11334+#define dx_propagate_tag(n, i) do { } while (0)
11335+#endif
d337f35e 11336+
4bf69007 11337+#endif /* _DX_TAG_H */
8931d859
AM
11338diff -NurpP --minimal linux-4.4.161/include/linux/vserver/tag_cmd.h linux-4.4.161-vs2.3.9.8/include/linux/vserver/tag_cmd.h
11339--- linux-4.4.161/include/linux/vserver/tag_cmd.h 1970-01-01 00:00:00.000000000 +0000
11340+++ linux-4.4.161-vs2.3.9.8/include/linux/vserver/tag_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11341@@ -0,0 +1,10 @@
11342+#ifndef _VSERVER_TAG_CMD_H
11343+#define _VSERVER_TAG_CMD_H
d337f35e 11344+
4bf69007 11345+#include <uapi/vserver/tag_cmd.h>
d337f35e 11346+
4bf69007 11347+extern int vc_task_tag(uint32_t);
3bac966d 11348+
4bf69007 11349+extern int vc_tag_migrate(uint32_t);
3bac966d 11350+
4bf69007 11351+#endif /* _VSERVER_TAG_CMD_H */
8931d859
AM
11352diff -NurpP --minimal linux-4.4.161/include/net/addrconf.h linux-4.4.161-vs2.3.9.8/include/net/addrconf.h
11353--- linux-4.4.161/include/net/addrconf.h 2018-10-20 02:34:30.000000000 +0000
11354+++ linux-4.4.161-vs2.3.9.8/include/net/addrconf.h 2018-10-20 04:57:21.000000000 +0000
927ca606 11355@@ -84,7 +84,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(str
c2e5f7c8
JR
11356
11357 int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
11358 const struct in6_addr *daddr, unsigned int srcprefs,
11359- struct in6_addr *saddr);
11360+ struct in6_addr *saddr, struct nx_info *nxi);
11361 int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
bb20add7 11362 u32 banned_flags);
c2e5f7c8 11363 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
8931d859
AM
11364diff -NurpP --minimal linux-4.4.161/include/net/af_unix.h linux-4.4.161-vs2.3.9.8/include/net/af_unix.h
11365--- linux-4.4.161/include/net/af_unix.h 2018-10-20 02:34:30.000000000 +0000
11366+++ linux-4.4.161-vs2.3.9.8/include/net/af_unix.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11367@@ -4,6 +4,7 @@
11368 #include <linux/socket.h>
11369 #include <linux/un.h>
11370 #include <linux/mutex.h>
927ca606 11371+// #include <linux/vs_base.h>
4bf69007
AM
11372 #include <net/sock.h>
11373
927ca606 11374 void unix_inflight(struct user_struct *user, struct file *fp);
8931d859
AM
11375diff -NurpP --minimal linux-4.4.161/include/net/inet_timewait_sock.h linux-4.4.161-vs2.3.9.8/include/net/inet_timewait_sock.h
11376--- linux-4.4.161/include/net/inet_timewait_sock.h 2018-10-20 02:34:30.000000000 +0000
11377+++ linux-4.4.161-vs2.3.9.8/include/net/inet_timewait_sock.h 2018-10-20 04:57:21.000000000 +0000
11378@@ -72,6 +72,10 @@ struct inet_timewait_sock {
b00e13aa 11379 #define tw_num __tw_common.skc_num
927ca606
AM
11380 #define tw_cookie __tw_common.skc_cookie
11381 #define tw_dr __tw_common.skc_tw_dr
4bf69007
AM
11382+#define tw_xid __tw_common.skc_xid
11383+#define tw_vx_info __tw_common.skc_vx_info
11384+#define tw_nid __tw_common.skc_nid
11385+#define tw_nx_info __tw_common.skc_nx_info
b00e13aa 11386
4bf69007
AM
11387 int tw_timeout;
11388 volatile unsigned char tw_substate;
8931d859
AM
11389diff -NurpP --minimal linux-4.4.161/include/net/ip6_route.h linux-4.4.161-vs2.3.9.8/include/net/ip6_route.h
11390--- linux-4.4.161/include/net/ip6_route.h 2018-10-20 02:34:30.000000000 +0000
11391+++ linux-4.4.161-vs2.3.9.8/include/net/ip6_route.h 2018-10-20 04:57:21.000000000 +0000
927ca606 11392@@ -90,7 +90,7 @@ int ip6_del_rt(struct rt6_info *);
c2e5f7c8
JR
11393
11394 int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
11395 const struct in6_addr *daddr, unsigned int prefs,
11396- struct in6_addr *saddr);
11397+ struct in6_addr *saddr, struct nx_info *nxi);
11398
11399 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
11400 const struct in6_addr *saddr, int oif, int flags);
8931d859
AM
11401diff -NurpP --minimal linux-4.4.161/include/net/route.h linux-4.4.161-vs2.3.9.8/include/net/route.h
11402--- linux-4.4.161/include/net/route.h 2018-10-20 02:34:30.000000000 +0000
11403+++ linux-4.4.161-vs2.3.9.8/include/net/route.h 2018-10-20 04:57:21.000000000 +0000
11404@@ -224,6 +224,9 @@ static inline void ip_rt_put(struct rtab
b00e13aa 11405 dst_release(&rt->dst);
4bf69007
AM
11406 }
11407
11408+#include <linux/vs_base.h>
11409+#include <linux/vs_inet.h>
d337f35e 11410+
4bf69007
AM
11411 #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3)
11412
11413 extern const __u8 ip_tos2prio[16];
8931d859 11414@@ -271,6 +274,9 @@ static inline void ip_route_connect_init
4bf69007
AM
11415 protocol, flow_flags, dst, src, dport, sport);
11416 }
11417
11418+extern struct rtable *ip_v4_find_src(struct net *net, struct nx_info *,
11419+ struct flowi4 *);
d337f35e 11420+
4bf69007
AM
11421 static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
11422 __be32 dst, __be32 src, u32 tos,
11423 int oif, u8 protocol,
8931d859 11424@@ -279,6 +285,7 @@ static inline struct rtable *ip_route_co
4bf69007
AM
11425 {
11426 struct net *net = sock_net(sk);
11427 struct rtable *rt;
11428+ struct nx_info *nx_info = current_nx_info();
11429
11430 ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
f15949f2 11431 sport, dport, sk);
8931d859 11432@@ -292,7 +299,21 @@ static inline struct rtable *ip_route_co
4bf69007 11433
927ca606
AM
11434 src = fl4->saddr;
11435 }
4bf69007 11436- if (!dst || !src) {
927ca606 11437+
4bf69007
AM
11438+ if (sk)
11439+ nx_info = sk->sk_nx_info;
d337f35e 11440+
4bf69007
AM
11441+ vxdprintk(VXD_CBIT(net, 4),
11442+ "ip_route_connect(%p) %p,%p;%lx",
11443+ sk, nx_info, sk->sk_socket,
11444+ (sk->sk_socket?sk->sk_socket->flags:0));
d337f35e 11445+
4bf69007
AM
11446+ rt = ip_v4_find_src(net, nx_info, fl4);
11447+ if (IS_ERR(rt))
11448+ return rt;
11449+ ip_rt_put(rt);
d337f35e 11450+
4bf69007
AM
11451+ if (!fl4->daddr || !fl4->saddr) {
11452 rt = __ip_route_output_key(net, fl4);
11453 if (IS_ERR(rt))
11454 return rt;
8931d859
AM
11455diff -NurpP --minimal linux-4.4.161/include/net/sock.h linux-4.4.161-vs2.3.9.8/include/net/sock.h
11456--- linux-4.4.161/include/net/sock.h 2018-10-20 02:34:30.000000000 +0000
11457+++ linux-4.4.161-vs2.3.9.8/include/net/sock.h 2018-10-20 05:50:20.000000000 +0000
927ca606
AM
11458@@ -201,6 +201,10 @@ struct sock_common {
11459 struct in6_addr skc_v6_daddr;
11460 struct in6_addr skc_v6_rcv_saddr;
4bf69007 11461 #endif
61333608 11462+ vxid_t skc_xid;
4bf69007 11463+ struct vx_info *skc_vx_info;
61333608 11464+ vnid_t skc_nid;
4bf69007 11465+ struct nx_info *skc_nx_info;
c2e5f7c8 11466
927ca606
AM
11467 atomic64_t skc_cookie;
11468
11469@@ -349,8 +353,12 @@ struct sock {
4bf69007
AM
11470 #define sk_prot __sk_common.skc_prot
11471 #define sk_net __sk_common.skc_net
c2e5f7c8
JR
11472 #define sk_v6_daddr __sk_common.skc_v6_daddr
11473-#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
11474+#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
927ca606 11475 #define sk_cookie __sk_common.skc_cookie
4bf69007
AM
11476+#define sk_xid __sk_common.skc_xid
11477+#define sk_vx_info __sk_common.skc_vx_info
11478+#define sk_nid __sk_common.skc_nid
11479+#define sk_nx_info __sk_common.skc_nx_info
927ca606
AM
11480 #define sk_incoming_cpu __sk_common.skc_incoming_cpu
11481 #define sk_flags __sk_common.skc_flags
11482 #define sk_rxhash __sk_common.skc_rxhash
8931d859
AM
11483diff -NurpP --minimal linux-4.4.161/include/uapi/Kbuild linux-4.4.161-vs2.3.9.8/include/uapi/Kbuild
11484--- linux-4.4.161/include/uapi/Kbuild 2016-01-10 23:01:32.000000000 +0000
11485+++ linux-4.4.161-vs2.3.9.8/include/uapi/Kbuild 2018-10-20 04:57:21.000000000 +0000
bb20add7 11486@@ -13,3 +13,4 @@ header-y += drm/
4bf69007
AM
11487 header-y += xen/
11488 header-y += scsi/
bb20add7 11489 header-y += misc/
4bf69007 11490+header-y += vserver/
8931d859
AM
11491diff -NurpP --minimal linux-4.4.161/include/uapi/linux/capability.h linux-4.4.161-vs2.3.9.8/include/uapi/linux/capability.h
11492--- linux-4.4.161/include/uapi/linux/capability.h 2016-01-10 23:01:32.000000000 +0000
11493+++ linux-4.4.161-vs2.3.9.8/include/uapi/linux/capability.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11494@@ -259,6 +259,7 @@ struct vfs_cap_data {
11495 arbitrary SCSI commands */
11496 /* Allow setting encryption key on loopback filesystem */
11497 /* Allow setting zone reclaim policy */
11498+/* Allow the selection of a security context */
11499
11500 #define CAP_SYS_ADMIN 21
11501
bb20add7 11502@@ -354,7 +355,12 @@ struct vfs_cap_data {
4bf69007 11503
bb20add7 11504 #define CAP_LAST_CAP CAP_AUDIT_READ
4bf69007
AM
11505
11506-#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
11507+/* Allow context manipulations */
11508+/* Allow changing context info on files */
d337f35e 11509+
4bf69007 11510+#define CAP_CONTEXT 63
d337f35e 11511+
4bf69007
AM
11512+#define cap_valid(x) ((x) >= 0 && ((x) <= CAP_LAST_CAP || (x) == CAP_CONTEXT))
11513
11514 /*
11515 * Bit location of each capability (used by user-space library and kernel)
8931d859
AM
11516diff -NurpP --minimal linux-4.4.161/include/uapi/linux/fs.h linux-4.4.161-vs2.3.9.8/include/uapi/linux/fs.h
11517--- linux-4.4.161/include/uapi/linux/fs.h 2016-01-10 23:01:32.000000000 +0000
11518+++ linux-4.4.161-vs2.3.9.8/include/uapi/linux/fs.h 2018-10-20 04:57:21.000000000 +0000
927ca606 11519@@ -91,6 +91,9 @@ struct inodes_stat_t {
4bf69007
AM
11520 #define MS_I_VERSION (1<<23) /* Update inode I_version field */
11521 #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
927ca606 11522 #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
b00e13aa
AM
11523+#define MS_TAGGED (1<<8) /* use generic inode tagging */
11524+#define MS_NOTAGCHECK (1<<9) /* don't check tags */
927ca606 11525+#define MS_TAGID (1<<26) /* use specific tag for this mount */
b00e13aa
AM
11526
11527 /* These sb flags are internal to the kernel */
09be7631 11528 #define MS_NOSEC (1<<28)
927ca606 11529@@ -197,12 +200,15 @@ struct inodes_stat_t {
4bf69007
AM
11530 #define FS_EXTENT_FL 0x00080000 /* Extents */
11531 #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */
11532 #define FS_NOCOW_FL 0x00800000 /* Do not cow file */
11533+#define FS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
927ca606 11534 #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
4bf69007
AM
11535 #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
11536
11537-#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
11538-#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
11539+#define FS_BARRIER_FL 0x04000000 /* Barrier for chroot() */
11540+#define FS_COW_FL 0x20000000 /* Copy on Write marker */
8931d859 11541
4bf69007
AM
11542+#define FS_FL_USER_VISIBLE 0x0103DFFF /* User visible flags */
11543+#define FS_FL_USER_MODIFIABLE 0x010380FF /* User modifiable flags */
11544
11545 #define SYNC_FILE_RANGE_WAIT_BEFORE 1
11546 #define SYNC_FILE_RANGE_WRITE 2
8931d859
AM
11547diff -NurpP --minimal linux-4.4.161/include/uapi/linux/gfs2_ondisk.h linux-4.4.161-vs2.3.9.8/include/uapi/linux/gfs2_ondisk.h
11548--- linux-4.4.161/include/uapi/linux/gfs2_ondisk.h 2016-01-10 23:01:32.000000000 +0000
11549+++ linux-4.4.161-vs2.3.9.8/include/uapi/linux/gfs2_ondisk.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11550@@ -225,6 +225,9 @@ enum {
11551 gfs2fl_Sync = 8,
11552 gfs2fl_System = 9,
11553 gfs2fl_TopLevel = 10,
11554+ gfs2fl_IXUnlink = 16,
11555+ gfs2fl_Barrier = 17,
11556+ gfs2fl_Cow = 18,
11557 gfs2fl_TruncInProg = 29,
11558 gfs2fl_InheritDirectio = 30,
11559 gfs2fl_InheritJdata = 31,
11560@@ -242,6 +245,9 @@ enum {
11561 #define GFS2_DIF_SYNC 0x00000100
11562 #define GFS2_DIF_SYSTEM 0x00000200 /* New in gfs2 */
11563 #define GFS2_DIF_TOPDIR 0x00000400 /* New in gfs2 */
11564+#define GFS2_DIF_IXUNLINK 0x00010000
11565+#define GFS2_DIF_BARRIER 0x00020000
11566+#define GFS2_DIF_COW 0x00040000
11567 #define GFS2_DIF_TRUNC_IN_PROG 0x20000000 /* New in gfs2 */
11568 #define GFS2_DIF_INHERIT_DIRECTIO 0x40000000 /* only in gfs1 */
11569 #define GFS2_DIF_INHERIT_JDATA 0x80000000
8931d859
AM
11570diff -NurpP --minimal linux-4.4.161/include/uapi/linux/if_tun.h linux-4.4.161-vs2.3.9.8/include/uapi/linux/if_tun.h
11571--- linux-4.4.161/include/uapi/linux/if_tun.h 2016-01-10 23:01:32.000000000 +0000
11572+++ linux-4.4.161-vs2.3.9.8/include/uapi/linux/if_tun.h 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
11573@@ -56,6 +56,7 @@
11574 */
11575 #define TUNSETVNETBE _IOW('T', 222, int)
11576 #define TUNGETVNETBE _IOR('T', 223, int)
11577+#define TUNSETNID _IOW('T', 224, int)
4bf69007
AM
11578
11579 /* TUNSETIFF ifr flags */
11580 #define IFF_TUN 0x0001
8931d859
AM
11581diff -NurpP --minimal linux-4.4.161/include/uapi/linux/major.h linux-4.4.161-vs2.3.9.8/include/uapi/linux/major.h
11582--- linux-4.4.161/include/uapi/linux/major.h 2016-01-10 23:01:32.000000000 +0000
11583+++ linux-4.4.161-vs2.3.9.8/include/uapi/linux/major.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11584@@ -15,6 +15,7 @@
11585 #define HD_MAJOR IDE0_MAJOR
11586 #define PTY_SLAVE_MAJOR 3
11587 #define TTY_MAJOR 4
11588+#define VROOT_MAJOR 4
11589 #define TTYAUX_MAJOR 5
11590 #define LP_MAJOR 6
11591 #define VCS_MAJOR 7
8931d859
AM
11592diff -NurpP --minimal linux-4.4.161/include/uapi/linux/nfs_mount.h linux-4.4.161-vs2.3.9.8/include/uapi/linux/nfs_mount.h
11593--- linux-4.4.161/include/uapi/linux/nfs_mount.h 2016-01-10 23:01:32.000000000 +0000
11594+++ linux-4.4.161-vs2.3.9.8/include/uapi/linux/nfs_mount.h 2018-10-20 04:57:21.000000000 +0000
4bf69007 11595@@ -63,7 +63,8 @@ struct nfs_mount_data {
c2e5f7c8 11596 #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 non-text parsed mount data only */
4bf69007
AM
11597 #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */
11598 #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */
11599-#define NFS_MOUNT_FLAGMASK 0xFFFF
11600+#define NFS_MOUNT_TAGGED 0x10000 /* context tagging */
11601+#define NFS_MOUNT_FLAGMASK 0x1FFFF
11602
11603 /* The following are for internal use only */
11604 #define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
8931d859
AM
11605diff -NurpP --minimal linux-4.4.161/include/uapi/linux/reboot.h linux-4.4.161-vs2.3.9.8/include/uapi/linux/reboot.h
11606--- linux-4.4.161/include/uapi/linux/reboot.h 2016-01-10 23:01:32.000000000 +0000
11607+++ linux-4.4.161-vs2.3.9.8/include/uapi/linux/reboot.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11608@@ -33,7 +33,7 @@
11609 #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4
11610 #define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2
11611 #define LINUX_REBOOT_CMD_KEXEC 0x45584543
11612-
11613+#define LINUX_REBOOT_CMD_OOM 0xDEADBEEF
11614
11615
11616 #endif /* _UAPI_LINUX_REBOOT_H */
8931d859
AM
11617diff -NurpP --minimal linux-4.4.161/include/uapi/linux/sysctl.h linux-4.4.161-vs2.3.9.8/include/uapi/linux/sysctl.h
11618--- linux-4.4.161/include/uapi/linux/sysctl.h 2016-01-10 23:01:32.000000000 +0000
11619+++ linux-4.4.161-vs2.3.9.8/include/uapi/linux/sysctl.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11620@@ -60,6 +60,7 @@ enum
11621 CTL_ABI=9, /* Binary emulation */
11622 CTL_CPU=10, /* CPU stuff (speed scaling, etc) */
11623 CTL_ARLAN=254, /* arlan wireless driver */
11624+ CTL_VSERVER=4242, /* Linux-VServer debug */
11625 CTL_S390DBF=5677, /* s390 debug */
11626 CTL_SUNRPC=7249, /* sunrpc debug */
11627 CTL_PM=9899, /* frv power management */
11628@@ -94,6 +95,7 @@ enum
11629
11630 KERN_PANIC=15, /* int: panic timeout */
11631 KERN_REALROOTDEV=16, /* real root device to mount after initrd */
11632+ KERN_VSHELPER=17, /* string: path to vshelper policy agent */
11633
11634 KERN_SPARC_REBOOT=21, /* reboot command on Sparc */
11635 KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */
8931d859
AM
11636diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/Kbuild linux-4.4.161-vs2.3.9.8/include/uapi/vserver/Kbuild
11637--- linux-4.4.161/include/uapi/vserver/Kbuild 1970-01-01 00:00:00.000000000 +0000
11638+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/Kbuild 2018-10-20 04:57:21.000000000 +0000
4bf69007 11639@@ -0,0 +1,9 @@
d337f35e 11640+
4bf69007
AM
11641+header-y += context_cmd.h network_cmd.h space_cmd.h \
11642+ cacct_cmd.h cvirt_cmd.h limit_cmd.h dlimit_cmd.h \
11643+ inode_cmd.h tag_cmd.h sched_cmd.h signal_cmd.h \
11644+ debug_cmd.h device_cmd.h
2380c486 11645+
4bf69007
AM
11646+header-y += switch.h context.h network.h monitor.h \
11647+ limit.h inode.h device.h
2380c486 11648+
8931d859
AM
11649diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/cacct_cmd.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/cacct_cmd.h
11650--- linux-4.4.161/include/uapi/vserver/cacct_cmd.h 1970-01-01 00:00:00.000000000 +0000
11651+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/cacct_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11652@@ -0,0 +1,15 @@
11653+#ifndef _UAPI_VS_CACCT_CMD_H
11654+#define _UAPI_VS_CACCT_CMD_H
d337f35e
JR
11655+
11656+
4bf69007 11657+/* virtual host info name commands */
d337f35e 11658+
4bf69007 11659+#define VCMD_sock_stat VC_CMD(VSTAT, 5, 0)
d337f35e 11660+
4bf69007
AM
11661+struct vcmd_sock_stat_v0 {
11662+ uint32_t field;
11663+ uint32_t count[3];
11664+ uint64_t total[3];
11665+};
d337f35e 11666+
4bf69007 11667+#endif /* _UAPI_VS_CACCT_CMD_H */
8931d859
AM
11668diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/context.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/context.h
11669--- linux-4.4.161/include/uapi/vserver/context.h 1970-01-01 00:00:00.000000000 +0000
11670+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/context.h 2018-10-20 04:57:21.000000000 +0000
b00e13aa 11671@@ -0,0 +1,81 @@
4bf69007
AM
11672+#ifndef _UAPI_VS_CONTEXT_H
11673+#define _UAPI_VS_CONTEXT_H
d337f35e 11674+
4bf69007
AM
11675+#include <linux/types.h>
11676+#include <linux/capability.h>
d337f35e
JR
11677+
11678+
4bf69007 11679+/* context flags */
d337f35e 11680+
4bf69007
AM
11681+#define VXF_INFO_SCHED 0x00000002
11682+#define VXF_INFO_NPROC 0x00000004
11683+#define VXF_INFO_PRIVATE 0x00000008
d337f35e 11684+
4bf69007
AM
11685+#define VXF_INFO_INIT 0x00000010
11686+#define VXF_INFO_HIDE 0x00000020
11687+#define VXF_INFO_ULIMIT 0x00000040
11688+#define VXF_INFO_NSPACE 0x00000080
d337f35e 11689+
4bf69007
AM
11690+#define VXF_SCHED_HARD 0x00000100
11691+#define VXF_SCHED_PRIO 0x00000200
11692+#define VXF_SCHED_PAUSE 0x00000400
2380c486 11693+
4bf69007
AM
11694+#define VXF_VIRT_MEM 0x00010000
11695+#define VXF_VIRT_UPTIME 0x00020000
11696+#define VXF_VIRT_CPU 0x00040000
11697+#define VXF_VIRT_LOAD 0x00080000
11698+#define VXF_VIRT_TIME 0x00100000
d337f35e 11699+
4bf69007
AM
11700+#define VXF_HIDE_MOUNT 0x01000000
11701+/* was VXF_HIDE_NETIF 0x02000000 */
11702+#define VXF_HIDE_VINFO 0x04000000
d337f35e 11703+
4bf69007
AM
11704+#define VXF_STATE_SETUP (1ULL << 32)
11705+#define VXF_STATE_INIT (1ULL << 33)
11706+#define VXF_STATE_ADMIN (1ULL << 34)
d337f35e 11707+
4bf69007
AM
11708+#define VXF_SC_HELPER (1ULL << 36)
11709+#define VXF_REBOOT_KILL (1ULL << 37)
11710+#define VXF_PERSISTENT (1ULL << 38)
d337f35e 11711+
4bf69007
AM
11712+#define VXF_FORK_RSS (1ULL << 48)
11713+#define VXF_PROLIFIC (1ULL << 49)
d337f35e 11714+
4bf69007 11715+#define VXF_IGNEG_NICE (1ULL << 52)
d337f35e 11716+
4bf69007 11717+#define VXF_ONE_TIME (0x0007ULL << 32)
d337f35e 11718+
4bf69007 11719+#define VXF_INIT_SET (VXF_STATE_SETUP | VXF_STATE_INIT | VXF_STATE_ADMIN)
d337f35e
JR
11720+
11721+
4bf69007 11722+/* context migration */
d337f35e 11723+
4bf69007
AM
11724+#define VXM_SET_INIT 0x00000001
11725+#define VXM_SET_REAPER 0x00000002
d337f35e 11726+
4bf69007 11727+/* context caps */
d337f35e 11728+
4bf69007
AM
11729+#define VXC_SET_UTSNAME 0x00000001
11730+#define VXC_SET_RLIMIT 0x00000002
11731+#define VXC_FS_SECURITY 0x00000004
11732+#define VXC_FS_TRUSTED 0x00000008
11733+#define VXC_TIOCSTI 0x00000010
2380c486 11734+
4bf69007
AM
11735+/* was VXC_RAW_ICMP 0x00000100 */
11736+#define VXC_SYSLOG 0x00001000
11737+#define VXC_OOM_ADJUST 0x00002000
11738+#define VXC_AUDIT_CONTROL 0x00004000
d337f35e 11739+
c2e5f7c8
JR
11740+#define VXC_SECURE_MOUNT 0x00010000
11741+/* #define VXC_SECURE_REMOUNT 0x00020000 */
4bf69007 11742+#define VXC_BINARY_MOUNT 0x00040000
b00e13aa 11743+#define VXC_DEV_MOUNT 0x00080000
d337f35e 11744+
4bf69007
AM
11745+#define VXC_QUOTA_CTL 0x00100000
11746+#define VXC_ADMIN_MAPPER 0x00200000
11747+#define VXC_ADMIN_CLOOP 0x00400000
d337f35e 11748+
4bf69007
AM
11749+#define VXC_KTHREAD 0x01000000
11750+#define VXC_NAMESPACE 0x02000000
d337f35e 11751+
4bf69007 11752+#endif /* _UAPI_VS_CONTEXT_H */
8931d859
AM
11753diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/context_cmd.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/context_cmd.h
11754--- linux-4.4.161/include/uapi/vserver/context_cmd.h 1970-01-01 00:00:00.000000000 +0000
11755+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/context_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11756@@ -0,0 +1,115 @@
11757+#ifndef _UAPI_VS_CONTEXT_CMD_H
11758+#define _UAPI_VS_CONTEXT_CMD_H
d33d7b00
AM
11759+
11760+
4bf69007 11761+/* vinfo commands */
3bac966d 11762+
4bf69007 11763+#define VCMD_task_xid VC_CMD(VINFO, 1, 0)
3bac966d 11764+
3bac966d 11765+
4bf69007 11766+#define VCMD_vx_info VC_CMD(VINFO, 5, 0)
3bac966d 11767+
4bf69007
AM
11768+struct vcmd_vx_info_v0 {
11769+ uint32_t xid;
11770+ uint32_t initpid;
11771+ /* more to come */
11772+};
3bac966d
AM
11773+
11774+
4bf69007 11775+#define VCMD_ctx_stat VC_CMD(VSTAT, 0, 0)
3bac966d 11776+
4bf69007
AM
11777+struct vcmd_ctx_stat_v0 {
11778+ uint32_t usecnt;
11779+ uint32_t tasks;
11780+ /* more to come */
11781+};
3bac966d 11782+
3bac966d 11783+
4bf69007 11784+/* context commands */
3bac966d 11785+
4bf69007
AM
11786+#define VCMD_ctx_create_v0 VC_CMD(VPROC, 1, 0)
11787+#define VCMD_ctx_create VC_CMD(VPROC, 1, 1)
3bac966d 11788+
4bf69007
AM
11789+struct vcmd_ctx_create {
11790+ uint64_t flagword;
11791+};
3bac966d 11792+
4bf69007
AM
11793+#define VCMD_ctx_migrate_v0 VC_CMD(PROCMIG, 1, 0)
11794+#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 1)
3bac966d 11795+
4bf69007
AM
11796+struct vcmd_ctx_migrate {
11797+ uint64_t flagword;
11798+};
3bac966d 11799+
d33d7b00 11800+
d33d7b00 11801+
4bf69007 11802+/* flag commands */
d33d7b00 11803+
4bf69007
AM
11804+#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0)
11805+#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0)
d33d7b00 11806+
4bf69007
AM
11807+struct vcmd_ctx_flags_v0 {
11808+ uint64_t flagword;
11809+ uint64_t mask;
11810+};
3bac966d
AM
11811+
11812+
3bac966d 11813+
4bf69007 11814+/* context caps commands */
3bac966d 11815+
4bf69007
AM
11816+#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 1)
11817+#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 1)
d33d7b00 11818+
4bf69007
AM
11819+struct vcmd_ctx_caps_v1 {
11820+ uint64_t ccaps;
11821+ uint64_t cmask;
11822+};
d33d7b00 11823+
d33d7b00
AM
11824+
11825+
4bf69007 11826+/* bcaps commands */
d33d7b00 11827+
4bf69007
AM
11828+#define VCMD_get_bcaps VC_CMD(FLAGS, 9, 0)
11829+#define VCMD_set_bcaps VC_CMD(FLAGS, 10, 0)
d33d7b00 11830+
4bf69007
AM
11831+struct vcmd_bcaps {
11832+ uint64_t bcaps;
11833+ uint64_t bmask;
11834+};
3bac966d 11835+
d33d7b00 11836+
d33d7b00 11837+
4bf69007 11838+/* umask commands */
d33d7b00 11839+
4bf69007
AM
11840+#define VCMD_get_umask VC_CMD(FLAGS, 13, 0)
11841+#define VCMD_set_umask VC_CMD(FLAGS, 14, 0)
3bac966d 11842+
4bf69007
AM
11843+struct vcmd_umask {
11844+ uint64_t umask;
11845+ uint64_t mask;
11846+};
d33d7b00 11847+
d33d7b00
AM
11848+
11849+
4bf69007 11850+/* wmask commands */
d33d7b00 11851+
4bf69007
AM
11852+#define VCMD_get_wmask VC_CMD(FLAGS, 15, 0)
11853+#define VCMD_set_wmask VC_CMD(FLAGS, 16, 0)
d33d7b00 11854+
4bf69007
AM
11855+struct vcmd_wmask {
11856+ uint64_t wmask;
11857+ uint64_t mask;
d33d7b00
AM
11858+};
11859+
d33d7b00 11860+
d33d7b00 11861+
4bf69007 11862+/* OOM badness */
d33d7b00 11863+
4bf69007
AM
11864+#define VCMD_get_badness VC_CMD(MEMCTRL, 5, 0)
11865+#define VCMD_set_badness VC_CMD(MEMCTRL, 6, 0)
d33d7b00 11866+
4bf69007
AM
11867+struct vcmd_badness_v0 {
11868+ int64_t bias;
11869+};
d33d7b00 11870+
4bf69007 11871+#endif /* _UAPI_VS_CONTEXT_CMD_H */
8931d859
AM
11872diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/cvirt_cmd.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/cvirt_cmd.h
11873--- linux-4.4.161/include/uapi/vserver/cvirt_cmd.h 1970-01-01 00:00:00.000000000 +0000
11874+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/cvirt_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11875@@ -0,0 +1,41 @@
11876+#ifndef _UAPI_VS_CVIRT_CMD_H
11877+#define _UAPI_VS_CVIRT_CMD_H
d33d7b00 11878+
d33d7b00 11879+
4bf69007 11880+/* virtual host info name commands */
d33d7b00 11881+
4bf69007
AM
11882+#define VCMD_set_vhi_name VC_CMD(VHOST, 1, 0)
11883+#define VCMD_get_vhi_name VC_CMD(VHOST, 2, 0)
d33d7b00 11884+
4bf69007
AM
11885+struct vcmd_vhi_name_v0 {
11886+ uint32_t field;
11887+ char name[65];
11888+};
d33d7b00 11889+
d33d7b00 11890+
4bf69007
AM
11891+enum vhi_name_field {
11892+ VHIN_CONTEXT = 0,
11893+ VHIN_SYSNAME,
11894+ VHIN_NODENAME,
11895+ VHIN_RELEASE,
11896+ VHIN_VERSION,
11897+ VHIN_MACHINE,
11898+ VHIN_DOMAINNAME,
11899+};
d33d7b00 11900+
d33d7b00 11901+
d33d7b00 11902+
4bf69007 11903+#define VCMD_virt_stat VC_CMD(VSTAT, 3, 0)
d33d7b00 11904+
4bf69007
AM
11905+struct vcmd_virt_stat_v0 {
11906+ uint64_t offset;
11907+ uint64_t uptime;
11908+ uint32_t nr_threads;
11909+ uint32_t nr_running;
11910+ uint32_t nr_uninterruptible;
11911+ uint32_t nr_onhold;
11912+ uint32_t nr_forks;
11913+ uint32_t load[3];
11914+};
2380c486 11915+
4bf69007 11916+#endif /* _UAPI_VS_CVIRT_CMD_H */
8931d859
AM
11917diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/debug_cmd.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/debug_cmd.h
11918--- linux-4.4.161/include/uapi/vserver/debug_cmd.h 1970-01-01 00:00:00.000000000 +0000
11919+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/debug_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11920@@ -0,0 +1,24 @@
11921+#ifndef _UAPI_VS_DEBUG_CMD_H
11922+#define _UAPI_VS_DEBUG_CMD_H
537831f9 11923+
537831f9 11924+
4bf69007 11925+/* debug commands */
537831f9 11926+
4bf69007 11927+#define VCMD_dump_history VC_CMD(DEBUG, 1, 0)
537831f9 11928+
4bf69007
AM
11929+#define VCMD_read_history VC_CMD(DEBUG, 5, 0)
11930+#define VCMD_read_monitor VC_CMD(DEBUG, 6, 0)
537831f9 11931+
4bf69007
AM
11932+struct vcmd_read_history_v0 {
11933+ uint32_t index;
11934+ uint32_t count;
11935+ char __user *data;
11936+};
537831f9 11937+
4bf69007
AM
11938+struct vcmd_read_monitor_v0 {
11939+ uint32_t index;
11940+ uint32_t count;
11941+ char __user *data;
11942+};
537831f9 11943+
4bf69007 11944+#endif /* _UAPI_VS_DEBUG_CMD_H */
8931d859
AM
11945diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/device.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/device.h
11946--- linux-4.4.161/include/uapi/vserver/device.h 1970-01-01 00:00:00.000000000 +0000
11947+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/device.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11948@@ -0,0 +1,12 @@
11949+#ifndef _UAPI_VS_DEVICE_H
11950+#define _UAPI_VS_DEVICE_H
d337f35e 11951+
d337f35e 11952+
4bf69007
AM
11953+#define DATTR_CREATE 0x00000001
11954+#define DATTR_OPEN 0x00000002
d337f35e 11955+
4bf69007 11956+#define DATTR_REMAP 0x00000010
d337f35e 11957+
4bf69007 11958+#define DATTR_MASK 0x00000013
ec22aa5c 11959+
4bf69007 11960+#endif /* _UAPI_VS_DEVICE_H */
8931d859
AM
11961diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/device_cmd.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/device_cmd.h
11962--- linux-4.4.161/include/uapi/vserver/device_cmd.h 1970-01-01 00:00:00.000000000 +0000
11963+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/device_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11964@@ -0,0 +1,16 @@
11965+#ifndef _UAPI_VS_DEVICE_CMD_H
11966+#define _UAPI_VS_DEVICE_CMD_H
2380c486 11967+
1163e6ab 11968+
4bf69007 11969+/* device vserver commands */
1163e6ab 11970+
4bf69007
AM
11971+#define VCMD_set_mapping VC_CMD(DEVICE, 1, 0)
11972+#define VCMD_unset_mapping VC_CMD(DEVICE, 2, 0)
e915af4e 11973+
4bf69007
AM
11974+struct vcmd_set_mapping_v0 {
11975+ const char __user *device;
11976+ const char __user *target;
11977+ uint32_t flags;
11978+};
e915af4e 11979+
4bf69007 11980+#endif /* _UAPI_VS_DEVICE_CMD_H */
8931d859
AM
11981diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/dlimit_cmd.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/dlimit_cmd.h
11982--- linux-4.4.161/include/uapi/vserver/dlimit_cmd.h 1970-01-01 00:00:00.000000000 +0000
11983+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/dlimit_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
11984@@ -0,0 +1,67 @@
11985+#ifndef _UAPI_VS_DLIMIT_CMD_H
11986+#define _UAPI_VS_DLIMIT_CMD_H
e915af4e 11987+
42bc425c 11988+
4bf69007 11989+/* dlimit vserver commands */
d337f35e 11990+
4bf69007
AM
11991+#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0)
11992+#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0)
d337f35e 11993+
4bf69007
AM
11994+#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0)
11995+#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0)
d337f35e 11996+
4bf69007
AM
11997+struct vcmd_ctx_dlimit_base_v0 {
11998+ const char __user *name;
11999+ uint32_t flags;
12000+};
12001+
12002+struct vcmd_ctx_dlimit_v0 {
12003+ const char __user *name;
12004+ uint32_t space_used; /* used space in kbytes */
12005+ uint32_t space_total; /* maximum space in kbytes */
12006+ uint32_t inodes_used; /* used inodes */
12007+ uint32_t inodes_total; /* maximum inodes */
12008+ uint32_t reserved; /* reserved for root in % */
12009+ uint32_t flags;
12010+};
12011+
12012+#define CDLIM_UNSET ((uint32_t)0UL)
12013+#define CDLIM_INFINITY ((uint32_t)~0UL)
12014+#define CDLIM_KEEP ((uint32_t)~1UL)
12015+
12016+#define DLIME_UNIT 0
12017+#define DLIME_KILO 1
12018+#define DLIME_MEGA 2
12019+#define DLIME_GIGA 3
12020+
12021+#define DLIMF_SHIFT 0x10
12022+
12023+#define DLIMS_USED 0
12024+#define DLIMS_TOTAL 2
12025+
12026+static inline
12027+uint64_t dlimit_space_32to64(uint32_t val, uint32_t flags, int shift)
2380c486 12028+{
4bf69007
AM
12029+ int exp = (flags & DLIMF_SHIFT) ?
12030+ (flags >> shift) & DLIME_GIGA : DLIME_KILO;
12031+ return ((uint64_t)val) << (10 * exp);
2380c486
JR
12032+}
12033+
4bf69007
AM
12034+static inline
12035+uint32_t dlimit_space_64to32(uint64_t val, uint32_t *flags, int shift)
2380c486 12036+{
4bf69007 12037+ int exp = 0;
ec22aa5c 12038+
4bf69007
AM
12039+ if (*flags & DLIMF_SHIFT) {
12040+ while (val > (1LL << 32) && (exp < 3)) {
12041+ val >>= 10;
12042+ exp++;
12043+ }
12044+ *flags &= ~(DLIME_GIGA << shift);
12045+ *flags |= exp << shift;
12046+ } else
12047+ val >>= 10;
12048+ return val;
2380c486
JR
12049+}
12050+
4bf69007 12051+#endif /* _UAPI_VS_DLIMIT_CMD_H */
8931d859
AM
12052diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/inode.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/inode.h
12053--- linux-4.4.161/include/uapi/vserver/inode.h 1970-01-01 00:00:00.000000000 +0000
12054+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/inode.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12055@@ -0,0 +1,23 @@
12056+#ifndef _UAPI_VS_INODE_H
12057+#define _UAPI_VS_INODE_H
2380c486 12058+
d337f35e 12059+
4bf69007 12060+#define IATTR_TAG 0x01000000
2380c486 12061+
4bf69007
AM
12062+#define IATTR_ADMIN 0x00000001
12063+#define IATTR_WATCH 0x00000002
12064+#define IATTR_HIDE 0x00000004
12065+#define IATTR_FLAGS 0x00000007
2380c486 12066+
4bf69007
AM
12067+#define IATTR_BARRIER 0x00010000
12068+#define IATTR_IXUNLINK 0x00020000
12069+#define IATTR_IMMUTABLE 0x00040000
12070+#define IATTR_COW 0x00080000
d337f35e 12071+
ec22aa5c 12072+
4bf69007 12073+/* inode ioctls */
ec22aa5c 12074+
4bf69007
AM
12075+#define FIOC_GETXFLG _IOR('x', 5, long)
12076+#define FIOC_SETXFLG _IOW('x', 6, long)
d337f35e 12077+
4bf69007 12078+#endif /* _UAPI_VS_INODE_H */
8931d859
AM
12079diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/inode_cmd.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/inode_cmd.h
12080--- linux-4.4.161/include/uapi/vserver/inode_cmd.h 1970-01-01 00:00:00.000000000 +0000
12081+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/inode_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12082@@ -0,0 +1,26 @@
12083+#ifndef _UAPI_VS_INODE_CMD_H
12084+#define _UAPI_VS_INODE_CMD_H
d337f35e 12085+
db55b927 12086+
4bf69007 12087+/* inode vserver commands */
2c8c5bc5 12088+
4bf69007
AM
12089+#define VCMD_get_iattr VC_CMD(INODE, 1, 1)
12090+#define VCMD_set_iattr VC_CMD(INODE, 2, 1)
2bf5ad28 12091+
4bf69007
AM
12092+#define VCMD_fget_iattr VC_CMD(INODE, 3, 0)
12093+#define VCMD_fset_iattr VC_CMD(INODE, 4, 0)
4a036bed 12094+
4bf69007
AM
12095+struct vcmd_ctx_iattr_v1 {
12096+ const char __user *name;
12097+ uint32_t tag;
12098+ uint32_t flags;
12099+ uint32_t mask;
12100+};
4a036bed 12101+
4bf69007
AM
12102+struct vcmd_ctx_fiattr_v0 {
12103+ uint32_t tag;
12104+ uint32_t flags;
12105+ uint32_t mask;
12106+};
4a036bed 12107+
4bf69007 12108+#endif /* _UAPI_VS_INODE_CMD_H */
8931d859
AM
12109diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/limit.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/limit.h
12110--- linux-4.4.161/include/uapi/vserver/limit.h 1970-01-01 00:00:00.000000000 +0000
12111+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/limit.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12112@@ -0,0 +1,14 @@
12113+#ifndef _UAPI_VS_LIMIT_H
12114+#define _UAPI_VS_LIMIT_H
4a036bed 12115+
42bc425c 12116+
4bf69007
AM
12117+#define VLIMIT_NSOCK 16
12118+#define VLIMIT_OPENFD 17
12119+#define VLIMIT_ANON 18
12120+#define VLIMIT_SHMEM 19
12121+#define VLIMIT_SEMARY 20
12122+#define VLIMIT_NSEMS 21
12123+#define VLIMIT_DENTRY 22
12124+#define VLIMIT_MAPPED 23
adc1caaa 12125+
4bf69007 12126+#endif /* _UAPI_VS_LIMIT_H */
8931d859
AM
12127diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/limit_cmd.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/limit_cmd.h
12128--- linux-4.4.161/include/uapi/vserver/limit_cmd.h 1970-01-01 00:00:00.000000000 +0000
12129+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/limit_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12130@@ -0,0 +1,40 @@
12131+#ifndef _UAPI_VS_LIMIT_CMD_H
12132+#define _UAPI_VS_LIMIT_CMD_H
adc1caaa 12133+
adc1caaa 12134+
4bf69007 12135+/* rlimit vserver commands */
adc1caaa 12136+
4bf69007
AM
12137+#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0)
12138+#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0)
12139+#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0)
12140+#define VCMD_reset_hits VC_CMD(RLIMIT, 7, 0)
12141+#define VCMD_reset_minmax VC_CMD(RLIMIT, 9, 0)
adc1caaa 12142+
4bf69007
AM
12143+struct vcmd_ctx_rlimit_v0 {
12144+ uint32_t id;
12145+ uint64_t minimum;
12146+ uint64_t softlimit;
12147+ uint64_t maximum;
12148+};
d33d7b00 12149+
4bf69007
AM
12150+struct vcmd_ctx_rlimit_mask_v0 {
12151+ uint32_t minimum;
12152+ uint32_t softlimit;
12153+ uint32_t maximum;
12154+};
d33d7b00 12155+
4bf69007 12156+#define VCMD_rlimit_stat VC_CMD(VSTAT, 1, 0)
d33d7b00 12157+
4bf69007
AM
12158+struct vcmd_rlimit_stat_v0 {
12159+ uint32_t id;
12160+ uint32_t hits;
12161+ uint64_t value;
12162+ uint64_t minimum;
12163+ uint64_t maximum;
12164+};
d33d7b00 12165+
4bf69007
AM
12166+#define CRLIM_UNSET (0ULL)
12167+#define CRLIM_INFINITY (~0ULL)
12168+#define CRLIM_KEEP (~1ULL)
d33d7b00 12169+
4bf69007 12170+#endif /* _UAPI_VS_LIMIT_CMD_H */
8931d859
AM
12171diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/monitor.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/monitor.h
12172--- linux-4.4.161/include/uapi/vserver/monitor.h 1970-01-01 00:00:00.000000000 +0000
12173+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/monitor.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12174@@ -0,0 +1,96 @@
12175+#ifndef _UAPI_VS_MONITOR_H
12176+#define _UAPI_VS_MONITOR_H
d33d7b00 12177+
4bf69007 12178+#include <linux/types.h>
d33d7b00 12179+
d33d7b00 12180+
4bf69007
AM
12181+enum {
12182+ VXM_UNUSED = 0,
d33d7b00 12183+
4bf69007 12184+ VXM_SYNC = 0x10,
d33d7b00 12185+
4bf69007
AM
12186+ VXM_UPDATE = 0x20,
12187+ VXM_UPDATE_1,
12188+ VXM_UPDATE_2,
d33d7b00 12189+
4bf69007
AM
12190+ VXM_RQINFO_1 = 0x24,
12191+ VXM_RQINFO_2,
d33d7b00 12192+
4bf69007
AM
12193+ VXM_ACTIVATE = 0x40,
12194+ VXM_DEACTIVATE,
12195+ VXM_IDLE,
d33d7b00 12196+
4bf69007
AM
12197+ VXM_HOLD = 0x44,
12198+ VXM_UNHOLD,
d33d7b00 12199+
4bf69007
AM
12200+ VXM_MIGRATE = 0x48,
12201+ VXM_RESCHED,
d33d7b00 12202+
4bf69007
AM
12203+ /* all other bits are flags */
12204+ VXM_SCHED = 0x80,
12205+};
d33d7b00 12206+
4bf69007
AM
12207+struct _vxm_update_1 {
12208+ uint32_t tokens_max;
12209+ uint32_t fill_rate;
12210+ uint32_t interval;
12211+};
d33d7b00 12212+
4bf69007
AM
12213+struct _vxm_update_2 {
12214+ uint32_t tokens_min;
12215+ uint32_t fill_rate;
12216+ uint32_t interval;
12217+};
d33d7b00 12218+
4bf69007
AM
12219+struct _vxm_rqinfo_1 {
12220+ uint16_t running;
12221+ uint16_t onhold;
12222+ uint16_t iowait;
12223+ uint16_t uintr;
12224+ uint32_t idle_tokens;
12225+};
d33d7b00 12226+
4bf69007
AM
12227+struct _vxm_rqinfo_2 {
12228+ uint32_t norm_time;
12229+ uint32_t idle_time;
12230+ uint32_t idle_skip;
12231+};
d33d7b00 12232+
4bf69007
AM
12233+struct _vxm_sched {
12234+ uint32_t tokens;
12235+ uint32_t norm_time;
12236+ uint32_t idle_time;
12237+};
d33d7b00 12238+
4bf69007
AM
12239+struct _vxm_task {
12240+ uint16_t pid;
12241+ uint16_t state;
12242+};
d33d7b00 12243+
4bf69007
AM
12244+struct _vxm_event {
12245+ uint32_t jif;
12246+ union {
12247+ uint32_t seq;
12248+ uint32_t sec;
12249+ };
12250+ union {
12251+ uint32_t tokens;
12252+ uint32_t nsec;
12253+ struct _vxm_task tsk;
12254+ };
12255+};
61b0c03f 12256+
4bf69007
AM
12257+struct _vx_mon_entry {
12258+ uint16_t type;
12259+ uint16_t xid;
12260+ union {
12261+ struct _vxm_event ev;
12262+ struct _vxm_sched sd;
12263+ struct _vxm_update_1 u1;
12264+ struct _vxm_update_2 u2;
12265+ struct _vxm_rqinfo_1 q1;
12266+ struct _vxm_rqinfo_2 q2;
12267+ };
12268+};
d33d7b00 12269+
4bf69007 12270+#endif /* _UAPI_VS_MONITOR_H */
8931d859
AM
12271diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/network.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/network.h
12272--- linux-4.4.161/include/uapi/vserver/network.h 1970-01-01 00:00:00.000000000 +0000
12273+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/network.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12274@@ -0,0 +1,76 @@
12275+#ifndef _UAPI_VS_NETWORK_H
12276+#define _UAPI_VS_NETWORK_H
d33d7b00 12277+
4bf69007 12278+#include <linux/types.h>
d33d7b00 12279+
d33d7b00 12280+
4bf69007 12281+#define MAX_N_CONTEXT 65535 /* Arbitrary limit */
d33d7b00 12282+
d33d7b00 12283+
4bf69007 12284+/* network flags */
d33d7b00 12285+
4bf69007 12286+#define NXF_INFO_PRIVATE 0x00000008
d33d7b00 12287+
4bf69007
AM
12288+#define NXF_SINGLE_IP 0x00000100
12289+#define NXF_LBACK_REMAP 0x00000200
12290+#define NXF_LBACK_ALLOW 0x00000400
d33d7b00 12291+
4bf69007
AM
12292+#define NXF_HIDE_NETIF 0x02000000
12293+#define NXF_HIDE_LBACK 0x04000000
265d6dcc 12294+
4bf69007
AM
12295+#define NXF_STATE_SETUP (1ULL << 32)
12296+#define NXF_STATE_ADMIN (1ULL << 34)
d33d7b00 12297+
4bf69007
AM
12298+#define NXF_SC_HELPER (1ULL << 36)
12299+#define NXF_PERSISTENT (1ULL << 38)
d33d7b00 12300+
4bf69007 12301+#define NXF_ONE_TIME (0x0005ULL << 32)
d33d7b00 12302+
d33d7b00 12303+
4bf69007 12304+#define NXF_INIT_SET (__nxf_init_set())
d33d7b00 12305+
4bf69007
AM
12306+static inline uint64_t __nxf_init_set(void) {
12307+ return NXF_STATE_ADMIN
12308+#ifdef CONFIG_VSERVER_AUTO_LBACK
12309+ | NXF_LBACK_REMAP
12310+ | NXF_HIDE_LBACK
12311+#endif
12312+#ifdef CONFIG_VSERVER_AUTO_SINGLE
12313+ | NXF_SINGLE_IP
12314+#endif
12315+ | NXF_HIDE_NETIF;
12316+}
d33d7b00 12317+
d33d7b00 12318+
4bf69007 12319+/* network caps */
d33d7b00 12320+
4bf69007 12321+#define NXC_TUN_CREATE 0x00000001
d33d7b00 12322+
4bf69007 12323+#define NXC_RAW_ICMP 0x00000100
d33d7b00 12324+
4bf69007 12325+#define NXC_MULTICAST 0x00001000
d33d7b00 12326+
adc1caaa 12327+
4bf69007 12328+/* address types */
adc1caaa 12329+
4bf69007
AM
12330+#define NXA_TYPE_IPV4 0x0001
12331+#define NXA_TYPE_IPV6 0x0002
adc1caaa 12332+
4bf69007
AM
12333+#define NXA_TYPE_NONE 0x0000
12334+#define NXA_TYPE_ANY 0x00FF
adc1caaa 12335+
4bf69007
AM
12336+#define NXA_TYPE_ADDR 0x0010
12337+#define NXA_TYPE_MASK 0x0020
12338+#define NXA_TYPE_RANGE 0x0040
adc1caaa 12339+
4bf69007 12340+#define NXA_MASK_ALL (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE)
adc1caaa 12341+
4bf69007
AM
12342+#define NXA_MOD_BCAST 0x0100
12343+#define NXA_MOD_LBACK 0x0200
adc1caaa 12344+
4bf69007 12345+#define NXA_LOOPBACK 0x1000
2380c486 12346+
4bf69007
AM
12347+#define NXA_MASK_BIND (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK)
12348+#define NXA_MASK_SHOW (NXA_MASK_ALL | NXA_LOOPBACK)
2380c486 12349+
4bf69007 12350+#endif /* _UAPI_VS_NETWORK_H */
8931d859
AM
12351diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/network_cmd.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/network_cmd.h
12352--- linux-4.4.161/include/uapi/vserver/network_cmd.h 1970-01-01 00:00:00.000000000 +0000
12353+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/network_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12354@@ -0,0 +1,123 @@
12355+#ifndef _UAPI_VS_NETWORK_CMD_H
12356+#define _UAPI_VS_NETWORK_CMD_H
2380c486 12357+
2380c486 12358+
4bf69007 12359+/* vinfo commands */
2380c486 12360+
4bf69007 12361+#define VCMD_task_nid VC_CMD(VINFO, 2, 0)
2380c486 12362+
2380c486 12363+
4bf69007 12364+#define VCMD_nx_info VC_CMD(VINFO, 6, 0)
2380c486 12365+
4bf69007
AM
12366+struct vcmd_nx_info_v0 {
12367+ uint32_t nid;
12368+ /* more to come */
12369+};
2380c486 12370+
2380c486 12371+
4bf69007
AM
12372+#include <linux/in.h>
12373+#include <linux/in6.h>
2380c486 12374+
4bf69007
AM
12375+#define VCMD_net_create_v0 VC_CMD(VNET, 1, 0)
12376+#define VCMD_net_create VC_CMD(VNET, 1, 1)
2380c486 12377+
4bf69007
AM
12378+struct vcmd_net_create {
12379+ uint64_t flagword;
12380+};
2380c486 12381+
4bf69007 12382+#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0)
2380c486 12383+
4bf69007
AM
12384+#define VCMD_net_add VC_CMD(NETALT, 1, 0)
12385+#define VCMD_net_remove VC_CMD(NETALT, 2, 0)
2380c486 12386+
4bf69007
AM
12387+struct vcmd_net_addr_v0 {
12388+ uint16_t type;
12389+ uint16_t count;
12390+ struct in_addr ip[4];
12391+ struct in_addr mask[4];
12392+};
2380c486 12393+
4bf69007
AM
12394+#define VCMD_net_add_ipv4_v1 VC_CMD(NETALT, 1, 1)
12395+#define VCMD_net_rem_ipv4_v1 VC_CMD(NETALT, 2, 1)
2380c486 12396+
4bf69007
AM
12397+struct vcmd_net_addr_ipv4_v1 {
12398+ uint16_t type;
12399+ uint16_t flags;
12400+ struct in_addr ip;
12401+ struct in_addr mask;
12402+};
2380c486 12403+
4bf69007
AM
12404+#define VCMD_net_add_ipv4 VC_CMD(NETALT, 1, 2)
12405+#define VCMD_net_rem_ipv4 VC_CMD(NETALT, 2, 2)
2380c486 12406+
4bf69007
AM
12407+struct vcmd_net_addr_ipv4_v2 {
12408+ uint16_t type;
12409+ uint16_t flags;
12410+ struct in_addr ip;
12411+ struct in_addr ip2;
12412+ struct in_addr mask;
12413+};
2380c486 12414+
4bf69007
AM
12415+#define VCMD_net_add_ipv6 VC_CMD(NETALT, 3, 1)
12416+#define VCMD_net_remove_ipv6 VC_CMD(NETALT, 4, 1)
2380c486 12417+
4bf69007
AM
12418+struct vcmd_net_addr_ipv6_v1 {
12419+ uint16_t type;
12420+ uint16_t flags;
12421+ uint32_t prefix;
12422+ struct in6_addr ip;
12423+ struct in6_addr mask;
12424+};
2380c486 12425+
4bf69007
AM
12426+#define VCMD_add_match_ipv4 VC_CMD(NETALT, 5, 0)
12427+#define VCMD_get_match_ipv4 VC_CMD(NETALT, 6, 0)
2380c486 12428+
4bf69007
AM
12429+struct vcmd_match_ipv4_v0 {
12430+ uint16_t type;
12431+ uint16_t flags;
12432+ uint16_t parent;
12433+ uint16_t prefix;
12434+ struct in_addr ip;
12435+ struct in_addr ip2;
12436+ struct in_addr mask;
12437+};
2380c486 12438+
4bf69007
AM
12439+#define VCMD_add_match_ipv6 VC_CMD(NETALT, 7, 0)
12440+#define VCMD_get_match_ipv6 VC_CMD(NETALT, 8, 0)
2380c486 12441+
4bf69007
AM
12442+struct vcmd_match_ipv6_v0 {
12443+ uint16_t type;
12444+ uint16_t flags;
12445+ uint16_t parent;
12446+ uint16_t prefix;
12447+ struct in6_addr ip;
12448+ struct in6_addr ip2;
12449+ struct in6_addr mask;
12450+};
2380c486 12451+
2380c486 12452+
2380c486 12453+
2380c486 12454+
4bf69007 12455+/* flag commands */
2380c486 12456+
4bf69007
AM
12457+#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0)
12458+#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0)
2380c486 12459+
4bf69007
AM
12460+struct vcmd_net_flags_v0 {
12461+ uint64_t flagword;
12462+ uint64_t mask;
12463+};
2380c486 12464+
2380c486 12465+
ab30d09f 12466+
4bf69007 12467+/* network caps commands */
ab30d09f 12468+
4bf69007
AM
12469+#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0)
12470+#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0)
ec22aa5c 12471+
4bf69007
AM
12472+struct vcmd_net_caps_v0 {
12473+ uint64_t ncaps;
12474+ uint64_t cmask;
12475+};
3bac966d 12476+
4bf69007 12477+#endif /* _UAPI_VS_NETWORK_CMD_H */
8931d859
AM
12478diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/sched_cmd.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/sched_cmd.h
12479--- linux-4.4.161/include/uapi/vserver/sched_cmd.h 1970-01-01 00:00:00.000000000 +0000
12480+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/sched_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12481@@ -0,0 +1,13 @@
12482+#ifndef _UAPI_VS_SCHED_CMD_H
12483+#define _UAPI_VS_SCHED_CMD_H
d337f35e 12484+
d337f35e 12485+
4bf69007
AM
12486+struct vcmd_prio_bias {
12487+ int32_t cpu_id;
12488+ int32_t prio_bias;
12489+};
2380c486 12490+
4bf69007
AM
12491+#define VCMD_set_prio_bias VC_CMD(SCHED, 4, 0)
12492+#define VCMD_get_prio_bias VC_CMD(SCHED, 5, 0)
d337f35e 12493+
4bf69007 12494+#endif /* _UAPI_VS_SCHED_CMD_H */
8931d859
AM
12495diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/signal_cmd.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/signal_cmd.h
12496--- linux-4.4.161/include/uapi/vserver/signal_cmd.h 1970-01-01 00:00:00.000000000 +0000
12497+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/signal_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12498@@ -0,0 +1,31 @@
12499+#ifndef _UAPI_VS_SIGNAL_CMD_H
12500+#define _UAPI_VS_SIGNAL_CMD_H
d337f35e 12501+
d337f35e 12502+
4bf69007 12503+/* signalling vserver commands */
d337f35e 12504+
4bf69007
AM
12505+#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0)
12506+#define VCMD_wait_exit VC_CMD(EVENT, 99, 0)
d337f35e 12507+
4bf69007
AM
12508+struct vcmd_ctx_kill_v0 {
12509+ int32_t pid;
12510+ int32_t sig;
12511+};
d337f35e 12512+
4bf69007
AM
12513+struct vcmd_wait_exit_v0 {
12514+ int32_t reboot_cmd;
12515+ int32_t exit_code;
12516+};
d337f35e 12517+
d337f35e 12518+
4bf69007 12519+/* process alteration commands */
ab30d09f 12520+
4bf69007
AM
12521+#define VCMD_get_pflags VC_CMD(PROCALT, 5, 0)
12522+#define VCMD_set_pflags VC_CMD(PROCALT, 6, 0)
d337f35e 12523+
4bf69007
AM
12524+struct vcmd_pflags_v0 {
12525+ uint32_t flagword;
12526+ uint32_t mask;
12527+};
3bac966d 12528+
4bf69007 12529+#endif /* _UAPI_VS_SIGNAL_CMD_H */
8931d859
AM
12530diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/space_cmd.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/space_cmd.h
12531--- linux-4.4.161/include/uapi/vserver/space_cmd.h 1970-01-01 00:00:00.000000000 +0000
12532+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/space_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12533@@ -0,0 +1,28 @@
12534+#ifndef _UAPI_VS_SPACE_CMD_H
12535+#define _UAPI_VS_SPACE_CMD_H
d337f35e 12536+
d337f35e 12537+
4bf69007
AM
12538+#define VCMD_enter_space_v0 VC_CMD(PROCALT, 1, 0)
12539+#define VCMD_enter_space_v1 VC_CMD(PROCALT, 1, 1)
12540+#define VCMD_enter_space VC_CMD(PROCALT, 1, 2)
2380c486 12541+
4bf69007
AM
12542+#define VCMD_set_space_v0 VC_CMD(PROCALT, 3, 0)
12543+#define VCMD_set_space_v1 VC_CMD(PROCALT, 3, 1)
12544+#define VCMD_set_space VC_CMD(PROCALT, 3, 2)
d337f35e 12545+
4bf69007 12546+#define VCMD_get_space_mask_v0 VC_CMD(PROCALT, 4, 0)
d337f35e 12547+
4bf69007
AM
12548+#define VCMD_get_space_mask VC_CMD(VSPACE, 0, 1)
12549+#define VCMD_get_space_default VC_CMD(VSPACE, 1, 0)
d337f35e 12550+
d337f35e 12551+
4bf69007
AM
12552+struct vcmd_space_mask_v1 {
12553+ uint64_t mask;
12554+};
d337f35e 12555+
4bf69007
AM
12556+struct vcmd_space_mask_v2 {
12557+ uint64_t mask;
12558+ uint32_t index;
12559+};
d337f35e 12560+
4bf69007 12561+#endif /* _UAPI_VS_SPACE_CMD_H */
8931d859
AM
12562diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/switch.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/switch.h
12563--- linux-4.4.161/include/uapi/vserver/switch.h 1970-01-01 00:00:00.000000000 +0000
12564+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/switch.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12565@@ -0,0 +1,90 @@
12566+#ifndef _UAPI_VS_SWITCH_H
12567+#define _UAPI_VS_SWITCH_H
d337f35e 12568+
4bf69007 12569+#include <linux/types.h>
d337f35e 12570+
d337f35e 12571+
4bf69007
AM
12572+#define VC_CATEGORY(c) (((c) >> 24) & 0x3F)
12573+#define VC_COMMAND(c) (((c) >> 16) & 0xFF)
12574+#define VC_VERSION(c) ((c) & 0xFFF)
d337f35e 12575+
4bf69007
AM
12576+#define VC_CMD(c, i, v) ((((VC_CAT_ ## c) & 0x3F) << 24) \
12577+ | (((i) & 0xFF) << 16) | ((v) & 0xFFF))
d337f35e 12578+
4bf69007 12579+/*
d337f35e 12580+
4bf69007 12581+ Syscall Matrix V2.8
d337f35e 12582+
4bf69007
AM
12583+ |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
12584+ |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | |
12585+ |INFO |SETUP | |MOVE | | | | | |
12586+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12587+ SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICE | |
12588+ HOST | 00| 01| 02| 03| 04| 05| | 06| 07|
12589+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12590+ CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| | |SCHED. | |
12591+ PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15|
12592+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12593+ MEMORY | | | | |MEMCTRL| | |SWAP | |
12594+ | 16| 17| 18| 19| 20| 21| | 22| 23|
12595+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12596+ NETWORK| |VNET |NETALT |NETMIG |NETCTL | | |SERIAL | |
12597+ | 24| 25| 26| 27| 28| 29| | 30| 31|
12598+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12599+ DISK | | | |TAGMIG |DLIMIT | | |INODE | |
12600+ VFS | 32| 33| 34| 35| 36| 37| | 38| 39|
12601+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12602+ OTHER |VSTAT | | | | | | |VINFO | |
12603+ | 40| 41| 42| 43| 44| 45| | 46| 47|
12604+ =======+=======+=======+=======+=======+=======+=======+ +=======+=======+
12605+ SPECIAL|EVENT | | | |FLAGS | | |VSPACE | |
12606+ | 48| 49| 50| 51| 52| 53| | 54| 55|
12607+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12608+ SPECIAL|DEBUG | | | |RLIMIT |SYSCALL| | |COMPAT |
12609+ | 56| 57| 58| 59| 60|TEST 61| | 62| 63|
12610+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
d337f35e 12611+
4bf69007 12612+*/
d337f35e 12613+
4bf69007 12614+#define VC_CAT_VERSION 0
d337f35e 12615+
4bf69007
AM
12616+#define VC_CAT_VSETUP 1
12617+#define VC_CAT_VHOST 2
d337f35e 12618+
4bf69007 12619+#define VC_CAT_DEVICE 6
d337f35e 12620+
4bf69007
AM
12621+#define VC_CAT_VPROC 9
12622+#define VC_CAT_PROCALT 10
12623+#define VC_CAT_PROCMIG 11
12624+#define VC_CAT_PROCTRL 12
d337f35e 12625+
4bf69007
AM
12626+#define VC_CAT_SCHED 14
12627+#define VC_CAT_MEMCTRL 20
d337f35e 12628+
4bf69007
AM
12629+#define VC_CAT_VNET 25
12630+#define VC_CAT_NETALT 26
12631+#define VC_CAT_NETMIG 27
12632+#define VC_CAT_NETCTRL 28
d337f35e 12633+
4bf69007
AM
12634+#define VC_CAT_TAGMIG 35
12635+#define VC_CAT_DLIMIT 36
12636+#define VC_CAT_INODE 38
d337f35e 12637+
4bf69007
AM
12638+#define VC_CAT_VSTAT 40
12639+#define VC_CAT_VINFO 46
12640+#define VC_CAT_EVENT 48
d337f35e 12641+
4bf69007
AM
12642+#define VC_CAT_FLAGS 52
12643+#define VC_CAT_VSPACE 54
12644+#define VC_CAT_DEBUG 56
12645+#define VC_CAT_RLIMIT 60
d337f35e 12646+
4bf69007
AM
12647+#define VC_CAT_SYSTEST 61
12648+#define VC_CAT_COMPAT 63
d337f35e 12649+
4bf69007 12650+/* query version */
d337f35e 12651+
4bf69007
AM
12652+#define VCMD_get_version VC_CMD(VERSION, 0, 0)
12653+#define VCMD_get_vci VC_CMD(VERSION, 1, 0)
2380c486 12654+
4bf69007 12655+#endif /* _UAPI_VS_SWITCH_H */
8931d859
AM
12656diff -NurpP --minimal linux-4.4.161/include/uapi/vserver/tag_cmd.h linux-4.4.161-vs2.3.9.8/include/uapi/vserver/tag_cmd.h
12657--- linux-4.4.161/include/uapi/vserver/tag_cmd.h 1970-01-01 00:00:00.000000000 +0000
12658+++ linux-4.4.161-vs2.3.9.8/include/uapi/vserver/tag_cmd.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12659@@ -0,0 +1,14 @@
12660+#ifndef _UAPI_VS_TAG_CMD_H
12661+#define _UAPI_VS_TAG_CMD_H
d337f35e 12662+
d337f35e 12663+
4bf69007 12664+/* vinfo commands */
d337f35e 12665+
4bf69007 12666+#define VCMD_task_tag VC_CMD(VINFO, 3, 0)
d337f35e
JR
12667+
12668+
4bf69007 12669+/* context commands */
d337f35e 12670+
4bf69007 12671+#define VCMD_tag_migrate VC_CMD(TAGMIG, 1, 0)
2380c486 12672+
4bf69007 12673+#endif /* _UAPI_VS_TAG_CMD_H */
8931d859
AM
12674diff -NurpP --minimal linux-4.4.161/init/Kconfig linux-4.4.161-vs2.3.9.8/init/Kconfig
12675--- linux-4.4.161/init/Kconfig 2018-10-20 02:34:31.000000000 +0000
12676+++ linux-4.4.161-vs2.3.9.8/init/Kconfig 2018-10-20 04:57:21.000000000 +0000
927ca606 12677@@ -927,6 +927,7 @@ config NUMA_BALANCING_DEFAULT_ENABLED
4bf69007 12678 menuconfig CGROUPS
927ca606 12679 bool "Control Group support"
265de2f7 12680 select KERNFS
4bf69007
AM
12681+ default y
12682 help
12683 This option adds support for grouping sets of processes together, for
12684 use with process control subsystems such as Cpusets, CFS, memory
8931d859
AM
12685diff -NurpP --minimal linux-4.4.161/init/main.c linux-4.4.161-vs2.3.9.8/init/main.c
12686--- linux-4.4.161/init/main.c 2018-10-20 02:34:31.000000000 +0000
12687+++ linux-4.4.161-vs2.3.9.8/init/main.c 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
12688@@ -82,6 +82,7 @@
12689 #include <linux/proc_ns.h>
12690 #include <linux/io.h>
12691 #include <linux/kaiser.h>
4bf69007
AM
12692+#include <linux/vserver/percpu.h>
12693
12694 #include <asm/io.h>
12695 #include <asm/bugs.h>
8931d859
AM
12696diff -NurpP --minimal linux-4.4.161/ipc/mqueue.c linux-4.4.161-vs2.3.9.8/ipc/mqueue.c
12697--- linux-4.4.161/ipc/mqueue.c 2018-10-20 02:34:31.000000000 +0000
12698+++ linux-4.4.161-vs2.3.9.8/ipc/mqueue.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12699@@ -35,6 +35,8 @@
12700 #include <linux/ipc_namespace.h>
12701 #include <linux/user_namespace.h>
12702 #include <linux/slab.h>
12703+#include <linux/vs_context.h>
12704+#include <linux/vs_limit.h>
12705
12706 #include <net/sock.h>
12707 #include "util.h"
927ca606 12708@@ -75,6 +77,7 @@ struct mqueue_inode_info {
bb20add7 12709 struct pid *notify_owner;
4bf69007
AM
12710 struct user_namespace *notify_user_ns;
12711 struct user_struct *user; /* user who created, for accounting */
12712+ struct vx_info *vxi;
12713 struct sock *notify_sock;
12714 struct sk_buff *notify_cookie;
12715
927ca606 12716@@ -230,6 +233,7 @@ static struct inode *mqueue_get_inode(st
4bf69007
AM
12717 if (S_ISREG(mode)) {
12718 struct mqueue_inode_info *info;
12719 unsigned long mq_bytes, mq_treesize;
12720+ struct vx_info *vxi = current_vx_info();
12721
12722 inode->i_fop = &mqueue_file_operations;
12723 inode->i_size = FILENT_SIZE;
927ca606 12724@@ -243,6 +247,7 @@ static struct inode *mqueue_get_inode(st
4bf69007
AM
12725 info->notify_user_ns = NULL;
12726 info->qsize = 0;
12727 info->user = NULL; /* set when all is ok */
12728+ info->vxi = NULL;
12729 info->msg_tree = RB_ROOT;
12730 info->node_cache = NULL;
12731 memset(&info->attr, 0, sizeof(info->attr));
927ca606 12732@@ -276,17 +281,20 @@ static struct inode *mqueue_get_inode(st
4bf69007
AM
12733
12734 spin_lock(&mq_lock);
12735 if (u->mq_bytes + mq_bytes < u->mq_bytes ||
12736- u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
12737+ u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE) ||
12738+ !vx_ipcmsg_avail(vxi, mq_bytes)) {
12739 spin_unlock(&mq_lock);
12740 /* mqueue_evict_inode() releases info->messages */
12741 ret = -EMFILE;
12742 goto out_inode;
12743 }
12744 u->mq_bytes += mq_bytes;
12745+ vx_ipcmsg_add(vxi, u, mq_bytes);
12746 spin_unlock(&mq_lock);
12747
12748 /* all is ok */
12749 info->user = get_uid(u);
12750+ info->vxi = get_vx_info(vxi);
12751 } else if (S_ISDIR(mode)) {
12752 inc_nlink(inode);
12753 /* Some things misbehave if size == 0 on a directory */
7c48c125 12754@@ -402,6 +402,7 @@ static void mqueue_evict_inode(struct in
4bf69007
AM
12755 user = info->user;
12756 if (user) {
7c48c125 12757 unsigned long mq_bytes, mq_treesize;
4bf69007 12758+ struct vx_info *vxi = info->vxi;
7c48c125
AM
12759
12760 /* Total amount of bytes accounted for the mqueue */
12761 mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
12762@@ -413,6 +414,7 @@ static void mqueue_evict_inode(struct in
12763
4bf69007
AM
12764 spin_lock(&mq_lock);
12765 user->mq_bytes -= mq_bytes;
12766+ vx_ipcmsg_sub(vxi, user, mq_bytes);
12767 /*
12768 * get_ns_from_inode() ensures that the
12769 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
927ca606 12770@@ -409,6 +420,7 @@ static void mqueue_evict_inode(struct in
4bf69007
AM
12771 if (ipc_ns)
12772 ipc_ns->mq_queues_count--;
12773 spin_unlock(&mq_lock);
12774+ put_vx_info(vxi);
12775 free_uid(user);
12776 }
12777 if (ipc_ns)
8931d859
AM
12778diff -NurpP --minimal linux-4.4.161/ipc/msg.c linux-4.4.161-vs2.3.9.8/ipc/msg.c
12779--- linux-4.4.161/ipc/msg.c 2018-10-20 02:34:31.000000000 +0000
12780+++ linux-4.4.161-vs2.3.9.8/ipc/msg.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12781@@ -37,6 +37,7 @@
12782 #include <linux/rwsem.h>
12783 #include <linux/nsproxy.h>
12784 #include <linux/ipc_namespace.h>
12785+#include <linux/vs_base.h>
12786
12787 #include <asm/current.h>
bb20add7
AM
12788 #include <linux/uaccess.h>
12789@@ -129,6 +130,7 @@ static int newque(struct ipc_namespace *
4bf69007
AM
12790
12791 msq->q_perm.mode = msgflg & S_IRWXUGO;
12792 msq->q_perm.key = key;
12793+ msq->q_perm.xid = vx_current_xid();
12794
12795 msq->q_perm.security = NULL;
12796 retval = security_msg_queue_alloc(msq);
8931d859
AM
12797diff -NurpP --minimal linux-4.4.161/ipc/sem.c linux-4.4.161-vs2.3.9.8/ipc/sem.c
12798--- linux-4.4.161/ipc/sem.c 2018-10-20 02:34:31.000000000 +0000
12799+++ linux-4.4.161-vs2.3.9.8/ipc/sem.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 12800@@ -85,6 +85,8 @@
4bf69007
AM
12801 #include <linux/rwsem.h>
12802 #include <linux/nsproxy.h>
12803 #include <linux/ipc_namespace.h>
12804+#include <linux/vs_base.h>
12805+#include <linux/vs_limit.h>
12806
bb20add7 12807 #include <linux/uaccess.h>
4bf69007 12808 #include "util.h"
927ca606 12809@@ -533,6 +535,7 @@ static int newary(struct ipc_namespace *
4bf69007
AM
12810
12811 sma->sem_perm.mode = (semflg & S_IRWXUGO);
12812 sma->sem_perm.key = key;
12813+ sma->sem_perm.xid = vx_current_xid();
12814
12815 sma->sem_perm.security = NULL;
12816 retval = security_sem_alloc(sma);
927ca606 12817@@ -563,6 +566,9 @@ static int newary(struct ipc_namespace *
4bf69007
AM
12818 return id;
12819 }
12820 ns->used_sems += nsems;
12821+ /* FIXME: obsoleted? */
12822+ vx_semary_inc(sma);
12823+ vx_nsems_add(sma, nsems);
12824
bb20add7
AM
12825 sem_unlock(sma, -1);
12826 rcu_read_unlock();
927ca606 12827@@ -1151,6 +1157,9 @@ static void freeary(struct ipc_namespace
4bf69007
AM
12828
12829 wake_up_sem_queue_do(&tasks);
12830 ns->used_sems -= sma->sem_nsems;
12831+ /* FIXME: obsoleted? */
12832+ vx_nsems_sub(sma, sma->sem_nsems);
12833+ vx_semary_dec(sma);
926e38e0 12834 ipc_rcu_putref(sma, sem_rcu_free);
4bf69007 12835 }
926e38e0 12836
8931d859
AM
12837diff -NurpP --minimal linux-4.4.161/ipc/shm.c linux-4.4.161-vs2.3.9.8/ipc/shm.c
12838--- linux-4.4.161/ipc/shm.c 2018-10-20 02:34:31.000000000 +0000
12839+++ linux-4.4.161-vs2.3.9.8/ipc/shm.c 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8 12840@@ -42,6 +42,8 @@
4bf69007
AM
12841 #include <linux/nsproxy.h>
12842 #include <linux/mount.h>
12843 #include <linux/ipc_namespace.h>
12844+#include <linux/vs_context.h>
12845+#include <linux/vs_limit.h>
12846
bb20add7 12847 #include <linux/uaccess.h>
4bf69007 12848
8931d859 12849@@ -234,10 +236,14 @@ static void shm_open(struct vm_area_stru
4bf69007
AM
12850 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
12851 {
c2e5f7c8 12852 struct file *shm_file;
4bf69007
AM
12853+ struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid);
12854+ int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
c2e5f7c8
JR
12855
12856 shm_file = shp->shm_file;
12857 shp->shm_file = NULL;
12858- ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
4bf69007
AM
12859+ vx_ipcshm_sub(vxi, shp, numpages);
12860+ ns->shm_tot -= numpages;
d337f35e 12861+
4bf69007
AM
12862 shm_rmid(ns, shp);
12863 shm_unlock(shp);
c2e5f7c8 12864 if (!is_file_hugepages(shm_file))
8931d859 12865@@ -246,6 +252,7 @@ static void shm_destroy(struct ipc_names
927ca606
AM
12866 user_shm_unlock(i_size_read(file_inode(shm_file)),
12867 shp->mlock_user);
c2e5f7c8 12868 fput(shm_file);
4bf69007 12869+ put_vx_info(vxi);
926e38e0 12870 ipc_rcu_putref(shp, shm_rcu_free);
4bf69007
AM
12871 }
12872
8931d859 12873@@ -545,11 +552,15 @@ static int newseg(struct ipc_namespace *
bb20add7 12874 ns->shm_tot + numpages > ns->shm_ctlall)
4bf69007
AM
12875 return -ENOSPC;
12876
12877+ if (!vx_ipcshm_avail(current_vx_info(), numpages))
12878+ return -ENOSPC;
d337f35e 12879+
4bf69007
AM
12880 shp = ipc_rcu_alloc(sizeof(*shp));
12881 if (!shp)
12882 return -ENOMEM;
12883
12884 shp->shm_perm.key = key;
12885+ shp->shm_perm.xid = vx_current_xid();
12886 shp->shm_perm.mode = (shmflg & S_IRWXUGO);
12887 shp->mlock_user = NULL;
12888
8931d859 12889@@ -620,6 +631,7 @@ static int newseg(struct ipc_namespace *
926e38e0
JR
12890
12891 ipc_unlock_object(&shp->shm_perm);
12892 rcu_read_unlock();
4bf69007
AM
12893+ vx_ipcshm_add(current_vx_info(), key, numpages);
12894 return error;
12895
12896 no_id:
8931d859
AM
12897diff -NurpP --minimal linux-4.4.161/kernel/Makefile linux-4.4.161-vs2.3.9.8/kernel/Makefile
12898--- linux-4.4.161/kernel/Makefile 2016-01-10 23:01:32.000000000 +0000
12899+++ linux-4.4.161-vs2.3.9.8/kernel/Makefile 2018-10-20 04:57:21.000000000 +0000
927ca606 12900@@ -29,6 +29,7 @@ obj-y += printk/
c2e5f7c8
JR
12901 obj-y += irq/
12902 obj-y += rcu/
927ca606 12903 obj-y += livepatch/
4bf69007
AM
12904+obj-y += vserver/
12905
b00e13aa
AM
12906 obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
12907 obj-$(CONFIG_FREEZER) += freezer.o
8931d859
AM
12908diff -NurpP --minimal linux-4.4.161/kernel/auditsc.c linux-4.4.161-vs2.3.9.8/kernel/auditsc.c
12909--- linux-4.4.161/kernel/auditsc.c 2018-10-20 02:34:31.000000000 +0000
12910+++ linux-4.4.161-vs2.3.9.8/kernel/auditsc.c 2018-10-20 05:50:20.000000000 +0000
12911@@ -1964,7 +1964,7 @@ static int audit_set_loginuid_perm(kuid_
c2e5f7c8 12912 if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
4bf69007 12913 return -EPERM;
c2e5f7c8 12914 /* it is set, you need permission */
4bf69007
AM
12915- if (!capable(CAP_AUDIT_CONTROL))
12916+ if (!vx_capable(CAP_AUDIT_CONTROL, VXC_AUDIT_CONTROL))
12917 return -EPERM;
c2e5f7c8
JR
12918 /* reject if this is not an unset and we don't allow that */
12919 if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid))
8931d859
AM
12920diff -NurpP --minimal linux-4.4.161/kernel/capability.c linux-4.4.161-vs2.3.9.8/kernel/capability.c
12921--- linux-4.4.161/kernel/capability.c 2018-10-20 02:34:31.000000000 +0000
12922+++ linux-4.4.161-vs2.3.9.8/kernel/capability.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 12923@@ -17,6 +17,7 @@
4bf69007
AM
12924 #include <linux/syscalls.h>
12925 #include <linux/pid_namespace.h>
12926 #include <linux/user_namespace.h>
12927+#include <linux/vs_context.h>
12928 #include <asm/uaccess.h>
12929
12930 /*
927ca606 12931@@ -107,6 +108,7 @@ static int cap_validate_magic(cap_user_h
4bf69007
AM
12932 return 0;
12933 }
12934
2380c486 12935+
4bf69007
AM
12936 /*
12937 * The only thing that can change the capabilities of the current
12938 * process is the current process. As such, we can't be in this code
927ca606 12939@@ -344,6 +346,8 @@ bool has_ns_capability_noaudit(struct ta
4bf69007
AM
12940 return (ret == 0);
12941 }
12942
12943+#include <linux/vserver/base.h>
d337f35e 12944+
4bf69007
AM
12945 /**
12946 * has_capability_noaudit - Does a task have a capability (unaudited) in the
12947 * initial user ns
8931d859
AM
12948diff -NurpP --minimal linux-4.4.161/kernel/compat.c linux-4.4.161-vs2.3.9.8/kernel/compat.c
12949--- linux-4.4.161/kernel/compat.c 2016-01-10 23:01:32.000000000 +0000
12950+++ linux-4.4.161-vs2.3.9.8/kernel/compat.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
12951@@ -27,6 +27,7 @@
12952 #include <linux/times.h>
12953 #include <linux/ptrace.h>
12954 #include <linux/gfp.h>
12955+#include <linux/vs_time.h>
12956
12957 #include <asm/uaccess.h>
12958
927ca606 12959@@ -1059,7 +1060,7 @@ COMPAT_SYSCALL_DEFINE1(stime, compat_tim
4bf69007
AM
12960 if (err)
12961 return err;
12962
12963- do_settimeofday(&tv);
12964+ vx_settimeofday(&tv);
12965 return 0;
12966 }
12967
8931d859
AM
12968diff -NurpP --minimal linux-4.4.161/kernel/cred.c linux-4.4.161-vs2.3.9.8/kernel/cred.c
12969--- linux-4.4.161/kernel/cred.c 2018-10-20 02:34:31.000000000 +0000
12970+++ linux-4.4.161-vs2.3.9.8/kernel/cred.c 2018-10-20 04:57:21.000000000 +0000
927ca606 12971@@ -64,31 +64,6 @@ struct cred init_cred = {
b00e13aa 12972 .group_info = &init_groups,
4bf69007
AM
12973 };
12974
12975-static inline void set_cred_subscribers(struct cred *cred, int n)
12976-{
12977-#ifdef CONFIG_DEBUG_CREDENTIALS
12978- atomic_set(&cred->subscribers, n);
12979-#endif
12980-}
12981-
12982-static inline int read_cred_subscribers(const struct cred *cred)
12983-{
12984-#ifdef CONFIG_DEBUG_CREDENTIALS
12985- return atomic_read(&cred->subscribers);
12986-#else
12987- return 0;
12988-#endif
12989-}
12990-
12991-static inline void alter_cred_subscribers(const struct cred *_cred, int n)
12992-{
12993-#ifdef CONFIG_DEBUG_CREDENTIALS
12994- struct cred *cred = (struct cred *) _cred;
12995-
12996- atomic_add(n, &cred->subscribers);
12997-#endif
12998-}
12999-
13000 /*
b00e13aa 13001 * The RCU callback to actually dispose of a set of credentials
4bf69007 13002 */
927ca606 13003@@ -240,21 +215,16 @@ error:
4bf69007
AM
13004 *
13005 * Call commit_creds() or abort_creds() to clean up.
13006 */
13007-struct cred *prepare_creds(void)
13008+struct cred *__prepare_creds(const struct cred *old)
13009 {
13010- struct task_struct *task = current;
13011- const struct cred *old;
13012 struct cred *new;
13013
13014- validate_process_creds();
13015-
13016 new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
13017 if (!new)
13018 return NULL;
13019
13020 kdebug("prepare_creds() alloc %p", new);
13021
13022- old = task->cred;
13023 memcpy(new, old, sizeof(struct cred));
13024
13025 atomic_set(&new->usage, 1);
927ca606 13026@@ -283,6 +253,13 @@ error:
4bf69007
AM
13027 abort_creds(new);
13028 return NULL;
13029 }
d337f35e 13030+
4bf69007 13031+struct cred *prepare_creds(void)
2380c486 13032+{
4bf69007 13033+ validate_process_creds();
d337f35e 13034+
4bf69007 13035+ return __prepare_creds(current->cred);
2380c486 13036+}
4bf69007
AM
13037 EXPORT_SYMBOL(prepare_creds);
13038
13039 /*
8931d859
AM
13040diff -NurpP --minimal linux-4.4.161/kernel/exit.c linux-4.4.161-vs2.3.9.8/kernel/exit.c
13041--- linux-4.4.161/kernel/exit.c 2018-10-20 02:34:31.000000000 +0000
13042+++ linux-4.4.161-vs2.3.9.8/kernel/exit.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
13043@@ -48,6 +48,10 @@
13044 #include <linux/fs_struct.h>
13045 #include <linux/init_task.h>
13046 #include <linux/perf_event.h>
13047+#include <linux/vs_limit.h>
13048+#include <linux/vs_context.h>
13049+#include <linux/vs_network.h>
13050+#include <linux/vs_pid.h>
13051 #include <trace/events/sched.h>
13052 #include <linux/hw_breakpoint.h>
13053 #include <linux/oom.h>
927ca606 13054@@ -456,14 +460,24 @@ static struct task_struct *find_child_re
4bf69007 13055 struct pid_namespace *pid_ns = task_active_pid_ns(father);
927ca606 13056 struct task_struct *reaper = pid_ns->child_reaper;
af9853d8 13057 struct task_struct *p, *n;
4bf69007 13058+ struct vx_info *vxi = task_get_vx_info(father);
d337f35e 13059+
4bf69007
AM
13060+ if (vxi) {
13061+ BUG_ON(!vxi->vx_reaper);
13062+ if (vxi->vx_reaper != init_pid_ns.child_reaper &&
927ca606 13063+ vxi->vx_reaper != father) {
4bf69007 13064+ reaper = vxi->vx_reaper;
927ca606
AM
13065+ goto out_put;
13066+ }
13067+ }
4bf69007 13068
927ca606
AM
13069 if (likely(reaper != father))
13070- return reaper;
13071+ goto out_put;
13072
13073 reaper = find_alive_thread(father);
13074 if (reaper) {
13075 pid_ns->child_reaper = reaper;
13076- return reaper;
13077+ goto out_put;
4bf69007
AM
13078 }
13079
927ca606
AM
13080 write_unlock_irq(&tasklist_lock);
13081@@ -474,7 +488,10 @@ static struct task_struct *find_child_re
13082 zap_pid_ns_processes(pid_ns);
13083 write_lock_irq(&tasklist_lock);
13084
13085- return father;
13086+ reaper = father;
4bf69007
AM
13087+out_put:
13088+ put_vx_info(vxi);
13089+ return reaper;
13090 }
13091
13092 /*
927ca606
AM
13093@@ -562,9 +579,13 @@ static void forget_original_parent(struc
13094 return;
bb20add7 13095
927ca606
AM
13096 reaper = find_new_reaper(father, reaper);
13097- list_for_each_entry(p, &father->children, sibling) {
13098+ for (p = list_first_entry(&father->children, struct task_struct, sibling);
13099+ &p->sibling != &father->children; ) {
13100+ struct task_struct *next, *this_reaper = reaper;
13101+ if (p == reaper)
13102+ this_reaper = task_active_pid_ns(reaper)->child_reaper;
13103 for_each_thread(p, t) {
4bf69007 13104- t->real_parent = reaper;
927ca606
AM
13105+ t->real_parent = this_reaper;
13106 BUG_ON((!t->ptrace) != (t->parent == father));
13107 if (likely(!t->ptrace))
13108 t->parent = t->real_parent;
13109@@ -576,10 +597,13 @@ static void forget_original_parent(struc
13110 * If this is a threaded reparent there is no need to
13111 * notify anyone anything has happened.
13112 */
13113- if (!same_thread_group(reaper, father))
13114+ if (!same_thread_group(this_reaper, father))
13115 reparent_leader(father, p, dead);
13116+ next = list_next_entry(p, sibling);
13117+ list_add(&p->sibling, &this_reaper->children);
13118+ p = next;
13119 }
13120- list_splice_tail_init(&father->children, &reaper->children);
13121+ INIT_LIST_HEAD(&father->children);
13122 }
13123
13124 /*
13125@@ -763,6 +787,9 @@ void do_exit(long code)
4bf69007 13126 */
c2e5f7c8 13127 flush_ptrace_hw_breakpoint(tsk);
4bf69007
AM
13128
13129+ /* needs to stay before exit_notify() */
13130+ exit_vx_info_early(tsk, code);
d337f35e 13131+
927ca606 13132 TASKS_RCU(preempt_disable());
bb20add7 13133 TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
927ca606 13134 TASKS_RCU(preempt_enable());
0e1bbc97 13135@@ -800,6 +827,10 @@ void do_exit(long code)
4bf69007 13136
0e1bbc97
AM
13137 validate_creds_for_do_exit(tsk);
13138
13139+ /* needs to stay after exit_notify() and before preempt_disable() */
4bf69007
AM
13140+ exit_vx_info(tsk, code);
13141+ exit_nx_info(tsk);
d337f35e 13142+
0e1bbc97
AM
13143 check_stack_usage();
13144 preempt_disable();
13145 if (tsk->nr_dirtied)
13146@@ -826,6 +857,7 @@ void do_exit(long code)
4bf69007
AM
13147 tsk->state = TASK_DEAD;
13148 tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
13149 schedule();
13150+ printk("bad task: %p [%lx]\n", current, current->state);
13151 BUG();
13152 /* Avoid "noreturn function does return". */
13153 for (;;)
8931d859
AM
13154diff -NurpP --minimal linux-4.4.161/kernel/fork.c linux-4.4.161-vs2.3.9.8/kernel/fork.c
13155--- linux-4.4.161/kernel/fork.c 2018-10-20 02:34:31.000000000 +0000
13156+++ linux-4.4.161-vs2.3.9.8/kernel/fork.c 2018-10-20 05:50:20.000000000 +0000
927ca606 13157@@ -76,6 +76,9 @@
09be7631 13158 #include <linux/aio.h>
265de2f7 13159 #include <linux/compiler.h>
927ca606 13160 #include <linux/sysctl.h>
4bf69007
AM
13161+#include <linux/vs_context.h>
13162+#include <linux/vs_network.h>
13163+#include <linux/vs_limit.h>
13164
13165 #include <asm/pgtable.h>
13166 #include <asm/pgalloc.h>
927ca606 13167@@ -227,6 +230,8 @@ void free_task(struct task_struct *tsk)
4bf69007
AM
13168 arch_release_thread_info(tsk->stack);
13169 free_thread_info(tsk->stack);
13170 rt_mutex_debug_task_free(tsk);
13171+ clr_vx_info(&tsk->vx_info);
13172+ clr_nx_info(&tsk->nx_info);
13173 ftrace_graph_exit_task(tsk);
13174 put_seccomp_filter(tsk);
13175 arch_release_task_struct(tsk);
8931d859 13176@@ -1284,6 +1289,8 @@ static struct task_struct *copy_process(
8d50a2ea 13177 {
4bf69007
AM
13178 int retval;
13179 struct task_struct *p;
4bf69007
AM
13180+ struct vx_info *vxi;
13181+ struct nx_info *nxi;
927ca606 13182 void *cgrp_ss_priv[CGROUP_CANFORK_COUNT] = {};
4bf69007
AM
13183
13184 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
8931d859 13185@@ -1357,7 +1364,12 @@ static struct task_struct *copy_process(
4bf69007
AM
13186 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
13187 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
13188 #endif
13189+ init_vx_info(&p->vx_info, current_vx_info());
13190+ init_nx_info(&p->nx_info, current_nx_info());
13191+
13192 retval = -EAGAIN;
13193+ if (!vx_nproc_avail(1))
13194+ goto bad_fork_free;
13195 if (atomic_read(&p->real_cred->user->processes) >=
13196 task_rlimit(p, RLIMIT_NPROC)) {
c2e5f7c8 13197 if (p->real_cred->user != INIT_USER &&
8931d859 13198@@ -1649,6 +1661,18 @@ static struct task_struct *copy_process(
4bf69007
AM
13199 total_forks++;
13200 spin_unlock(&current->sighand->siglock);
bb20add7 13201 syscall_tracepoint_update(p);
4bf69007
AM
13202+
13203+ /* p is copy of current */
13204+ vxi = p->vx_info;
13205+ if (vxi) {
13206+ claim_vx_info(vxi, p);
13207+ atomic_inc(&vxi->cvirt.nr_threads);
13208+ atomic_inc(&vxi->cvirt.total_forks);
13209+ vx_nproc_inc(p);
2380c486 13210+ }
4bf69007
AM
13211+ nxi = p->nx_info;
13212+ if (nxi)
13213+ claim_nx_info(nxi, p);
13214 write_unlock_irq(&tasklist_lock);
bb20add7 13215
4bf69007 13216 proc_fork_connector(p);
8931d859
AM
13217diff -NurpP --minimal linux-4.4.161/kernel/kthread.c linux-4.4.161-vs2.3.9.8/kernel/kthread.c
13218--- linux-4.4.161/kernel/kthread.c 2018-10-20 02:34:31.000000000 +0000
13219+++ linux-4.4.161-vs2.3.9.8/kernel/kthread.c 2018-10-20 05:50:20.000000000 +0000
927ca606 13220@@ -19,6 +19,7 @@
4bf69007 13221 #include <linux/ptrace.h>
09be7631 13222 #include <linux/uaccess.h>
927ca606 13223 #include <linux/cgroup.h>
4bf69007
AM
13224+#include <linux/vs_pid.h>
13225 #include <trace/events/sched.h>
13226
13227 static DEFINE_SPINLOCK(kthread_create_lock);
8931d859
AM
13228diff -NurpP --minimal linux-4.4.161/kernel/nsproxy.c linux-4.4.161-vs2.3.9.8/kernel/nsproxy.c
13229--- linux-4.4.161/kernel/nsproxy.c 2016-01-10 23:01:32.000000000 +0000
13230+++ linux-4.4.161-vs2.3.9.8/kernel/nsproxy.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
13231@@ -20,11 +20,14 @@
13232 #include <linux/mnt_namespace.h>
13233 #include <linux/utsname.h>
13234 #include <linux/pid_namespace.h>
13235+#include <linux/vserver/global.h>
13236+#include <linux/vserver/debug.h>
13237 #include <net/net_namespace.h>
13238 #include <linux/ipc_namespace.h>
09be7631 13239 #include <linux/proc_ns.h>
4bf69007
AM
13240 #include <linux/file.h>
13241 #include <linux/syscalls.h>
13242+#include "../fs/mount.h"
13243
13244 static struct kmem_cache *nsproxy_cachep;
13245
13246@@ -46,8 +49,11 @@ static inline struct nsproxy *create_nsp
13247 struct nsproxy *nsproxy;
13248
13249 nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
13250- if (nsproxy)
13251+ if (nsproxy) {
13252 atomic_set(&nsproxy->count, 1);
13253+ atomic_inc(&vs_global_nsproxy);
13254+ }
13255+ vxdprintk(VXD_CBIT(space, 2), "create_nsproxy = %p[1]", nsproxy);
13256 return nsproxy;
13257 }
13258
b00e13aa 13259@@ -56,9 +62,12 @@ static inline struct nsproxy *create_nsp
4bf69007
AM
13260 * Return the newly created nsproxy. Do not attach this to the task,
13261 * leave it to the caller to do proper locking and attach it to task.
13262 */
13263-static struct nsproxy *create_new_namespaces(unsigned long flags,
b00e13aa
AM
13264- struct task_struct *tsk, struct user_namespace *user_ns,
13265- struct fs_struct *new_fs)
13266+static struct nsproxy *unshare_namespaces(
13267+ unsigned long flags,
13268+ struct nsproxy *orig,
13269+ struct fs_struct *new_fs,
13270+ struct user_namespace *new_user,
13271+ struct pid_namespace *new_pid)
4bf69007
AM
13272 {
13273 struct nsproxy *new_nsp;
13274 int err;
c2e5f7c8 13275@@ -67,32 +76,31 @@ static struct nsproxy *create_new_namesp
4bf69007
AM
13276 if (!new_nsp)
13277 return ERR_PTR(-ENOMEM);
13278
b00e13aa
AM
13279- new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
13280+ new_nsp->mnt_ns = copy_mnt_ns(flags, orig->mnt_ns, new_user, new_fs);
4bf69007
AM
13281 if (IS_ERR(new_nsp->mnt_ns)) {
13282 err = PTR_ERR(new_nsp->mnt_ns);
13283 goto out_ns;
13284 }
13285
b00e13aa
AM
13286- new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
13287+ new_nsp->uts_ns = copy_utsname(flags, new_user, orig->uts_ns);
4bf69007
AM
13288 if (IS_ERR(new_nsp->uts_ns)) {
13289 err = PTR_ERR(new_nsp->uts_ns);
13290 goto out_uts;
13291 }
13292
b00e13aa
AM
13293- new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
13294+ new_nsp->ipc_ns = copy_ipcs(flags, new_user, orig->ipc_ns);
4bf69007
AM
13295 if (IS_ERR(new_nsp->ipc_ns)) {
13296 err = PTR_ERR(new_nsp->ipc_ns);
13297 goto out_ipc;
13298 }
13299
c2e5f7c8
JR
13300- new_nsp->pid_ns_for_children =
13301- copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);
13302+ new_nsp->pid_ns_for_children = copy_pid_ns(flags, new_user, new_pid);
13303 if (IS_ERR(new_nsp->pid_ns_for_children)) {
13304 err = PTR_ERR(new_nsp->pid_ns_for_children);
4bf69007
AM
13305 goto out_pid;
13306 }
13307
b00e13aa
AM
13308- new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
13309+ new_nsp->net_ns = copy_net_ns(flags, new_user, orig->net_ns);
4bf69007
AM
13310 if (IS_ERR(new_nsp->net_ns)) {
13311 err = PTR_ERR(new_nsp->net_ns);
13312 goto out_net;
c2e5f7c8 13313@@ -117,6 +125,41 @@ out_ns:
4bf69007
AM
13314 return ERR_PTR(err);
13315 }
13316
13317+static struct nsproxy *create_new_namespaces(unsigned long flags,
b00e13aa
AM
13318+ struct task_struct *tsk, struct user_namespace *user_ns,
13319+ struct fs_struct *new_fs)
13320+
4bf69007
AM
13321+{
13322+ return unshare_namespaces(flags, tsk->nsproxy,
b00e13aa 13323+ new_fs, user_ns, task_active_pid_ns(tsk));
2380c486 13324+}
d337f35e 13325+
4bf69007
AM
13326+/*
13327+ * copies the nsproxy, setting refcount to 1, and grabbing a
13328+ * reference to all contained namespaces.
13329+ */
13330+struct nsproxy *copy_nsproxy(struct nsproxy *orig)
2380c486 13331+{
4bf69007 13332+ struct nsproxy *ns = create_nsproxy();
d337f35e 13333+
4bf69007
AM
13334+ if (ns) {
13335+ memcpy(ns, orig, sizeof(struct nsproxy));
13336+ atomic_set(&ns->count, 1);
d337f35e 13337+
4bf69007
AM
13338+ if (ns->mnt_ns)
13339+ get_mnt_ns(ns->mnt_ns);
13340+ if (ns->uts_ns)
13341+ get_uts_ns(ns->uts_ns);
13342+ if (ns->ipc_ns)
13343+ get_ipc_ns(ns->ipc_ns);
c2e5f7c8
JR
13344+ if (ns->pid_ns_for_children)
13345+ get_pid_ns(ns->pid_ns_for_children);
4bf69007
AM
13346+ if (ns->net_ns)
13347+ get_net(ns->net_ns);
13348+ }
13349+ return ns;
13350+}
d337f35e 13351+
4bf69007
AM
13352 /*
13353 * called from clone. This now handles copy for nsproxy and all
13354 * namespaces therein.
c2e5f7c8 13355@@ -125,7 +168,10 @@ int copy_namespaces(unsigned long flags,
4bf69007
AM
13356 {
13357 struct nsproxy *old_ns = tsk->nsproxy;
b00e13aa 13358 struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
4bf69007
AM
13359- struct nsproxy *new_ns;
13360+ struct nsproxy *new_ns = NULL;
c2e5f7c8 13361+
4bf69007
AM
13362+ vxdprintk(VXD_CBIT(space, 7), "copy_namespaces(0x%08lx,%p[%p])",
13363+ flags, tsk, old_ns);
4bf69007 13364
c2e5f7c8
JR
13365 if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
13366 CLONE_NEWPID | CLONE_NEWNET)))) {
13367@@ -133,7 +179,7 @@ int copy_namespaces(unsigned long flags,
4bf69007 13368 return 0;
4bf69007 13369 }
4bf69007 13370
c2e5f7c8
JR
13371- if (!ns_capable(user_ns, CAP_SYS_ADMIN))
13372+ if (!vx_ns_can_unshare(user_ns, CAP_SYS_ADMIN, flags))
13373 return -EPERM;
13374
13375 /*
13376@@ -152,6 +198,9 @@ int copy_namespaces(unsigned long flags,
13377 return PTR_ERR(new_ns);
13378
13379 tsk->nsproxy = new_ns;
4bf69007 13380+ vxdprintk(VXD_CBIT(space, 3),
c2e5f7c8
JR
13381+ "copy_namespaces(0x%08lx,%p[%p]) = [%p]",
13382+ flags, tsk, old_ns, new_ns);
13383 return 0;
4bf69007
AM
13384 }
13385
c2e5f7c8 13386@@ -165,7 +214,9 @@ void free_nsproxy(struct nsproxy *ns)
4bf69007 13387 put_ipc_ns(ns->ipc_ns);
c2e5f7c8
JR
13388 if (ns->pid_ns_for_children)
13389 put_pid_ns(ns->pid_ns_for_children);
4bf69007
AM
13390- put_net(ns->net_ns);
13391+ if (ns->net_ns)
13392+ put_net(ns->net_ns);
13393+ atomic_dec(&vs_global_nsproxy);
13394 kmem_cache_free(nsproxy_cachep, ns);
13395 }
13396
c2e5f7c8 13397@@ -179,12 +230,16 @@ int unshare_nsproxy_namespaces(unsigned
b00e13aa 13398 struct user_namespace *user_ns;
4bf69007
AM
13399 int err = 0;
13400
13401+ vxdprintk(VXD_CBIT(space, 4),
13402+ "unshare_nsproxy_namespaces(0x%08lx,[%p])",
13403+ unshare_flags, current->nsproxy);
d337f35e 13404+
4bf69007 13405 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
b00e13aa 13406 CLONE_NEWNET | CLONE_NEWPID)))
4bf69007
AM
13407 return 0;
13408
b00e13aa
AM
13409 user_ns = new_cred ? new_cred->user_ns : current_user_ns();
13410- if (!ns_capable(user_ns, CAP_SYS_ADMIN))
13411+ if (!vx_ns_can_unshare(user_ns, CAP_SYS_ADMIN, unshare_flags))
4bf69007
AM
13412 return -EPERM;
13413
b00e13aa 13414 *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
8931d859
AM
13415diff -NurpP --minimal linux-4.4.161/kernel/pid.c linux-4.4.161-vs2.3.9.8/kernel/pid.c
13416--- linux-4.4.161/kernel/pid.c 2018-10-20 02:34:31.000000000 +0000
13417+++ linux-4.4.161-vs2.3.9.8/kernel/pid.c 2018-10-20 04:57:21.000000000 +0000
09be7631 13418@@ -38,6 +38,7 @@
4bf69007 13419 #include <linux/syscalls.h>
09be7631 13420 #include <linux/proc_ns.h>
b00e13aa 13421 #include <linux/proc_fs.h>
4bf69007
AM
13422+#include <linux/vs_pid.h>
13423
13424 #define pid_hashfn(nr, ns) \
13425 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
8931d859 13426@@ -381,7 +382,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns);
4bf69007
AM
13427
13428 struct pid *find_vpid(int nr)
13429 {
b00e13aa
AM
13430- return find_pid_ns(nr, task_active_pid_ns(current));
13431+ return find_pid_ns(vx_rmap_pid(nr), task_active_pid_ns(current));
4bf69007
AM
13432 }
13433 EXPORT_SYMBOL_GPL(find_vpid);
13434
8931d859 13435@@ -437,6 +438,9 @@ void transfer_pid(struct task_struct *ol
4bf69007
AM
13436 struct task_struct *pid_task(struct pid *pid, enum pid_type type)
13437 {
13438 struct task_struct *result = NULL;
d337f35e 13439+
927ca606 13440+ if (type == __PIDTYPE_REALPID)
4bf69007
AM
13441+ type = PIDTYPE_PID;
13442 if (pid) {
13443 struct hlist_node *first;
13444 first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
8931d859 13445@@ -455,7 +459,7 @@ struct task_struct *find_task_by_pid_ns(
927ca606
AM
13446 {
13447 RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
13448 "find_task_by_pid_ns() needs rcu_read_lock() protection");
4bf69007
AM
13449- return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
13450+ return pid_task(find_pid_ns(vx_rmap_pid(nr), ns), PIDTYPE_PID);
13451 }
13452
13453 struct task_struct *find_task_by_vpid(pid_t vnr)
8931d859 13454@@ -499,7 +503,7 @@ struct pid *find_get_pid(pid_t nr)
4bf69007
AM
13455 }
13456 EXPORT_SYMBOL_GPL(find_get_pid);
13457
13458-pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
13459+pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns)
13460 {
13461 struct upid *upid;
13462 pid_t nr = 0;
8931d859 13463@@ -513,6 +517,11 @@ pid_t pid_nr_ns(struct pid *pid, struct
4bf69007
AM
13464 }
13465 EXPORT_SYMBOL_GPL(pid_nr_ns);
13466
13467+pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
2380c486 13468+{
4bf69007
AM
13469+ return vx_map_pid(pid_unmapped_nr_ns(pid, ns));
13470+}
d337f35e 13471+
4bf69007
AM
13472 pid_t pid_vnr(struct pid *pid)
13473 {
b00e13aa 13474 return pid_nr_ns(pid, task_active_pid_ns(current));
8931d859
AM
13475diff -NurpP --minimal linux-4.4.161/kernel/pid_namespace.c linux-4.4.161-vs2.3.9.8/kernel/pid_namespace.c
13476--- linux-4.4.161/kernel/pid_namespace.c 2018-10-20 02:34:31.000000000 +0000
13477+++ linux-4.4.161-vs2.3.9.8/kernel/pid_namespace.c 2018-10-20 04:57:21.000000000 +0000
b00e13aa 13478@@ -18,6 +18,7 @@
09be7631 13479 #include <linux/proc_ns.h>
4bf69007
AM
13480 #include <linux/reboot.h>
13481 #include <linux/export.h>
13482+#include <linux/vserver/global.h>
13483
09be7631
JR
13484 struct pid_cache {
13485 int nr_ids;
927ca606
AM
13486@@ -111,6 +112,7 @@ static struct pid_namespace *create_pid_
13487 ns->ns.ops = &pidns_operations;
4bf69007
AM
13488
13489 kref_init(&ns->kref);
13490+ atomic_inc(&vs_global_pid_ns);
13491 ns->level = level;
13492 ns->parent = get_pid_ns(parent_pid_ns);
b00e13aa 13493 ns->user_ns = get_user_ns(user_ns);
927ca606 13494@@ -128,6 +130,7 @@ static struct pid_namespace *create_pid_
c2e5f7c8
JR
13495 out_free_map:
13496 kfree(ns->pidmap[0].page);
13497 out_free:
4bf69007
AM
13498+ atomic_dec(&vs_global_pid_ns);
13499 kmem_cache_free(pid_ns_cachep, ns);
c2e5f7c8
JR
13500 out:
13501 return ERR_PTR(err);
8931d859
AM
13502diff -NurpP --minimal linux-4.4.161/kernel/printk/printk.c linux-4.4.161-vs2.3.9.8/kernel/printk/printk.c
13503--- linux-4.4.161/kernel/printk/printk.c 2018-10-20 02:34:31.000000000 +0000
13504+++ linux-4.4.161-vs2.3.9.8/kernel/printk/printk.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 13505@@ -46,6 +46,7 @@
09be7631 13506 #include <linux/utsname.h>
bb20add7 13507 #include <linux/ctype.h>
927ca606 13508 #include <linux/uio.h>
4bf69007
AM
13509+#include <linux/vs_cvirt.h>
13510
13511 #include <asm/uaccess.h>
13512
927ca606
AM
13513@@ -502,7 +503,7 @@ int check_syslog_permissions(int type, i
13514 goto ok;
4bf69007
AM
13515
13516 if (syslog_action_restricted(type)) {
13517- if (capable(CAP_SYSLOG))
13518+ if (vx_capable(CAP_SYSLOG, VXC_SYSLOG))
927ca606 13519 goto ok;
092a4f51
JR
13520 /*
13521 * For historical reasons, accept CAP_SYS_ADMIN too, with
927ca606 13522@@ -1304,12 +1305,9 @@ int do_syslog(int type, char __user *buf
4bf69007 13523 if (error)
927ca606 13524 goto out;
4bf69007
AM
13525
13526- switch (type) {
13527- case SYSLOG_ACTION_CLOSE: /* Close log */
13528- break;
13529- case SYSLOG_ACTION_OPEN: /* Open log */
13530- break;
13531- case SYSLOG_ACTION_READ: /* Read from log */
13532+ if ((type == SYSLOG_ACTION_READ) ||
13533+ (type == SYSLOG_ACTION_READ_ALL) ||
13534+ (type == SYSLOG_ACTION_READ_CLEAR)) {
13535 error = -EINVAL;
13536 if (!buf || len < 0)
13537 goto out;
927ca606 13538@@ -1320,6 +1318,16 @@ int do_syslog(int type, char __user *buf
4bf69007
AM
13539 error = -EFAULT;
13540 goto out;
13541 }
13542+ }
13543+ if (!vx_check(0, VS_ADMIN|VS_WATCH))
13544+ return vx_do_syslog(type, buf, len);
d337f35e 13545+
4bf69007
AM
13546+ switch (type) {
13547+ case SYSLOG_ACTION_CLOSE: /* Close log */
13548+ break;
13549+ case SYSLOG_ACTION_OPEN: /* Open log */
13550+ break;
13551+ case SYSLOG_ACTION_READ: /* Read from log */
13552 error = wait_event_interruptible(log_wait,
13553 syslog_seq != log_next_seq);
13554 if (error)
927ca606 13555@@ -1332,16 +1340,6 @@ int do_syslog(int type, char __user *buf
4bf69007
AM
13556 /* FALL THRU */
13557 /* Read last kernel messages */
13558 case SYSLOG_ACTION_READ_ALL:
13559- error = -EINVAL;
13560- if (!buf || len < 0)
13561- goto out;
13562- error = 0;
13563- if (!len)
13564- goto out;
13565- if (!access_ok(VERIFY_WRITE, buf, len)) {
13566- error = -EFAULT;
13567- goto out;
13568- }
13569 error = syslog_print_all(buf, len, clear);
13570 break;
13571 /* Clear ring buffer */
8931d859
AM
13572diff -NurpP --minimal linux-4.4.161/kernel/ptrace.c linux-4.4.161-vs2.3.9.8/kernel/ptrace.c
13573--- linux-4.4.161/kernel/ptrace.c 2018-10-20 02:34:31.000000000 +0000
13574+++ linux-4.4.161-vs2.3.9.8/kernel/ptrace.c 2018-10-20 04:57:21.000000000 +0000
09be7631 13575@@ -23,6 +23,7 @@
4bf69007
AM
13576 #include <linux/syscalls.h>
13577 #include <linux/uaccess.h>
13578 #include <linux/regset.h>
13579+#include <linux/vs_context.h>
13580 #include <linux/hw_breakpoint.h>
13581 #include <linux/cn_proc.h>
09be7631 13582 #include <linux/compat.h>
fc310d58 13583@@ -301,6 +301,11 @@ ok:
b00e13aa 13584
fc310d58
AM
13585 if (mode & PTRACE_MODE_SCHED)
13586 return 0;
4bf69007
AM
13587+ if (!vx_check(task->xid, VS_ADMIN_P|VS_WATCH_P|VS_IDENT))
13588+ return -EPERM;
13589+ if (!vx_check(task->xid, VS_IDENT) &&
fc310d58 13590+ !task_vx_flags(task, VXF_STATE_ADMIN, 0))
4bf69007 13591+ return -EACCES;
4bf69007
AM
13592 return security_ptrace_access_check(task, mode);
13593 }
b00e13aa 13594
8931d859
AM
13595diff -NurpP --minimal linux-4.4.161/kernel/reboot.c linux-4.4.161-vs2.3.9.8/kernel/reboot.c
13596--- linux-4.4.161/kernel/reboot.c 2016-01-10 23:01:32.000000000 +0000
13597+++ linux-4.4.161-vs2.3.9.8/kernel/reboot.c 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8
JR
13598@@ -16,6 +16,7 @@
13599 #include <linux/syscalls.h>
13600 #include <linux/syscore_ops.h>
13601 #include <linux/uaccess.h>
13602+#include <linux/vs_pid.h>
13603
13604 /*
13605 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
bb20add7 13606@@ -269,6 +270,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off);
c2e5f7c8
JR
13607
13608 static DEFINE_MUTEX(reboot_mutex);
13609
13610+long vs_reboot(unsigned int, void __user *);
13611+
13612 /*
13613 * Reboot system call: for obvious reasons only root may call it,
13614 * and even root needs to set up some magic numbers in the registers
bb20add7 13615@@ -311,6 +314,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int
c2e5f7c8
JR
13616 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
13617 cmd = LINUX_REBOOT_CMD_HALT;
13618
13619+ if (!vx_check(0, VS_ADMIN|VS_WATCH))
13620+ return vs_reboot(cmd, arg);
13621+
13622 mutex_lock(&reboot_mutex);
13623 switch (cmd) {
13624 case LINUX_REBOOT_CMD_RESTART:
8931d859
AM
13625diff -NurpP --minimal linux-4.4.161/kernel/sched/core.c linux-4.4.161-vs2.3.9.8/kernel/sched/core.c
13626--- linux-4.4.161/kernel/sched/core.c 2018-10-20 02:34:31.000000000 +0000
13627+++ linux-4.4.161-vs2.3.9.8/kernel/sched/core.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 13628@@ -74,6 +74,8 @@
4bf69007 13629 #include <linux/binfmts.h>
b00e13aa 13630 #include <linux/context_tracking.h>
265de2f7 13631 #include <linux/compiler.h>
4bf69007
AM
13632+#include <linux/vs_sched.h>
13633+#include <linux/vs_cvirt.h>
13634
13635 #include <asm/switch_to.h>
13636 #include <asm/tlb.h>
8931d859 13637@@ -3560,7 +3562,7 @@ SYSCALL_DEFINE1(nice, int, increment)
4bf69007 13638
bb20add7 13639 nice = clamp_val(nice, MIN_NICE, MAX_NICE);
4bf69007
AM
13640 if (increment < 0 && !can_nice(current, nice))
13641- return -EPERM;
13642+ return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM;
13643
13644 retval = security_task_setnice(current, nice);
13645 if (retval)
8931d859
AM
13646diff -NurpP --minimal linux-4.4.161/kernel/sched/cputime.c linux-4.4.161-vs2.3.9.8/kernel/sched/cputime.c
13647--- linux-4.4.161/kernel/sched/cputime.c 2018-10-20 02:34:31.000000000 +0000
13648+++ linux-4.4.161-vs2.3.9.8/kernel/sched/cputime.c 2018-10-20 04:57:21.000000000 +0000
b00e13aa 13649@@ -4,6 +4,7 @@
4bf69007
AM
13650 #include <linux/kernel_stat.h>
13651 #include <linux/static_key.h>
b00e13aa 13652 #include <linux/context_tracking.h>
4bf69007
AM
13653+#include <linux/vs_sched.h>
13654 #include "sched.h"
13655
13656
bb20add7 13657@@ -135,14 +136,17 @@ static inline void task_group_account_fi
4bf69007
AM
13658 void account_user_time(struct task_struct *p, cputime_t cputime,
13659 cputime_t cputime_scaled)
13660 {
13661+ struct vx_info *vxi = p->vx_info; /* p is _always_ current */
ca5d134c 13662+ int nice = (task_nice(p) > 0);
4bf69007
AM
13663 int index;
13664
13665 /* Add user time to process. */
13666 p->utime += cputime;
13667 p->utimescaled += cputime_scaled;
13668+ vx_account_user(vxi, cputime, nice);
13669 account_group_user_time(p, cputime);
13670
ca5d134c 13671- index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
4bf69007
AM
13672+ index = (nice) ? CPUTIME_NICE : CPUTIME_USER;
13673
13674 /* Add user time to cpustat. */
13675 task_group_account_field(p, index, (__force u64) cputime);
bb20add7 13676@@ -189,9 +193,12 @@ static inline
ca5d134c
JR
13677 void __account_system_time(struct task_struct *p, cputime_t cputime,
13678 cputime_t cputime_scaled, int index)
13679 {
13680+ struct vx_info *vxi = p->vx_info; /* p is _always_ current */
13681+
13682 /* Add system time to process. */
13683 p->stime += cputime;
13684 p->stimescaled += cputime_scaled;
13685+ vx_account_system(vxi, cputime, 0 /* do we have idle time? */);
13686 account_group_system_time(p, cputime);
13687
13688 /* Add system time to cpustat. */
8931d859
AM
13689diff -NurpP --minimal linux-4.4.161/kernel/sched/fair.c linux-4.4.161-vs2.3.9.8/kernel/sched/fair.c
13690--- linux-4.4.161/kernel/sched/fair.c 2018-10-20 02:34:31.000000000 +0000
13691+++ linux-4.4.161-vs2.3.9.8/kernel/sched/fair.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 13692@@ -30,6 +30,7 @@
b00e13aa
AM
13693 #include <linux/mempolicy.h>
13694 #include <linux/migrate.h>
13695 #include <linux/task_work.h>
4bf69007
AM
13696+#include <linux/vs_cvirt.h>
13697
13698 #include <trace/events/sched.h>
13699
8931d859 13700@@ -3056,6 +3057,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
4bf69007
AM
13701 __enqueue_entity(cfs_rq, se);
13702 se->on_rq = 1;
13703
13704+ if (entity_is_task(se))
13705+ vx_activate_task(task_of(se));
13706 if (cfs_rq->nr_running == 1) {
13707 list_add_leaf_cfs_rq(cfs_rq);
13708 check_enqueue_throttle(cfs_rq);
8931d859 13709@@ -3137,6 +3140,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
4bf69007
AM
13710 if (se != cfs_rq->curr)
13711 __dequeue_entity(cfs_rq, se);
13712 se->on_rq = 0;
13713+ if (entity_is_task(se))
13714+ vx_deactivate_task(task_of(se));
4bf69007
AM
13715 account_entity_dequeue(cfs_rq, se);
13716
b00e13aa 13717 /*
8931d859
AM
13718diff -NurpP --minimal linux-4.4.161/kernel/sched/loadavg.c linux-4.4.161-vs2.3.9.8/kernel/sched/loadavg.c
13719--- linux-4.4.161/kernel/sched/loadavg.c 2018-10-20 02:34:31.000000000 +0000
13720+++ linux-4.4.161-vs2.3.9.8/kernel/sched/loadavg.c 2018-10-20 04:57:21.000000000 +0000
1d9ad342
AM
13721@@ -73,9 +73,16 @@ EXPORT_SYMBOL(avenrun); /* should be rem
13722 */
13723 void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
13724 {
13725- loads[0] = (avenrun[0] + offset) << shift;
13726- loads[1] = (avenrun[1] + offset) << shift;
13727- loads[2] = (avenrun[2] + offset) << shift;
13728+ if (vx_flags(VXF_VIRT_LOAD, 0)) {
13729+ struct vx_info *vxi = current_vx_info();
13730+ loads[0] = (vxi->cvirt.load[0] + offset) << shift;
13731+ loads[1] = (vxi->cvirt.load[1] + offset) << shift;
13732+ loads[2] = (vxi->cvirt.load[2] + offset) << shift;
13733+ } else {
13734+ loads[0] = (avenrun[0] + offset) << shift;
13735+ loads[1] = (avenrun[1] + offset) << shift;
13736+ loads[2] = (avenrun[2] + offset) << shift;
13737+ }
13738 }
13739
13740 long calc_load_fold_active(struct rq *this_rq)
8931d859
AM
13741diff -NurpP --minimal linux-4.4.161/kernel/signal.c linux-4.4.161-vs2.3.9.8/kernel/signal.c
13742--- linux-4.4.161/kernel/signal.c 2018-10-20 02:34:31.000000000 +0000
13743+++ linux-4.4.161-vs2.3.9.8/kernel/signal.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 13744@@ -34,6 +34,8 @@
b00e13aa 13745 #include <linux/compat.h>
09be7631 13746 #include <linux/cn_proc.h>
265de2f7 13747 #include <linux/compiler.h>
4bf69007
AM
13748+#include <linux/vs_context.h>
13749+#include <linux/vs_pid.h>
265de2f7 13750
4bf69007
AM
13751 #define CREATE_TRACE_POINTS
13752 #include <trace/events/signal.h>
f19bd705 13753@@ -726,9 +728,18 @@ static int check_kill_permission(int sig
4bf69007
AM
13754 struct pid *sid;
13755 int error;
13756
13757+ vxdprintk(VXD_CBIT(misc, 7),
13758+ "check_kill_permission(%d,%p,%p[#%u,%u])",
13759+ sig, info, t, vx_task_xid(t), t->pid);
d337f35e 13760+
4bf69007
AM
13761 if (!valid_signal(sig))
13762 return -EINVAL;
13763
13764+/* FIXME: needed? if so, why?
13765+ if ((info != SEND_SIG_NOINFO) &&
13766+ (is_si_special(info) || !si_fromuser(info)))
13767+ goto skip; */
d337f35e 13768+
4bf69007
AM
13769 if (!si_fromuser(info))
13770 return 0;
13771
f19bd705 13772@@ -752,6 +763,20 @@ static int check_kill_permission(int sig
4bf69007
AM
13773 }
13774 }
13775
13776+ error = -EPERM;
13777+ if (t->pid == 1 && current->xid)
13778+ return error;
d337f35e 13779+
4bf69007
AM
13780+ error = -ESRCH;
13781+ /* FIXME: we shouldn't return ESRCH ever, to avoid
13782+ loops, maybe ENOENT or EACCES? */
13783+ if (!vx_check(vx_task_xid(t), VS_WATCH_P | VS_IDENT)) {
13784+ vxdprintk(current->xid || VXD_CBIT(misc, 7),
13785+ "signal %d[%p] xid mismatch %p[#%u,%u] xid=#%u",
13786+ sig, info, t, vx_task_xid(t), t->pid, current->xid);
13787+ return error;
2380c486 13788+ }
4bf69007
AM
13789+/* skip: */
13790 return security_task_kill(t, info, sig, 0);
13791 }
13792
f19bd705 13793@@ -1303,8 +1328,14 @@ int kill_pid_info(int sig, struct siginf
927ca606
AM
13794 for (;;) {
13795 rcu_read_lock();
13796 p = pid_task(pid, PIDTYPE_PID);
13797- if (p)
13798- error = group_send_sig_info(sig, info, p);
13799+ if (p) {
13800+ if (vx_check(vx_task_xid(p), VS_IDENT))
13801+ error = group_send_sig_info(sig, info, p);
13802+ else {
13803+ rcu_read_unlock();
13804+ return -ESRCH;
13805+ }
13806+ }
13807 rcu_read_unlock();
13808 if (likely(!p || error != -ESRCH))
13809 return error;
f19bd705 13810@@ -1349,7 +1380,7 @@ int kill_pid_info_as_cred(int sig, struc
4bf69007
AM
13811
13812 rcu_read_lock();
13813 p = pid_task(pid, PIDTYPE_PID);
13814- if (!p) {
13815+ if (!p || !vx_check(vx_task_xid(p), VS_IDENT)) {
13816 ret = -ESRCH;
13817 goto out_unlock;
13818 }
8931d859 13819@@ -1405,8 +1436,10 @@ static int kill_something_info(int sig,
4bf69007
AM
13820 struct task_struct * p;
13821
13822 for_each_process(p) {
13823- if (task_pid_vnr(p) > 1 &&
13824- !same_thread_group(p, current)) {
13825+ if (vx_check(vx_task_xid(p), VS_ADMIN|VS_IDENT) &&
13826+ task_pid_vnr(p) > 1 &&
13827+ !same_thread_group(p, current) &&
13828+ !vx_current_initpid(p->pid)) {
13829 int err = group_send_sig_info(sig, info, p);
13830 ++count;
13831 if (err != -EPERM)
8931d859 13832@@ -2259,6 +2292,11 @@ relock:
4bf69007
AM
13833 !sig_kernel_only(signr))
13834 continue;
13835
13836+ /* virtual init is protected against user signals */
bb20add7 13837+ if ((ksig->info.si_code == SI_USER) &&
4bf69007
AM
13838+ vx_current_initpid(current->pid))
13839+ continue;
d337f35e 13840+
4bf69007
AM
13841 if (sig_kernel_stop(signr)) {
13842 /*
13843 * The default action is to stop all threads in
8931d859
AM
13844diff -NurpP --minimal linux-4.4.161/kernel/softirq.c linux-4.4.161-vs2.3.9.8/kernel/softirq.c
13845--- linux-4.4.161/kernel/softirq.c 2016-01-10 23:01:32.000000000 +0000
13846+++ linux-4.4.161-vs2.3.9.8/kernel/softirq.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 13847@@ -26,6 +26,7 @@
4bf69007
AM
13848 #include <linux/smpboot.h>
13849 #include <linux/tick.h>
265de2f7 13850 #include <linux/irq.h>
4bf69007
AM
13851+#include <linux/vs_context.h>
13852
13853 #define CREATE_TRACE_POINTS
13854 #include <trace/events/irq.h>
8931d859
AM
13855diff -NurpP --minimal linux-4.4.161/kernel/sys.c linux-4.4.161-vs2.3.9.8/kernel/sys.c
13856--- linux-4.4.161/kernel/sys.c 2018-10-20 02:34:31.000000000 +0000
13857+++ linux-4.4.161-vs2.3.9.8/kernel/sys.c 2018-10-20 05:50:20.000000000 +0000
13858@@ -56,6 +56,7 @@
13859 #include <linux/nospec.h>
4bf69007
AM
13860
13861 #include <linux/kmsg_dump.h>
b00e13aa 13862+#include <linux/vs_pid.h>
4bf69007 13863 /* Move somewhere else to avoid recompiling? */
b00e13aa
AM
13864 #include <generated/utsrelease.h>
13865
8931d859 13866@@ -159,7 +160,10 @@ static int set_one_prio(struct task_stru
4bf69007
AM
13867 goto out;
13868 }
13869 if (niceval < task_nice(p) && !can_nice(p, niceval)) {
13870- error = -EACCES;
13871+ if (vx_flags(VXF_IGNEG_NICE, 0))
13872+ error = 0;
13873+ else
13874+ error = -EACCES;
13875 goto out;
13876 }
13877 no_nice = security_task_setnice(p, niceval);
8931d859 13878@@ -210,6 +214,8 @@ SYSCALL_DEFINE3(setpriority, int, which,
bb20add7
AM
13879 else
13880 pgrp = task_pgrp(current);
13881 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
13882+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13883+ continue;
13884 error = set_one_prio(p, niceval, error);
13885 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
13886 break;
8931d859 13887@@ -276,6 +282,8 @@ SYSCALL_DEFINE2(getpriority, int, which,
bb20add7
AM
13888 else
13889 pgrp = task_pgrp(current);
13890 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
13891+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13892+ continue;
13893 niceval = nice_to_rlimit(task_nice(p));
13894 if (niceval > retval)
13895 retval = niceval;
8931d859 13896@@ -292,6 +300,8 @@ SYSCALL_DEFINE2(getpriority, int, which,
bb20add7
AM
13897 goto out_unlock; /* No processes for this user */
13898 }
13899 do_each_thread(g, p) {
13900+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13901+ continue;
927ca606 13902 if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
bb20add7 13903 niceval = nice_to_rlimit(task_nice(p));
4bf69007 13904 if (niceval > retval)
8931d859 13905@@ -1211,7 +1221,8 @@ SYSCALL_DEFINE2(sethostname, char __user
4bf69007
AM
13906 int errno;
13907 char tmp[__NEW_UTS_LEN];
13908
13909- if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
13910+ if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns,
13911+ CAP_SYS_ADMIN, VXC_SET_UTSNAME))
13912 return -EPERM;
13913
13914 if (len < 0 || len > __NEW_UTS_LEN)
8931d859 13915@@ -1264,7 +1275,8 @@ SYSCALL_DEFINE2(setdomainname, char __us
4bf69007
AM
13916 int errno;
13917 char tmp[__NEW_UTS_LEN];
13918
13919- if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
13920+ if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns,
13921+ CAP_SYS_ADMIN, VXC_SET_UTSNAME))
13922 return -EPERM;
13923 if (len < 0 || len > __NEW_UTS_LEN)
13924 return -EINVAL;
8931d859 13925@@ -1384,7 +1396,7 @@ int do_prlimit(struct task_struct *tsk,
4bf69007
AM
13926 /* Keep the capable check against init_user_ns until
13927 cgroups can contain all limits */
13928 if (new_rlim->rlim_max > rlim->rlim_max &&
13929- !capable(CAP_SYS_RESOURCE))
13930+ !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
13931 retval = -EPERM;
13932 if (!retval)
13933 retval = security_task_setrlimit(tsk->group_leader,
8931d859 13934@@ -1437,7 +1449,8 @@ static int check_prlimit_permission(stru
4bf69007
AM
13935 gid_eq(cred->gid, tcred->sgid) &&
13936 gid_eq(cred->gid, tcred->gid))
13937 return 0;
13938- if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
13939+ if (vx_ns_capable(tcred->user_ns,
13940+ CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
13941 return 0;
13942
13943 return -EPERM;
8931d859
AM
13944diff -NurpP --minimal linux-4.4.161/kernel/sysctl.c linux-4.4.161-vs2.3.9.8/kernel/sysctl.c
13945--- linux-4.4.161/kernel/sysctl.c 2018-10-20 02:34:31.000000000 +0000
13946+++ linux-4.4.161-vs2.3.9.8/kernel/sysctl.c 2018-10-20 05:50:20.000000000 +0000
927ca606 13947@@ -87,6 +87,7 @@
4bf69007
AM
13948 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
13949 #include <linux/lockdep.h>
13950 #endif
13951+extern char vshelper_path[];
13952 #ifdef CONFIG_CHR_DEV_SG
13953 #include <scsi/sg.h>
13954 #endif
927ca606 13955@@ -279,6 +280,13 @@ static int max_extfrag_threshold = 1000;
bb20add7
AM
13956
13957 static struct ctl_table kern_table[] = {
13958 {
4bf69007
AM
13959+ .procname = "vshelper",
13960+ .data = &vshelper_path,
13961+ .maxlen = 256,
13962+ .mode = 0644,
bb20add7 13963+ .proc_handler = proc_dostring,
4bf69007 13964+ },
bb20add7
AM
13965+ {
13966 .procname = "sched_child_runs_first",
13967 .data = &sysctl_sched_child_runs_first,
13968 .maxlen = sizeof(unsigned int),
8931d859 13969@@ -1386,7 +1394,6 @@ static struct ctl_table vm_table[] = {
927ca606
AM
13970 .extra1 = &zero,
13971 .extra2 = &one,
bb20add7
AM
13972 },
13973-
13974 #endif /* CONFIG_COMPACTION */
4bf69007 13975 {
bb20add7 13976 .procname = "min_free_kbytes",
8931d859
AM
13977diff -NurpP --minimal linux-4.4.161/kernel/sysctl_binary.c linux-4.4.161-vs2.3.9.8/kernel/sysctl_binary.c
13978--- linux-4.4.161/kernel/sysctl_binary.c 2018-10-20 02:34:31.000000000 +0000
13979+++ linux-4.4.161-vs2.3.9.8/kernel/sysctl_binary.c 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8 13980@@ -73,6 +73,7 @@ static const struct bin_table bin_kern_t
4bf69007
AM
13981
13982 { CTL_INT, KERN_PANIC, "panic" },
13983 { CTL_INT, KERN_REALROOTDEV, "real-root-dev" },
13984+ { CTL_STR, KERN_VSHELPER, "vshelper" },
13985
13986 { CTL_STR, KERN_SPARC_REBOOT, "reboot-cmd" },
13987 { CTL_INT, KERN_CTLALTDEL, "ctrl-alt-del" },
8931d859
AM
13988diff -NurpP --minimal linux-4.4.161/kernel/time/posix-timers.c linux-4.4.161-vs2.3.9.8/kernel/time/posix-timers.c
13989--- linux-4.4.161/kernel/time/posix-timers.c 2018-10-20 02:34:31.000000000 +0000
13990+++ linux-4.4.161-vs2.3.9.8/kernel/time/posix-timers.c 2018-10-20 04:57:21.000000000 +0000
bb20add7
AM
13991@@ -48,6 +48,7 @@
13992 #include <linux/workqueue.h>
13993 #include <linux/export.h>
13994 #include <linux/hashtable.h>
13995+#include <linux/vs_context.h>
4bf69007 13996
bb20add7 13997 #include "timekeeping.h"
4bf69007 13998
927ca606 13999@@ -407,6 +408,7 @@ int posix_timer_event(struct k_itimer *t
bb20add7
AM
14000 {
14001 struct task_struct *task;
14002 int shared, ret = -1;
14003+
14004 /*
14005 * FIXME: if ->sigq is queued we can race with
14006 * dequeue_signal()->do_schedule_next_timer().
927ca606 14007@@ -423,10 +425,18 @@ int posix_timer_event(struct k_itimer *t
bb20add7
AM
14008 rcu_read_lock();
14009 task = pid_task(timr->it_pid, PIDTYPE_PID);
14010 if (task) {
14011+ struct vx_info_save vxis;
14012+ struct vx_info *vxi;
14013+
14014+ vxi = get_vx_info(task->vx_info);
14015+ enter_vx_info(vxi, &vxis);
14016 shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
14017 ret = send_sigqueue(timr->sigq, task, shared);
14018+ leave_vx_info(&vxis);
14019+ put_vx_info(vxi);
14020 }
14021 rcu_read_unlock();
14022+
14023 /* If we failed to send the signal the timer stops. */
14024 return ret > 0;
4bf69007 14025 }
8931d859
AM
14026diff -NurpP --minimal linux-4.4.161/kernel/time/time.c linux-4.4.161-vs2.3.9.8/kernel/time/time.c
14027--- linux-4.4.161/kernel/time/time.c 2018-10-20 02:34:31.000000000 +0000
14028+++ linux-4.4.161-vs2.3.9.8/kernel/time/time.c 2018-10-20 04:57:21.000000000 +0000
14029@@ -38,6 +38,7 @@
4bf69007
AM
14030 #include <linux/fs.h>
14031 #include <linux/math64.h>
14032 #include <linux/ptrace.h>
14033+#include <linux/vs_time.h>
14034
14035 #include <asm/uaccess.h>
14036 #include <asm/unistd.h>
8931d859 14037@@ -94,7 +95,7 @@ SYSCALL_DEFINE1(stime, time_t __user *,
4bf69007
AM
14038 if (err)
14039 return err;
14040
14041- do_settimeofday(&tv);
14042+ vx_settimeofday(&tv);
14043 return 0;
14044 }
14045
8931d859 14046@@ -187,7 +188,7 @@ int do_sys_settimeofday(const struct tim
4bf69007
AM
14047 }
14048 }
14049 if (tv)
14050- return do_settimeofday(tv);
14051+ return vx_settimeofday(tv);
14052 return 0;
14053 }
14054
8931d859
AM
14055diff -NurpP --minimal linux-4.4.161/kernel/time/timekeeping.c linux-4.4.161-vs2.3.9.8/kernel/time/timekeeping.c
14056--- linux-4.4.161/kernel/time/timekeeping.c 2018-10-20 02:34:31.000000000 +0000
14057+++ linux-4.4.161-vs2.3.9.8/kernel/time/timekeeping.c 2018-10-20 04:57:21.000000000 +0000
bb20add7
AM
14058@@ -23,6 +23,7 @@
14059 #include <linux/stop_machine.h>
14060 #include <linux/pvclock_gtod.h>
14061 #include <linux/compiler.h>
14062+#include <linux/vs_time.h>
14063
14064 #include "tick-internal.h"
14065 #include "ntp_internal.h"
8931d859 14066@@ -920,7 +921,9 @@ void ktime_get_raw_and_real_ts64(struct
bb20add7
AM
14067 } while (read_seqcount_retry(&tk_core.seq, seq));
14068
927ca606 14069 timespec64_add_ns(ts_raw, nsecs_raw);
bb20add7 14070+ vx_adjust_timespec(ts_raw);
927ca606 14071 timespec64_add_ns(ts_real, nsecs_real);
bb20add7
AM
14072+ vx_adjust_timespec(ts_real);
14073 }
927ca606 14074 EXPORT_SYMBOL(ktime_get_raw_and_real_ts64);
bb20add7 14075
8931d859
AM
14076diff -NurpP --minimal linux-4.4.161/kernel/time/timer.c linux-4.4.161-vs2.3.9.8/kernel/time/timer.c
14077--- linux-4.4.161/kernel/time/timer.c 2018-10-20 02:34:31.000000000 +0000
14078+++ linux-4.4.161-vs2.3.9.8/kernel/time/timer.c 2018-10-20 04:57:21.000000000 +0000
09be7631 14079@@ -42,6 +42,10 @@
b00e13aa 14080 #include <linux/sched/sysctl.h>
4bf69007 14081 #include <linux/slab.h>
09be7631 14082 #include <linux/compat.h>
4bf69007
AM
14083+#include <linux/vs_base.h>
14084+#include <linux/vs_cvirt.h>
14085+#include <linux/vs_pid.h>
14086+#include <linux/vserver/sched.h>
14087
14088 #include <asm/uaccess.h>
14089 #include <asm/unistd.h>
8931d859
AM
14090diff -NurpP --minimal linux-4.4.161/kernel/user_namespace.c linux-4.4.161-vs2.3.9.8/kernel/user_namespace.c
14091--- linux-4.4.161/kernel/user_namespace.c 2018-10-20 02:34:31.000000000 +0000
14092+++ linux-4.4.161-vs2.3.9.8/kernel/user_namespace.c 2018-10-20 05:50:20.000000000 +0000
b00e13aa 14093@@ -22,6 +22,7 @@
4bf69007
AM
14094 #include <linux/ctype.h>
14095 #include <linux/projid.h>
b00e13aa 14096 #include <linux/fs_struct.h>
4bf69007
AM
14097+#include <linux/vserver/global.h>
14098
14099 static struct kmem_cache *user_ns_cachep __read_mostly;
bb20add7 14100 static DEFINE_MUTEX(userns_state_mutex);
927ca606 14101@@ -97,6 +98,7 @@ int create_user_ns(struct cred *new)
4bf69007 14102
b00e13aa
AM
14103 atomic_set(&ns->count, 1);
14104 /* Leave the new->user_ns reference with the new user namespace. */
4bf69007
AM
14105+ atomic_inc(&vs_global_user_ns);
14106 ns->parent = parent_ns;
09be7631 14107 ns->level = parent_ns->level + 1;
4bf69007 14108 ns->owner = owner;
927ca606
AM
14109@@ -145,6 +147,7 @@ void free_user_ns(struct user_namespace
14110 key_put(ns->persistent_keyring_register);
14111 #endif
14112 ns_free_inum(&ns->ns);
14113+ atomic_dec(&vs_global_user_ns);
14114 kmem_cache_free(user_ns_cachep, ns);
14115 ns = parent;
14116 } while (atomic_dec_and_test(&parent->count));
14117@@ -358,6 +361,18 @@ gid_t from_kgid_munged(struct user_names
bb20add7
AM
14118 }
14119 EXPORT_SYMBOL(from_kgid_munged);
14120
14121+ktag_t make_ktag(struct user_namespace *from, vtag_t tag)
14122+{
14123+ return KTAGT_INIT(tag);
14124+}
14125+EXPORT_SYMBOL(make_ktag);
14126+
14127+vtag_t from_ktag(struct user_namespace *to, ktag_t tag)
14128+{
14129+ return __ktag_val(tag);
14130+}
14131+EXPORT_SYMBOL(from_ktag);
14132+
14133 /**
14134 * make_kprojid - Map a user-namespace projid pair into a kprojid.
14135 * @ns: User namespace that the projid is in
8931d859
AM
14136diff -NurpP --minimal linux-4.4.161/kernel/utsname.c linux-4.4.161-vs2.3.9.8/kernel/utsname.c
14137--- linux-4.4.161/kernel/utsname.c 2016-01-10 23:01:32.000000000 +0000
14138+++ linux-4.4.161-vs2.3.9.8/kernel/utsname.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
14139@@ -16,14 +16,17 @@
14140 #include <linux/slab.h>
14141 #include <linux/user_namespace.h>
09be7631 14142 #include <linux/proc_ns.h>
4bf69007
AM
14143+#include <linux/vserver/global.h>
14144
14145 static struct uts_namespace *create_uts_ns(void)
14146 {
14147 struct uts_namespace *uts_ns;
14148
14149 uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
14150- if (uts_ns)
14151+ if (uts_ns) {
c2e5f7c8 14152 kref_init(&uts_ns->kref);
4bf69007
AM
14153+ atomic_inc(&vs_global_uts_ns);
14154+ }
14155 return uts_ns;
14156 }
14157
927ca606 14158@@ -87,6 +90,7 @@ void free_uts_ns(struct kref *kref)
4bf69007
AM
14159 ns = container_of(kref, struct uts_namespace, kref);
14160 put_user_ns(ns->user_ns);
927ca606 14161 ns_free_inum(&ns->ns);
4bf69007
AM
14162+ atomic_dec(&vs_global_uts_ns);
14163 kfree(ns);
14164 }
14165
8931d859
AM
14166diff -NurpP --minimal linux-4.4.161/kernel/vserver/Kconfig linux-4.4.161-vs2.3.9.8/kernel/vserver/Kconfig
14167--- linux-4.4.161/kernel/vserver/Kconfig 1970-01-01 00:00:00.000000000 +0000
14168+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/Kconfig 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8 14169@@ -0,0 +1,230 @@
4bf69007
AM
14170+#
14171+# Linux VServer configuration
14172+#
d337f35e 14173+
4bf69007 14174+menu "Linux VServer"
d337f35e 14175+
4bf69007
AM
14176+config VSERVER_AUTO_LBACK
14177+ bool "Automatically Assign Loopback IP"
14178+ default y
14179+ help
14180+ Automatically assign a guest specific loopback
14181+ IP and add it to the kernel network stack on
14182+ startup.
d337f35e 14183+
4bf69007
AM
14184+config VSERVER_AUTO_SINGLE
14185+ bool "Automatic Single IP Special Casing"
c2e5f7c8 14186+ default n
4bf69007
AM
14187+ help
14188+ This allows network contexts with a single IP to
14189+ automatically remap 0.0.0.0 bindings to that IP,
14190+ avoiding further network checks and improving
14191+ performance.
d337f35e 14192+
4bf69007
AM
14193+ (note: such guests do not allow changing the IP
14194+ on the fly and do not show loopback addresses)
2380c486 14195+
4bf69007
AM
14196+config VSERVER_COWBL
14197+ bool "Enable COW Immutable Link Breaking"
14198+ default y
14199+ help
14200+ This enables the COW (Copy-On-Write) link break code.
14201+ It allows you to treat unified files like normal files
14202+ when writing to them (which will implicitly break the
14203+ link and create a copy of the unified file)
d337f35e 14204+
4bf69007 14205+config VSERVER_VTIME
c2e5f7c8 14206+ bool "Enable Virtualized Guest Time (EXPERIMENTAL)"
4bf69007
AM
14207+ default n
14208+ help
14209+ This enables per guest time offsets to allow for
14210+ adjusting the system clock individually per guest.
14211+ This adds some overhead to the time functions and
14212+ therefore should not be enabled without good reason.
d337f35e 14213+
4bf69007 14214+config VSERVER_DEVICE
c2e5f7c8 14215+ bool "Enable Guest Device Mapping (EXPERIMENTAL)"
4bf69007
AM
14216+ default n
14217+ help
14218+ This enables generic device remapping.
d337f35e 14219+
4bf69007
AM
14220+config VSERVER_PROC_SECURE
14221+ bool "Enable Proc Security"
14222+ depends on PROC_FS
14223+ default y
14224+ help
14225+ This configures ProcFS security to initially hide
14226+ non-process entries for all contexts except the main and
14227+ spectator context (i.e. for all guests), which is a secure
14228+ default.
d337f35e 14229+
4bf69007 14230+ (note: on 1.2x the entries were visible by default)
d337f35e 14231+
4bf69007
AM
14232+choice
14233+ prompt "Persistent Inode Tagging"
14234+ default TAGGING_ID24
14235+ help
14236+ This adds persistent context information to filesystems
14237+ mounted with the tagxid option. Tagging is a requirement
14238+ for per-context disk limits and per-context quota.
d337f35e 14239+
d337f35e 14240+
4bf69007
AM
14241+config TAGGING_NONE
14242+ bool "Disabled"
14243+ help
14244+ do not store per-context information in inodes.
d337f35e 14245+
4bf69007
AM
14246+config TAGGING_UID16
14247+ bool "UID16/GID32"
14248+ help
14249+ reduces UID to 16 bit, but leaves GID at 32 bit.
d337f35e 14250+
4bf69007
AM
14251+config TAGGING_GID16
14252+ bool "UID32/GID16"
14253+ help
14254+ reduces GID to 16 bit, but leaves UID at 32 bit.
d337f35e 14255+
4bf69007
AM
14256+config TAGGING_ID24
14257+ bool "UID24/GID24"
14258+ help
14259+ uses the upper 8bit from UID and GID for XID tagging
14260+ which leaves 24bit for UID/GID each, which should be
14261+ more than sufficient for normal use.
d337f35e 14262+
4bf69007
AM
14263+config TAGGING_INTERN
14264+ bool "UID32/GID32"
14265+ help
14266+ this uses otherwise reserved inode fields in the on
14267+ disk representation, which limits the use to a few
14268+ filesystems (currently ext2 and ext3)
d337f35e 14269+
4bf69007 14270+endchoice
d337f35e 14271+
4bf69007
AM
14272+config TAG_NFSD
14273+ bool "Tag NFSD User Auth and Files"
14274+ default n
14275+ help
14276+ Enable this if you do want the in-kernel NFS
14277+ Server to use the tagging specified above.
14278+ (will require patched clients too)
2380c486 14279+
4bf69007
AM
14280+config VSERVER_PRIVACY
14281+ bool "Honor Privacy Aspects of Guests"
14282+ default n
14283+ help
14284+ When enabled, most context checks will disallow
14285+ access to structures assigned to a specific context,
14286+ like ptys or loop devices.
2380c486 14287+
4bf69007
AM
14288+config VSERVER_CONTEXTS
14289+ int "Maximum number of Contexts (1-65533)" if EMBEDDED
14290+ range 1 65533
14291+ default "768" if 64BIT
14292+ default "256"
14293+ help
14294+ This setting will optimize certain data structures
14295+ and memory allocations according to the expected
14296+ maximum.
2380c486 14297+
4bf69007 14298+ note: this is not a strict upper limit.
2380c486 14299+
4bf69007
AM
14300+config VSERVER_WARN
14301+ bool "VServer Warnings"
14302+ default y
14303+ help
14304+ This enables various runtime warnings, which will
14305+ notify about potential manipulation attempts or
14306+ resource shortage. It is generally considered to
14307+ be a good idea to have that enabled.
2380c486 14308+
4bf69007
AM
14309+config VSERVER_WARN_DEVPTS
14310+ bool "VServer DevPTS Warnings"
14311+ depends on VSERVER_WARN
14312+ default y
14313+ help
14314+ This enables DevPTS related warnings, issued when a
14315+ process inside a context tries to lookup or access
14316+ a dynamic pts from the host or a different context.
d337f35e 14317+
4bf69007
AM
14318+config VSERVER_DEBUG
14319+ bool "VServer Debugging Code"
14320+ default n
14321+ help
14322+ Set this to yes if you want to be able to activate
14323+ debugging output at runtime. It adds a very small
14324+ overhead to all vserver related functions and
14325+ increases the kernel size by about 20k.
d337f35e 14326+
4bf69007
AM
14327+config VSERVER_HISTORY
14328+ bool "VServer History Tracing"
14329+ depends on VSERVER_DEBUG
14330+ default n
14331+ help
14332+ Set this to yes if you want to record the history of
14333+ linux-vserver activities, so they can be replayed in
14334+ the event of a kernel panic or oops.
d337f35e 14335+
4bf69007
AM
14336+config VSERVER_HISTORY_SIZE
14337+ int "Per-CPU History Size (32-65536)"
14338+ depends on VSERVER_HISTORY
14339+ range 32 65536
14340+ default 64
14341+ help
14342+ This allows you to specify the number of entries in
14343+ the per-CPU history buffer.
d337f35e 14344+
4bf69007
AM
14345+config VSERVER_EXTRA_MNT_CHECK
14346+ bool "Extra Checks for Reachability"
14347+ default n
14348+ help
14349+ Set this to yes if you want to do extra checks for
14350+ vfsmount reachability in the proc filesystem code.
14351+ This shouldn't be required on any setup utilizing
14352+ mnt namespaces.
d337f35e 14353+
4bf69007
AM
14354+choice
14355+ prompt "Quotes used in debug and warn messages"
14356+ default QUOTES_ISO8859
d337f35e 14357+
4bf69007
AM
14358+config QUOTES_ISO8859
14359+ bool "Extended ASCII (ISO 8859) angle quotes"
14360+ help
14361+ This uses the extended ASCII characters \xbb
14362+ and \xab for quoting file and process names.
d337f35e 14363+
4bf69007
AM
14364+config QUOTES_UTF8
14365+ bool "UTF-8 angle quotes"
14366+ help
14367+ This uses the UTF-8 sequences for angle
14368+ quotes to quote file and process names.
d337f35e 14369+
4bf69007
AM
14370+config QUOTES_ASCII
14371+ bool "ASCII single quotes"
14372+ help
14373+ This uses the ASCII single quote character
14374+ (\x27) to quote file and process names.
d337f35e 14375+
4bf69007 14376+endchoice
d337f35e 14377+
4bf69007 14378+endmenu
d337f35e 14379+
d337f35e 14380+
4bf69007
AM
14381+config VSERVER
14382+ bool
14383+ default y
14384+ select NAMESPACES
14385+ select UTS_NS
14386+ select IPC_NS
14387+# select USER_NS
14388+ select SYSVIPC
d337f35e 14389+
4bf69007
AM
14390+config VSERVER_SECURITY
14391+ bool
14392+ depends on SECURITY
14393+ default y
14394+ select SECURITY_CAPABILITIES
d337f35e 14395+
4bf69007
AM
14396+config VSERVER_DISABLED
14397+ bool
14398+ default n
d337f35e 14399+
8931d859
AM
14400diff -NurpP --minimal linux-4.4.161/kernel/vserver/Makefile linux-4.4.161-vs2.3.9.8/kernel/vserver/Makefile
14401--- linux-4.4.161/kernel/vserver/Makefile 1970-01-01 00:00:00.000000000 +0000
14402+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/Makefile 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
14403@@ -0,0 +1,18 @@
14404+#
14405+# Makefile for the Linux vserver routines.
14406+#
d337f35e 14407+
d337f35e 14408+
4bf69007 14409+obj-y += vserver.o
2380c486 14410+
4bf69007
AM
14411+vserver-y := switch.o context.o space.o sched.o network.o inode.o \
14412+ limit.o cvirt.o cacct.o signal.o helper.o init.o \
14413+ dlimit.o tag.o
d337f35e 14414+
4bf69007
AM
14415+vserver-$(CONFIG_INET) += inet.o
14416+vserver-$(CONFIG_PROC_FS) += proc.o
14417+vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o
14418+vserver-$(CONFIG_VSERVER_HISTORY) += history.o
14419+vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o
14420+vserver-$(CONFIG_VSERVER_DEVICE) += device.o
d337f35e 14421+
8931d859
AM
14422diff -NurpP --minimal linux-4.4.161/kernel/vserver/cacct.c linux-4.4.161-vs2.3.9.8/kernel/vserver/cacct.c
14423--- linux-4.4.161/kernel/vserver/cacct.c 1970-01-01 00:00:00.000000000 +0000
14424+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/cacct.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
14425@@ -0,0 +1,42 @@
14426+/*
14427+ * linux/kernel/vserver/cacct.c
14428+ *
14429+ * Virtual Server: Context Accounting
14430+ *
d6221c00 14431+ * Copyright (C) 2006-2007 Herbert Pötzl
4bf69007
AM
14432+ *
14433+ * V0.01 added accounting stats
14434+ *
14435+ */
d337f35e 14436+
4bf69007
AM
14437+#include <linux/types.h>
14438+#include <linux/vs_context.h>
14439+#include <linux/vserver/cacct_cmd.h>
14440+#include <linux/vserver/cacct_int.h>
d337f35e 14441+
4bf69007
AM
14442+#include <asm/errno.h>
14443+#include <asm/uaccess.h>
14444+
14445+
14446+int vc_sock_stat(struct vx_info *vxi, void __user *data)
d337f35e 14447+{
4bf69007
AM
14448+ struct vcmd_sock_stat_v0 vc_data;
14449+ int j, field;
d337f35e 14450+
2380c486
JR
14451+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
14452+ return -EFAULT;
14453+
4bf69007
AM
14454+ field = vc_data.field;
14455+ if ((field < 0) || (field >= VXA_SOCK_SIZE))
14456+ return -EINVAL;
7e46296a 14457+
4bf69007
AM
14458+ for (j = 0; j < 3; j++) {
14459+ vc_data.count[j] = vx_sock_count(&vxi->cacct, field, j);
14460+ vc_data.total[j] = vx_sock_total(&vxi->cacct, field, j);
14461+ }
7e46296a
AM
14462+
14463+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
14464+ return -EFAULT;
14465+ return 0;
14466+}
14467+
8931d859
AM
14468diff -NurpP --minimal linux-4.4.161/kernel/vserver/cacct_init.h linux-4.4.161-vs2.3.9.8/kernel/vserver/cacct_init.h
14469--- linux-4.4.161/kernel/vserver/cacct_init.h 1970-01-01 00:00:00.000000000 +0000
14470+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/cacct_init.h 2018-10-20 04:57:21.000000000 +0000
4bf69007 14471@@ -0,0 +1,25 @@
7e46296a
AM
14472+
14473+
4bf69007 14474+static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
265d6dcc 14475+{
4bf69007 14476+ int i, j;
265d6dcc 14477+
265d6dcc 14478+
4bf69007
AM
14479+ for (i = 0; i < VXA_SOCK_SIZE; i++) {
14480+ for (j = 0; j < 3; j++) {
14481+ atomic_long_set(&cacct->sock[i][j].count, 0);
14482+ atomic_long_set(&cacct->sock[i][j].total, 0);
14483+ }
14484+ }
14485+ for (i = 0; i < 8; i++)
14486+ atomic_set(&cacct->slab[i], 0);
14487+ for (i = 0; i < 5; i++)
14488+ for (j = 0; j < 4; j++)
14489+ atomic_set(&cacct->page[i][j], 0);
265d6dcc
JR
14490+}
14491+
4bf69007 14492+static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
265d6dcc 14493+{
4bf69007 14494+ return;
265d6dcc
JR
14495+}
14496+
8931d859
AM
14497diff -NurpP --minimal linux-4.4.161/kernel/vserver/cacct_proc.h linux-4.4.161-vs2.3.9.8/kernel/vserver/cacct_proc.h
14498--- linux-4.4.161/kernel/vserver/cacct_proc.h 1970-01-01 00:00:00.000000000 +0000
14499+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/cacct_proc.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
14500@@ -0,0 +1,53 @@
14501+#ifndef _VX_CACCT_PROC_H
14502+#define _VX_CACCT_PROC_H
265d6dcc 14503+
4bf69007 14504+#include <linux/vserver/cacct_int.h>
d337f35e 14505+
d337f35e 14506+
4bf69007
AM
14507+#define VX_SOCKA_TOP \
14508+ "Type\t recv #/bytes\t\t send #/bytes\t\t fail #/bytes\n"
d337f35e 14509+
4bf69007 14510+static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
d337f35e 14511+{
4bf69007
AM
14512+ int i, j, length = 0;
14513+ static char *type[VXA_SOCK_SIZE] = {
14514+ "UNSPEC", "UNIX", "INET", "INET6", "PACKET", "OTHER"
14515+ };
d337f35e 14516+
4bf69007
AM
14517+ length += sprintf(buffer + length, VX_SOCKA_TOP);
14518+ for (i = 0; i < VXA_SOCK_SIZE; i++) {
14519+ length += sprintf(buffer + length, "%s:", type[i]);
14520+ for (j = 0; j < 3; j++) {
14521+ length += sprintf(buffer + length,
14522+ "\t%10lu/%-10lu",
14523+ vx_sock_count(cacct, i, j),
14524+ vx_sock_total(cacct, i, j));
14525+ }
14526+ buffer[length++] = '\n';
14527+ }
d337f35e 14528+
4bf69007
AM
14529+ length += sprintf(buffer + length, "\n");
14530+ length += sprintf(buffer + length,
14531+ "slab:\t %8u %8u %8u %8u\n",
14532+ atomic_read(&cacct->slab[1]),
14533+ atomic_read(&cacct->slab[4]),
14534+ atomic_read(&cacct->slab[0]),
14535+ atomic_read(&cacct->slab[2]));
d337f35e 14536+
4bf69007
AM
14537+ length += sprintf(buffer + length, "\n");
14538+ for (i = 0; i < 5; i++) {
14539+ length += sprintf(buffer + length,
14540+ "page[%d]: %8u %8u %8u %8u\t %8u %8u %8u %8u\n", i,
14541+ atomic_read(&cacct->page[i][0]),
14542+ atomic_read(&cacct->page[i][1]),
14543+ atomic_read(&cacct->page[i][2]),
14544+ atomic_read(&cacct->page[i][3]),
14545+ atomic_read(&cacct->page[i][4]),
14546+ atomic_read(&cacct->page[i][5]),
14547+ atomic_read(&cacct->page[i][6]),
14548+ atomic_read(&cacct->page[i][7]));
14549+ }
14550+ return length;
14551+}
d337f35e 14552+
4bf69007 14553+#endif /* _VX_CACCT_PROC_H */
8931d859
AM
14554diff -NurpP --minimal linux-4.4.161/kernel/vserver/context.c linux-4.4.161-vs2.3.9.8/kernel/vserver/context.c
14555--- linux-4.4.161/kernel/vserver/context.c 1970-01-01 00:00:00.000000000 +0000
14556+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/context.c 2018-10-20 04:57:21.000000000 +0000
4bf69007 14557@@ -0,0 +1,1119 @@
2380c486 14558+/*
4bf69007 14559+ * linux/kernel/vserver/context.c
2380c486 14560+ *
4bf69007 14561+ * Virtual Server: Context Support
2380c486 14562+ *
d6221c00 14563+ * Copyright (C) 2003-2011 Herbert Pötzl
2380c486 14564+ *
4bf69007
AM
14565+ * V0.01 context helper
14566+ * V0.02 vx_ctx_kill syscall command
14567+ * V0.03 replaced context_info calls
14568+ * V0.04 redesign of struct (de)alloc
14569+ * V0.05 rlimit basic implementation
14570+ * V0.06 task_xid and info commands
14571+ * V0.07 context flags and caps
14572+ * V0.08 switch to RCU based hash
14573+ * V0.09 revert to non RCU for now
14574+ * V0.10 and back to working RCU hash
14575+ * V0.11 and back to locking again
14576+ * V0.12 referenced context store
14577+ * V0.13 separate per cpu data
14578+ * V0.14 changed vcmds to vxi arg
14579+ * V0.15 added context stat
14580+ * V0.16 have __create claim() the vxi
14581+ * V0.17 removed older and legacy stuff
14582+ * V0.18 added user credentials
14583+ * V0.19 added warn mask
2380c486
JR
14584+ *
14585+ */
d337f35e 14586+
4bf69007 14587+#include <linux/slab.h>
2380c486 14588+#include <linux/types.h>
4bf69007
AM
14589+#include <linux/security.h>
14590+#include <linux/pid_namespace.h>
14591+#include <linux/capability.h>
1e8b8f9b 14592+
4bf69007
AM
14593+#include <linux/vserver/context.h>
14594+#include <linux/vserver/network.h>
14595+#include <linux/vserver/debug.h>
14596+#include <linux/vserver/limit.h>
14597+#include <linux/vserver/limit_int.h>
14598+#include <linux/vserver/space.h>
14599+#include <linux/init_task.h>
14600+#include <linux/fs_struct.h>
14601+#include <linux/cred.h>
1e8b8f9b 14602+
4bf69007
AM
14603+#include <linux/vs_context.h>
14604+#include <linux/vs_limit.h>
14605+#include <linux/vs_pid.h>
14606+#include <linux/vserver/context_cmd.h>
d337f35e 14607+
4bf69007
AM
14608+#include "cvirt_init.h"
14609+#include "cacct_init.h"
14610+#include "limit_init.h"
14611+#include "sched_init.h"
d337f35e 14612+
d337f35e 14613+
4bf69007
AM
14614+atomic_t vx_global_ctotal = ATOMIC_INIT(0);
14615+atomic_t vx_global_cactive = ATOMIC_INIT(0);
d337f35e 14616+
d337f35e 14617+
4bf69007 14618+/* now inactive context structures */
d337f35e 14619+
4bf69007 14620+static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT;
2380c486 14621+
4bf69007 14622+static DEFINE_SPINLOCK(vx_info_inactive_lock);
d337f35e 14623+
2380c486 14624+
4bf69007 14625+/* __alloc_vx_info()
d337f35e 14626+
4bf69007
AM
14627+ * allocate an initialized vx_info struct
14628+ * doesn't make it visible (hash) */
d337f35e 14629+
61333608 14630+static struct vx_info *__alloc_vx_info(vxid_t xid)
4bf69007
AM
14631+{
14632+ struct vx_info *new = NULL;
14633+ int cpu, index;
d337f35e 14634+
4bf69007 14635+ vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
d337f35e 14636+
4bf69007
AM
14637+ /* would this benefit from a slab cache? */
14638+ new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
14639+ if (!new)
14640+ return 0;
2380c486 14641+
4bf69007
AM
14642+ memset(new, 0, sizeof(struct vx_info));
14643+#ifdef CONFIG_SMP
14644+ new->ptr_pc = alloc_percpu(struct _vx_info_pc);
14645+ if (!new->ptr_pc)
14646+ goto error;
14647+#endif
14648+ new->vx_id = xid;
14649+ INIT_HLIST_NODE(&new->vx_hlist);
14650+ atomic_set(&new->vx_usecnt, 0);
14651+ atomic_set(&new->vx_tasks, 0);
14652+ new->vx_parent = NULL;
14653+ new->vx_state = 0;
14654+ init_waitqueue_head(&new->vx_wait);
2380c486 14655+
4bf69007
AM
14656+ /* prepare reaper */
14657+ get_task_struct(init_pid_ns.child_reaper);
14658+ new->vx_reaper = init_pid_ns.child_reaper;
14659+ new->vx_badness_bias = 0;
d337f35e 14660+
4bf69007
AM
14661+ /* rest of init goes here */
14662+ vx_info_init_limit(&new->limit);
14663+ vx_info_init_sched(&new->sched);
14664+ vx_info_init_cvirt(&new->cvirt);
14665+ vx_info_init_cacct(&new->cacct);
d337f35e 14666+
4bf69007
AM
14667+ /* per cpu data structures */
14668+ for_each_possible_cpu(cpu) {
14669+ vx_info_init_sched_pc(
14670+ &vx_per_cpu(new, sched_pc, cpu), cpu);
14671+ vx_info_init_cvirt_pc(
14672+ &vx_per_cpu(new, cvirt_pc, cpu), cpu);
14673+ }
d337f35e 14674+
4bf69007
AM
14675+ new->vx_flags = VXF_INIT_SET;
14676+ new->vx_bcaps = CAP_FULL_SET; // maybe ~CAP_SETPCAP
14677+ new->vx_ccaps = 0;
14678+ new->vx_umask = 0;
14679+ new->vx_wmask = 0;
d337f35e 14680+
4bf69007
AM
14681+ new->reboot_cmd = 0;
14682+ new->exit_code = 0;
d337f35e 14683+
4bf69007
AM
14684+ // preconfig spaces
14685+ for (index = 0; index < VX_SPACES; index++) {
14686+ struct _vx_space *space = &new->space[index];
d337f35e 14687+
4bf69007
AM
14688+ // filesystem
14689+ spin_lock(&init_fs.lock);
14690+ init_fs.users++;
14691+ spin_unlock(&init_fs.lock);
14692+ space->vx_fs = &init_fs;
2380c486 14693+
4bf69007
AM
14694+ /* FIXME: do we want defaults? */
14695+ // space->vx_real_cred = 0;
14696+ // space->vx_cred = 0;
2380c486 14697+ }
4bf69007
AM
14698+
14699+
14700+ vxdprintk(VXD_CBIT(xid, 0),
14701+ "alloc_vx_info(%d) = %p", xid, new);
14702+ vxh_alloc_vx_info(new);
14703+ atomic_inc(&vx_global_ctotal);
14704+ return new;
14705+#ifdef CONFIG_SMP
14706+error:
14707+ kfree(new);
14708+ return 0;
14709+#endif
d337f35e
JR
14710+}
14711+
4bf69007 14712+/* __dealloc_vx_info()
d337f35e 14713+
4bf69007 14714+ * final disposal of vx_info */
d337f35e 14715+
4bf69007 14716+static void __dealloc_vx_info(struct vx_info *vxi)
d337f35e 14717+{
4bf69007
AM
14718+#ifdef CONFIG_VSERVER_WARN
14719+ struct vx_info_save vxis;
14720+ int cpu;
14721+#endif
14722+ vxdprintk(VXD_CBIT(xid, 0),
14723+ "dealloc_vx_info(%p)", vxi);
14724+ vxh_dealloc_vx_info(vxi);
d337f35e 14725+
4bf69007
AM
14726+#ifdef CONFIG_VSERVER_WARN
14727+ enter_vx_info(vxi, &vxis);
14728+ vx_info_exit_limit(&vxi->limit);
14729+ vx_info_exit_sched(&vxi->sched);
14730+ vx_info_exit_cvirt(&vxi->cvirt);
14731+ vx_info_exit_cacct(&vxi->cacct);
d337f35e 14732+
4bf69007
AM
14733+ for_each_possible_cpu(cpu) {
14734+ vx_info_exit_sched_pc(
14735+ &vx_per_cpu(vxi, sched_pc, cpu), cpu);
14736+ vx_info_exit_cvirt_pc(
14737+ &vx_per_cpu(vxi, cvirt_pc, cpu), cpu);
14738+ }
14739+ leave_vx_info(&vxis);
14740+#endif
d337f35e 14741+
4bf69007
AM
14742+ vxi->vx_id = -1;
14743+ vxi->vx_state |= VXS_RELEASED;
d337f35e 14744+
4bf69007
AM
14745+#ifdef CONFIG_SMP
14746+ free_percpu(vxi->ptr_pc);
14747+#endif
14748+ kfree(vxi);
14749+ atomic_dec(&vx_global_ctotal);
d337f35e
JR
14750+}
14751+
4bf69007 14752+static void __shutdown_vx_info(struct vx_info *vxi)
d337f35e 14753+{
4bf69007
AM
14754+ struct nsproxy *nsproxy;
14755+ struct fs_struct *fs;
14756+ struct cred *cred;
14757+ int index, kill;
d337f35e 14758+
4bf69007 14759+ might_sleep();
d337f35e 14760+
4bf69007
AM
14761+ vxi->vx_state |= VXS_SHUTDOWN;
14762+ vs_state_change(vxi, VSC_SHUTDOWN);
d337f35e 14763+
4bf69007
AM
14764+ for (index = 0; index < VX_SPACES; index++) {
14765+ struct _vx_space *space = &vxi->space[index];
d337f35e 14766+
4bf69007
AM
14767+ nsproxy = xchg(&space->vx_nsproxy, NULL);
14768+ if (nsproxy)
14769+ put_nsproxy(nsproxy);
2380c486 14770+
4bf69007
AM
14771+ fs = xchg(&space->vx_fs, NULL);
14772+ spin_lock(&fs->lock);
14773+ kill = !--fs->users;
14774+ spin_unlock(&fs->lock);
14775+ if (kill)
14776+ free_fs_struct(fs);
d337f35e 14777+
4bf69007
AM
14778+ cred = (struct cred *)xchg(&space->vx_cred, NULL);
14779+ if (cred)
14780+ abort_creds(cred);
14781+ }
d337f35e
JR
14782+}
14783+
4bf69007 14784+/* exported stuff */
d337f35e 14785+
4bf69007 14786+void free_vx_info(struct vx_info *vxi)
d337f35e 14787+{
4bf69007
AM
14788+ unsigned long flags;
14789+ unsigned index;
d337f35e 14790+
4bf69007
AM
14791+ /* check for reference counts first */
14792+ BUG_ON(atomic_read(&vxi->vx_usecnt));
14793+ BUG_ON(atomic_read(&vxi->vx_tasks));
2380c486 14794+
4bf69007
AM
14795+ /* context must not be hashed */
14796+ BUG_ON(vx_info_state(vxi, VXS_HASHED));
d337f35e 14797+
4bf69007
AM
14798+ /* context shutdown is mandatory */
14799+ BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN));
d337f35e 14800+
4bf69007
AM
14801+ /* spaces check */
14802+ for (index = 0; index < VX_SPACES; index++) {
14803+ struct _vx_space *space = &vxi->space[index];
d337f35e 14804+
4bf69007
AM
14805+ BUG_ON(space->vx_nsproxy);
14806+ BUG_ON(space->vx_fs);
14807+ // BUG_ON(space->vx_real_cred);
14808+ // BUG_ON(space->vx_cred);
14809+ }
d337f35e 14810+
4bf69007
AM
14811+ spin_lock_irqsave(&vx_info_inactive_lock, flags);
14812+ hlist_del(&vxi->vx_hlist);
14813+ spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
d337f35e 14814+
4bf69007
AM
14815+ __dealloc_vx_info(vxi);
14816+}
eab5a9a6 14817+
d337f35e 14818+
4bf69007 14819+/* hash table for vx_info hash */
93de0823 14820+
4bf69007 14821+#define VX_HASH_SIZE 13
d337f35e 14822+
4bf69007
AM
14823+static struct hlist_head vx_info_hash[VX_HASH_SIZE] =
14824+ { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT };
d337f35e 14825+
4bf69007 14826+static DEFINE_SPINLOCK(vx_info_hash_lock);
d337f35e 14827+
93de0823 14828+
61333608 14829+static inline unsigned int __hashval(vxid_t xid)
4bf69007
AM
14830+{
14831+ return (xid % VX_HASH_SIZE);
d337f35e
JR
14832+}
14833+
14834+
d337f35e 14835+
4bf69007 14836+/* __hash_vx_info()
d337f35e 14837+
4bf69007
AM
14838+ * add the vxi to the global hash table
14839+ * requires the hash_lock to be held */
d337f35e 14840+
4bf69007 14841+static inline void __hash_vx_info(struct vx_info *vxi)
d337f35e 14842+{
4bf69007 14843+ struct hlist_head *head;
d337f35e 14844+
4bf69007
AM
14845+ vxd_assert_lock(&vx_info_hash_lock);
14846+ vxdprintk(VXD_CBIT(xid, 4),
14847+ "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
14848+ vxh_hash_vx_info(vxi);
d337f35e 14849+
4bf69007
AM
14850+ /* context must not be hashed */
14851+ BUG_ON(vx_info_state(vxi, VXS_HASHED));
d337f35e 14852+
4bf69007
AM
14853+ vxi->vx_state |= VXS_HASHED;
14854+ head = &vx_info_hash[__hashval(vxi->vx_id)];
14855+ hlist_add_head(&vxi->vx_hlist, head);
14856+ atomic_inc(&vx_global_cactive);
2380c486 14857+}
d337f35e 14858+
4bf69007 14859+/* __unhash_vx_info()
d337f35e 14860+
4bf69007
AM
14861+ * remove the vxi from the global hash table
14862+ * requires the hash_lock to be held */
d337f35e 14863+
4bf69007 14864+static inline void __unhash_vx_info(struct vx_info *vxi)
d337f35e 14865+{
4bf69007
AM
14866+ unsigned long flags;
14867+
14868+ vxd_assert_lock(&vx_info_hash_lock);
14869+ vxdprintk(VXD_CBIT(xid, 4),
14870+ "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id,
14871+ atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks));
14872+ vxh_unhash_vx_info(vxi);
14873+
14874+ /* context must be hashed */
14875+ BUG_ON(!vx_info_state(vxi, VXS_HASHED));
14876+ /* but without tasks */
14877+ BUG_ON(atomic_read(&vxi->vx_tasks));
14878+
14879+ vxi->vx_state &= ~VXS_HASHED;
14880+ hlist_del_init(&vxi->vx_hlist);
14881+ spin_lock_irqsave(&vx_info_inactive_lock, flags);
14882+ hlist_add_head(&vxi->vx_hlist, &vx_info_inactive);
14883+ spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
14884+ atomic_dec(&vx_global_cactive);
2380c486 14885+}
d337f35e 14886+
d337f35e 14887+
4bf69007 14888+/* __lookup_vx_info()
d337f35e 14889+
4bf69007
AM
14890+ * requires the hash_lock to be held
14891+ * doesn't increment the vx_usecnt */
2380c486 14892+
61333608 14893+static inline struct vx_info *__lookup_vx_info(vxid_t xid)
d337f35e 14894+{
4bf69007
AM
14895+ struct hlist_head *head = &vx_info_hash[__hashval(xid)];
14896+ struct hlist_node *pos;
14897+ struct vx_info *vxi;
d337f35e 14898+
4bf69007
AM
14899+ vxd_assert_lock(&vx_info_hash_lock);
14900+ hlist_for_each(pos, head) {
14901+ vxi = hlist_entry(pos, struct vx_info, vx_hlist);
d337f35e 14902+
4bf69007
AM
14903+ if (vxi->vx_id == xid)
14904+ goto found;
14905+ }
14906+ vxi = NULL;
14907+found:
14908+ vxdprintk(VXD_CBIT(xid, 0),
14909+ "__lookup_vx_info(#%u): %p[#%u]",
14910+ xid, vxi, vxi ? vxi->vx_id : 0);
14911+ vxh_lookup_vx_info(vxi, xid);
14912+ return vxi;
14913+}
d337f35e 14914+
d337f35e 14915+
4bf69007 14916+/* __create_vx_info()
d337f35e 14917+
4bf69007
AM
14918+ * create the requested context
14919+ * get(), claim() and hash it */
2380c486 14920+
4bf69007
AM
14921+static struct vx_info *__create_vx_info(int id)
14922+{
14923+ struct vx_info *new, *vxi = NULL;
2380c486 14924+
4bf69007 14925+ vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id);
d337f35e 14926+
4bf69007
AM
14927+ if (!(new = __alloc_vx_info(id)))
14928+ return ERR_PTR(-ENOMEM);
d337f35e 14929+
4bf69007
AM
14930+ /* required to make dynamic xids unique */
14931+ spin_lock(&vx_info_hash_lock);
d337f35e 14932+
4bf69007
AM
14933+ /* static context requested */
14934+ if ((vxi = __lookup_vx_info(id))) {
14935+ vxdprintk(VXD_CBIT(xid, 0),
14936+ "create_vx_info(%d) = %p (already there)", id, vxi);
14937+ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0))
14938+ vxi = ERR_PTR(-EBUSY);
14939+ else
14940+ vxi = ERR_PTR(-EEXIST);
14941+ goto out_unlock;
14942+ }
14943+ /* new context */
14944+ vxdprintk(VXD_CBIT(xid, 0),
14945+ "create_vx_info(%d) = %p (new)", id, new);
14946+ claim_vx_info(new, NULL);
14947+ __hash_vx_info(get_vx_info(new));
14948+ vxi = new, new = NULL;
d337f35e 14949+
4bf69007
AM
14950+out_unlock:
14951+ spin_unlock(&vx_info_hash_lock);
14952+ vxh_create_vx_info(IS_ERR(vxi) ? NULL : vxi, id);
14953+ if (new)
14954+ __dealloc_vx_info(new);
14955+ return vxi;
14956+}
d337f35e 14957+
d337f35e 14958+
4bf69007 14959+/* exported stuff */
d337f35e 14960+
d337f35e 14961+
4bf69007 14962+void unhash_vx_info(struct vx_info *vxi)
d337f35e 14963+{
4bf69007
AM
14964+ spin_lock(&vx_info_hash_lock);
14965+ __unhash_vx_info(vxi);
14966+ spin_unlock(&vx_info_hash_lock);
14967+ __shutdown_vx_info(vxi);
14968+ __wakeup_vx_info(vxi);
2380c486 14969+}
d337f35e 14970+
2380c486 14971+
4bf69007 14972+/* lookup_vx_info()
2380c486 14973+
4bf69007
AM
14974+ * search for a vx_info and get() it
14975+ * negative id means current */
2380c486 14976+
4bf69007 14977+struct vx_info *lookup_vx_info(int id)
2380c486 14978+{
4bf69007
AM
14979+ struct vx_info *vxi = NULL;
14980+
14981+ if (id < 0) {
14982+ vxi = get_vx_info(current_vx_info());
14983+ } else if (id > 1) {
14984+ spin_lock(&vx_info_hash_lock);
14985+ vxi = get_vx_info(__lookup_vx_info(id));
14986+ spin_unlock(&vx_info_hash_lock);
2380c486 14987+ }
4bf69007 14988+ return vxi;
d337f35e
JR
14989+}
14990+
4bf69007 14991+/* xid_is_hashed()
d337f35e 14992+
4bf69007 14993+ * verify that xid is still hashed */
d337f35e 14994+
61333608 14995+int xid_is_hashed(vxid_t xid)
4bf69007
AM
14996+{
14997+ int hashed;
d337f35e 14998+
4bf69007
AM
14999+ spin_lock(&vx_info_hash_lock);
15000+ hashed = (__lookup_vx_info(xid) != NULL);
15001+ spin_unlock(&vx_info_hash_lock);
15002+ return hashed;
15003+}
d337f35e 15004+
4bf69007 15005+#ifdef CONFIG_PROC_FS
d337f35e 15006+
4bf69007 15007+/* get_xid_list()
d337f35e 15008+
4bf69007
AM
15009+ * get a subset of hashed xids for proc
15010+ * assumes size is at least one */
d337f35e 15011+
4bf69007
AM
15012+int get_xid_list(int index, unsigned int *xids, int size)
15013+{
15014+ int hindex, nr_xids = 0;
d337f35e 15015+
4bf69007
AM
15016+ /* only show current and children */
15017+ if (!vx_check(0, VS_ADMIN | VS_WATCH)) {
15018+ if (index > 0)
15019+ return 0;
15020+ xids[nr_xids] = vx_current_xid();
15021+ return 1;
15022+ }
d337f35e 15023+
4bf69007
AM
15024+ for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
15025+ struct hlist_head *head = &vx_info_hash[hindex];
15026+ struct hlist_node *pos;
d337f35e 15027+
4bf69007
AM
15028+ spin_lock(&vx_info_hash_lock);
15029+ hlist_for_each(pos, head) {
15030+ struct vx_info *vxi;
d337f35e 15031+
4bf69007
AM
15032+ if (--index > 0)
15033+ continue;
d337f35e 15034+
4bf69007
AM
15035+ vxi = hlist_entry(pos, struct vx_info, vx_hlist);
15036+ xids[nr_xids] = vxi->vx_id;
15037+ if (++nr_xids >= size) {
15038+ spin_unlock(&vx_info_hash_lock);
15039+ goto out;
15040+ }
15041+ }
15042+ /* keep the lock time short */
15043+ spin_unlock(&vx_info_hash_lock);
15044+ }
15045+out:
15046+ return nr_xids;
15047+}
15048+#endif
d337f35e 15049+
4bf69007 15050+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 15051+
4bf69007 15052+void dump_vx_info_inactive(int level)
d337f35e 15053+{
4bf69007 15054+ struct hlist_node *entry, *next;
d337f35e 15055+
4bf69007
AM
15056+ hlist_for_each_safe(entry, next, &vx_info_inactive) {
15057+ struct vx_info *vxi =
15058+ list_entry(entry, struct vx_info, vx_hlist);
d337f35e 15059+
4bf69007
AM
15060+ dump_vx_info(vxi, level);
15061+ }
d337f35e
JR
15062+}
15063+
4bf69007 15064+#endif
d337f35e 15065+
4bf69007
AM
15066+#if 0
15067+int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
d337f35e 15068+{
4bf69007 15069+ struct user_struct *new_user, *old_user;
d337f35e 15070+
4bf69007
AM
15071+ if (!p || !vxi)
15072+ BUG();
d337f35e 15073+
4bf69007
AM
15074+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
15075+ return -EACCES;
d337f35e 15076+
4bf69007
AM
15077+ new_user = alloc_uid(vxi->vx_id, p->uid);
15078+ if (!new_user)
15079+ return -ENOMEM;
d337f35e 15080+
4bf69007
AM
15081+ old_user = p->user;
15082+ if (new_user != old_user) {
15083+ atomic_inc(&new_user->processes);
15084+ atomic_dec(&old_user->processes);
15085+ p->user = new_user;
d337f35e 15086+ }
4bf69007
AM
15087+ free_uid(old_user);
15088+ return 0;
d337f35e 15089+}
4bf69007 15090+#endif
d337f35e 15091+
4bf69007
AM
15092+#if 0
15093+void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p)
d337f35e 15094+{
4bf69007
AM
15095+ // p->cap_effective &= vxi->vx_cap_bset;
15096+ p->cap_effective =
15097+ cap_intersect(p->cap_effective, vxi->cap_bset);
15098+ // p->cap_inheritable &= vxi->vx_cap_bset;
15099+ p->cap_inheritable =
15100+ cap_intersect(p->cap_inheritable, vxi->cap_bset);
15101+ // p->cap_permitted &= vxi->vx_cap_bset;
15102+ p->cap_permitted =
15103+ cap_intersect(p->cap_permitted, vxi->cap_bset);
15104+}
15105+#endif
d337f35e
JR
15106+
15107+
4bf69007
AM
15108+#include <linux/file.h>
15109+#include <linux/fdtable.h>
d337f35e 15110+
4bf69007
AM
15111+static int vx_openfd_task(struct task_struct *tsk)
15112+{
15113+ struct files_struct *files = tsk->files;
15114+ struct fdtable *fdt;
15115+ const unsigned long *bptr;
15116+ int count, total;
d337f35e 15117+
4bf69007
AM
15118+ /* no rcu_read_lock() because of spin_lock() */
15119+ spin_lock(&files->file_lock);
15120+ fdt = files_fdtable(files);
15121+ bptr = fdt->open_fds;
15122+ count = fdt->max_fds / (sizeof(unsigned long) * 8);
15123+ for (total = 0; count > 0; count--) {
15124+ if (*bptr)
15125+ total += hweight_long(*bptr);
15126+ bptr++;
15127+ }
15128+ spin_unlock(&files->file_lock);
15129+ return total;
d337f35e
JR
15130+}
15131+
d337f35e 15132+
4bf69007
AM
15133+/* for *space compatibility */
15134+
15135+asmlinkage long sys_unshare(unsigned long);
15136+
15137+/*
15138+ * migrate task to new context
15139+ * gets vxi, puts old_vxi on change
15140+ * optionally unshares namespaces (hack)
2380c486 15141+ */
4bf69007
AM
15142+
15143+int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare)
2380c486 15144+{
4bf69007
AM
15145+ struct vx_info *old_vxi;
15146+ int ret = 0;
d337f35e 15147+
4bf69007
AM
15148+ if (!p || !vxi)
15149+ BUG();
d337f35e 15150+
4bf69007
AM
15151+ vxdprintk(VXD_CBIT(xid, 5),
15152+ "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
15153+ vxi->vx_id, atomic_read(&vxi->vx_usecnt));
d337f35e 15154+
4bf69007
AM
15155+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) &&
15156+ !vx_info_flags(vxi, VXF_STATE_SETUP, 0))
15157+ return -EACCES;
2380c486 15158+
4bf69007
AM
15159+ if (vx_info_state(vxi, VXS_SHUTDOWN))
15160+ return -EFAULT;
d337f35e 15161+
4bf69007
AM
15162+ old_vxi = task_get_vx_info(p);
15163+ if (old_vxi == vxi)
15164+ goto out;
d337f35e 15165+
4bf69007
AM
15166+// if (!(ret = vx_migrate_user(p, vxi))) {
15167+ {
15168+ int openfd;
d337f35e 15169+
4bf69007
AM
15170+ task_lock(p);
15171+ openfd = vx_openfd_task(p);
15172+
15173+ if (old_vxi) {
15174+ atomic_dec(&old_vxi->cvirt.nr_threads);
15175+ atomic_dec(&old_vxi->cvirt.nr_running);
15176+ __rlim_dec(&old_vxi->limit, RLIMIT_NPROC);
15177+ /* FIXME: what about the struct files here? */
15178+ __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd);
15179+ /* account for the executable */
15180+ __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY);
2380c486 15181+ }
4bf69007
AM
15182+ atomic_inc(&vxi->cvirt.nr_threads);
15183+ atomic_inc(&vxi->cvirt.nr_running);
15184+ __rlim_inc(&vxi->limit, RLIMIT_NPROC);
15185+ /* FIXME: what about the struct files here? */
15186+ __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd);
15187+ /* account for the executable */
15188+ __rlim_inc(&vxi->limit, VLIMIT_DENTRY);
2380c486 15189+
4bf69007
AM
15190+ if (old_vxi) {
15191+ release_vx_info(old_vxi, p);
15192+ clr_vx_info(&p->vx_info);
15193+ }
15194+ claim_vx_info(vxi, p);
15195+ set_vx_info(&p->vx_info, vxi);
15196+ p->xid = vxi->vx_id;
d337f35e 15197+
4bf69007
AM
15198+ vxdprintk(VXD_CBIT(xid, 5),
15199+ "moved task %p into vxi:%p[#%d]",
15200+ p, vxi, vxi->vx_id);
d337f35e 15201+
4bf69007
AM
15202+ // vx_mask_cap_bset(vxi, p);
15203+ task_unlock(p);
d337f35e 15204+
4bf69007
AM
15205+ /* hack for *spaces to provide compatibility */
15206+ if (unshare) {
15207+ struct nsproxy *old_nsp, *new_nsp;
d337f35e 15208+
4bf69007
AM
15209+ ret = unshare_nsproxy_namespaces(
15210+ CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER,
b00e13aa 15211+ &new_nsp, NULL, NULL);
4bf69007
AM
15212+ if (ret)
15213+ goto out;
d337f35e 15214+
4bf69007
AM
15215+ old_nsp = xchg(&p->nsproxy, new_nsp);
15216+ vx_set_space(vxi,
15217+ CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER, 0);
15218+ put_nsproxy(old_nsp);
15219+ }
15220+ }
15221+out:
15222+ put_vx_info(old_vxi);
2380c486
JR
15223+ return ret;
15224+}
d337f35e 15225+
4bf69007 15226+int vx_set_reaper(struct vx_info *vxi, struct task_struct *p)
d337f35e 15227+{
4bf69007
AM
15228+ struct task_struct *old_reaper;
15229+ struct vx_info *reaper_vxi;
d337f35e 15230+
4bf69007
AM
15231+ if (!vxi)
15232+ return -EINVAL;
d337f35e 15233+
4bf69007
AM
15234+ vxdprintk(VXD_CBIT(xid, 6),
15235+ "vx_set_reaper(%p[#%d],%p[#%d,%d])",
15236+ vxi, vxi->vx_id, p, p->xid, p->pid);
d337f35e 15237+
4bf69007
AM
15238+ old_reaper = vxi->vx_reaper;
15239+ if (old_reaper == p)
15240+ return 0;
d337f35e 15241+
4bf69007
AM
15242+ reaper_vxi = task_get_vx_info(p);
15243+ if (reaper_vxi && reaper_vxi != vxi) {
15244+ vxwprintk(1,
15245+ "Unsuitable reaper [" VS_Q("%s") ",%u:#%u] "
15246+ "for [xid #%u]",
15247+ p->comm, p->pid, p->xid, vx_current_xid());
2380c486
JR
15248+ goto out;
15249+ }
4bf69007
AM
15250+
15251+ /* set new child reaper */
15252+ get_task_struct(p);
15253+ vxi->vx_reaper = p;
15254+ put_task_struct(old_reaper);
2380c486 15255+out:
4bf69007
AM
15256+ put_vx_info(reaper_vxi);
15257+ return 0;
2380c486 15258+}
d337f35e 15259+
4bf69007 15260+int vx_set_init(struct vx_info *vxi, struct task_struct *p)
d337f35e 15261+{
4bf69007
AM
15262+ if (!vxi)
15263+ return -EINVAL;
d337f35e 15264+
4bf69007
AM
15265+ vxdprintk(VXD_CBIT(xid, 6),
15266+ "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
15267+ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
d337f35e 15268+
4bf69007
AM
15269+ vxi->vx_flags &= ~VXF_STATE_INIT;
15270+ // vxi->vx_initpid = p->tgid;
15271+ vxi->vx_initpid = p->pid;
2380c486 15272+ return 0;
d337f35e
JR
15273+}
15274+
4bf69007 15275+void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code)
d337f35e 15276+{
4bf69007
AM
15277+ vxdprintk(VXD_CBIT(xid, 6),
15278+ "vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
15279+ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
2380c486 15280+
4bf69007
AM
15281+ vxi->exit_code = code;
15282+ vxi->vx_initpid = 0;
d337f35e
JR
15283+}
15284+
2380c486 15285+
4bf69007 15286+void vx_set_persistent(struct vx_info *vxi)
d337f35e 15287+{
4bf69007
AM
15288+ vxdprintk(VXD_CBIT(xid, 6),
15289+ "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id);
2380c486 15290+
4bf69007
AM
15291+ get_vx_info(vxi);
15292+ claim_vx_info(vxi, NULL);
d337f35e
JR
15293+}
15294+
4bf69007 15295+void vx_clear_persistent(struct vx_info *vxi)
2380c486 15296+{
4bf69007
AM
15297+ vxdprintk(VXD_CBIT(xid, 6),
15298+ "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id);
d337f35e 15299+
4bf69007
AM
15300+ release_vx_info(vxi, NULL);
15301+ put_vx_info(vxi);
2380c486 15302+}
d337f35e 15303+
4bf69007 15304+void vx_update_persistent(struct vx_info *vxi)
d337f35e 15305+{
4bf69007
AM
15306+ if (vx_info_flags(vxi, VXF_PERSISTENT, 0))
15307+ vx_set_persistent(vxi);
2380c486 15308+ else
4bf69007 15309+ vx_clear_persistent(vxi);
2380c486 15310+}
d337f35e 15311+
d337f35e 15312+
4bf69007
AM
15313+/* task must be current or locked */
15314+
15315+void exit_vx_info(struct task_struct *p, int code)
2380c486 15316+{
4bf69007 15317+ struct vx_info *vxi = p->vx_info;
d337f35e 15318+
4bf69007
AM
15319+ if (vxi) {
15320+ atomic_dec(&vxi->cvirt.nr_threads);
15321+ vx_nproc_dec(p);
d337f35e 15322+
4bf69007
AM
15323+ vxi->exit_code = code;
15324+ release_vx_info(vxi, p);
15325+ }
2380c486 15326+}
d337f35e 15327+
4bf69007 15328+void exit_vx_info_early(struct task_struct *p, int code)
2380c486 15329+{
4bf69007 15330+ struct vx_info *vxi = p->vx_info;
d337f35e 15331+
4bf69007
AM
15332+ if (vxi) {
15333+ if (vxi->vx_initpid == p->pid)
15334+ vx_exit_init(vxi, p, code);
15335+ if (vxi->vx_reaper == p)
15336+ vx_set_reaper(vxi, init_pid_ns.child_reaper);
15337+ }
d337f35e
JR
15338+}
15339+
15340+
4bf69007 15341+/* vserver syscall commands below here */
d337f35e 15342+
4bf69007 15343+/* task xid and vx_info functions */
d337f35e 15344+
4bf69007 15345+#include <asm/uaccess.h>
d337f35e 15346+
d337f35e 15347+
4bf69007 15348+int vc_task_xid(uint32_t id)
d337f35e 15349+{
61333608 15350+ vxid_t xid;
d337f35e 15351+
4bf69007
AM
15352+ if (id) {
15353+ struct task_struct *tsk;
d337f35e 15354+
4bf69007
AM
15355+ rcu_read_lock();
15356+ tsk = find_task_by_real_pid(id);
15357+ xid = (tsk) ? tsk->xid : -ESRCH;
15358+ rcu_read_unlock();
15359+ } else
15360+ xid = vx_current_xid();
15361+ return xid;
d337f35e
JR
15362+}
15363+
d337f35e 15364+
4bf69007
AM
15365+int vc_vx_info(struct vx_info *vxi, void __user *data)
15366+{
15367+ struct vcmd_vx_info_v0 vc_data;
d337f35e 15368+
4bf69007
AM
15369+ vc_data.xid = vxi->vx_id;
15370+ vc_data.initpid = vxi->vx_initpid;
d337f35e 15371+
4bf69007
AM
15372+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15373+ return -EFAULT;
15374+ return 0;
15375+}
d337f35e 15376+
d337f35e 15377+
4bf69007 15378+int vc_ctx_stat(struct vx_info *vxi, void __user *data)
d337f35e 15379+{
4bf69007 15380+ struct vcmd_ctx_stat_v0 vc_data;
d337f35e 15381+
4bf69007
AM
15382+ vc_data.usecnt = atomic_read(&vxi->vx_usecnt);
15383+ vc_data.tasks = atomic_read(&vxi->vx_tasks);
d337f35e 15384+
4bf69007
AM
15385+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15386+ return -EFAULT;
15387+ return 0;
d337f35e
JR
15388+}
15389+
d337f35e 15390+
4bf69007 15391+/* context functions */
d337f35e 15392+
4bf69007 15393+int vc_ctx_create(uint32_t xid, void __user *data)
d337f35e 15394+{
4bf69007
AM
15395+ struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET };
15396+ struct vx_info *new_vxi;
15397+ int ret;
d337f35e 15398+
4bf69007
AM
15399+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
15400+ return -EFAULT;
d337f35e 15401+
4bf69007
AM
15402+ if ((xid > MAX_S_CONTEXT) || (xid < 2))
15403+ return -EINVAL;
d337f35e 15404+
4bf69007
AM
15405+ new_vxi = __create_vx_info(xid);
15406+ if (IS_ERR(new_vxi))
15407+ return PTR_ERR(new_vxi);
d337f35e 15408+
4bf69007
AM
15409+ /* initial flags */
15410+ new_vxi->vx_flags = vc_data.flagword;
d337f35e 15411+
4bf69007
AM
15412+ ret = -ENOEXEC;
15413+ if (vs_state_change(new_vxi, VSC_STARTUP))
15414+ goto out;
d337f35e 15415+
4bf69007
AM
15416+ ret = vx_migrate_task(current, new_vxi, (!data));
15417+ if (ret)
15418+ goto out;
d337f35e 15419+
4bf69007
AM
15420+ /* return context id on success */
15421+ ret = new_vxi->vx_id;
d337f35e 15422+
4bf69007
AM
15423+ /* get a reference for persistent contexts */
15424+ if ((vc_data.flagword & VXF_PERSISTENT))
15425+ vx_set_persistent(new_vxi);
15426+out:
15427+ release_vx_info(new_vxi, NULL);
15428+ put_vx_info(new_vxi);
15429+ return ret;
15430+}
d337f35e
JR
15431+
15432+
4bf69007 15433+int vc_ctx_migrate(struct vx_info *vxi, void __user *data)
d337f35e 15434+{
4bf69007
AM
15435+ struct vcmd_ctx_migrate vc_data = { .flagword = 0 };
15436+ int ret;
d337f35e 15437+
4bf69007
AM
15438+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
15439+ return -EFAULT;
d337f35e 15440+
4bf69007
AM
15441+ ret = vx_migrate_task(current, vxi, 0);
15442+ if (ret)
15443+ return ret;
15444+ if (vc_data.flagword & VXM_SET_INIT)
15445+ ret = vx_set_init(vxi, current);
15446+ if (ret)
15447+ return ret;
15448+ if (vc_data.flagword & VXM_SET_REAPER)
15449+ ret = vx_set_reaper(vxi, current);
15450+ return ret;
15451+}
d337f35e 15452+
d337f35e 15453+
4bf69007 15454+int vc_get_cflags(struct vx_info *vxi, void __user *data)
d337f35e 15455+{
4bf69007 15456+ struct vcmd_ctx_flags_v0 vc_data;
d337f35e 15457+
4bf69007 15458+ vc_data.flagword = vxi->vx_flags;
d337f35e 15459+
4bf69007
AM
15460+ /* special STATE flag handling */
15461+ vc_data.mask = vs_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME);
d337f35e 15462+
4bf69007
AM
15463+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15464+ return -EFAULT;
15465+ return 0;
d337f35e
JR
15466+}
15467+
4bf69007
AM
15468+int vc_set_cflags(struct vx_info *vxi, void __user *data)
15469+{
15470+ struct vcmd_ctx_flags_v0 vc_data;
15471+ uint64_t mask, trigger;
d337f35e 15472+
4bf69007
AM
15473+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15474+ return -EFAULT;
d337f35e 15475+
4bf69007
AM
15476+ /* special STATE flag handling */
15477+ mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
15478+ trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
d337f35e 15479+
4bf69007
AM
15480+ if (vxi == current_vx_info()) {
15481+ /* if (trigger & VXF_STATE_SETUP)
15482+ vx_mask_cap_bset(vxi, current); */
15483+ if (trigger & VXF_STATE_INIT) {
15484+ int ret;
d337f35e 15485+
4bf69007
AM
15486+ ret = vx_set_init(vxi, current);
15487+ if (ret)
15488+ return ret;
15489+ ret = vx_set_reaper(vxi, current);
15490+ if (ret)
15491+ return ret;
d337f35e
JR
15492+ }
15493+ }
4bf69007
AM
15494+
15495+ vxi->vx_flags = vs_mask_flags(vxi->vx_flags,
15496+ vc_data.flagword, mask);
15497+ if (trigger & VXF_PERSISTENT)
15498+ vx_update_persistent(vxi);
15499+
15500+ return 0;
d337f35e
JR
15501+}
15502+
15503+
4bf69007 15504+static inline uint64_t caps_from_cap_t(kernel_cap_t c)
d337f35e 15505+{
4bf69007 15506+ uint64_t v = c.cap[0] | ((uint64_t)c.cap[1] << 32);
d337f35e 15507+
4bf69007
AM
15508+ // printk("caps_from_cap_t(%08x:%08x) = %016llx\n", c.cap[1], c.cap[0], v);
15509+ return v;
d337f35e
JR
15510+}
15511+
4bf69007 15512+static inline kernel_cap_t cap_t_from_caps(uint64_t v)
d337f35e 15513+{
4bf69007 15514+ kernel_cap_t c = __cap_empty_set;
d337f35e 15515+
4bf69007
AM
15516+ c.cap[0] = v & 0xFFFFFFFF;
15517+ c.cap[1] = (v >> 32) & 0xFFFFFFFF;
d337f35e 15518+
4bf69007
AM
15519+ // printk("cap_t_from_caps(%016llx) = %08x:%08x\n", v, c.cap[1], c.cap[0]);
15520+ return c;
d337f35e
JR
15521+}
15522+
15523+
4bf69007 15524+static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps)
d337f35e 15525+{
4bf69007
AM
15526+ if (bcaps)
15527+ *bcaps = caps_from_cap_t(vxi->vx_bcaps);
15528+ if (ccaps)
15529+ *ccaps = vxi->vx_ccaps;
d337f35e 15530+
4bf69007
AM
15531+ return 0;
15532+}
d337f35e 15533+
4bf69007
AM
15534+int vc_get_ccaps(struct vx_info *vxi, void __user *data)
15535+{
15536+ struct vcmd_ctx_caps_v1 vc_data;
15537+ int ret;
d337f35e 15538+
4bf69007
AM
15539+ ret = do_get_caps(vxi, NULL, &vc_data.ccaps);
15540+ if (ret)
15541+ return ret;
15542+ vc_data.cmask = ~0ULL;
d337f35e 15543+
4bf69007
AM
15544+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15545+ return -EFAULT;
15546+ return 0;
d337f35e
JR
15547+}
15548+
4bf69007
AM
15549+static int do_set_caps(struct vx_info *vxi,
15550+ uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask)
d337f35e 15551+{
4bf69007 15552+ uint64_t bcold = caps_from_cap_t(vxi->vx_bcaps);
d337f35e 15553+
4bf69007
AM
15554+#if 0
15555+ printk("do_set_caps(%16llx, %16llx, %16llx, %16llx)\n",
15556+ bcaps, bmask, ccaps, cmask);
15557+#endif
15558+ vxi->vx_bcaps = cap_t_from_caps(
15559+ vs_mask_flags(bcold, bcaps, bmask));
15560+ vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask);
d337f35e 15561+
4bf69007 15562+ return 0;
d337f35e
JR
15563+}
15564+
4bf69007 15565+int vc_set_ccaps(struct vx_info *vxi, void __user *data)
d337f35e 15566+{
4bf69007 15567+ struct vcmd_ctx_caps_v1 vc_data;
d337f35e 15568+
2380c486 15569+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
15570+ return -EFAULT;
15571+
4bf69007 15572+ return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask);
d337f35e
JR
15573+}
15574+
4bf69007 15575+int vc_get_bcaps(struct vx_info *vxi, void __user *data)
d337f35e 15576+{
4bf69007
AM
15577+ struct vcmd_bcaps vc_data;
15578+ int ret;
d337f35e 15579+
4bf69007
AM
15580+ ret = do_get_caps(vxi, &vc_data.bcaps, NULL);
15581+ if (ret)
15582+ return ret;
15583+ vc_data.bmask = ~0ULL;
d337f35e 15584+
4bf69007
AM
15585+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15586+ return -EFAULT;
15587+ return 0;
d337f35e
JR
15588+}
15589+
4bf69007 15590+int vc_set_bcaps(struct vx_info *vxi, void __user *data)
d337f35e 15591+{
4bf69007 15592+ struct vcmd_bcaps vc_data;
d337f35e 15593+
2380c486 15594+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
15595+ return -EFAULT;
15596+
4bf69007 15597+ return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0);
d337f35e
JR
15598+}
15599+
d337f35e 15600+
4bf69007 15601+int vc_get_umask(struct vx_info *vxi, void __user *data)
d337f35e 15602+{
4bf69007 15603+ struct vcmd_umask vc_data;
7e46296a 15604+
4bf69007
AM
15605+ vc_data.umask = vxi->vx_umask;
15606+ vc_data.mask = ~0ULL;
d337f35e 15607+
4bf69007
AM
15608+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15609+ return -EFAULT;
15610+ return 0;
15611+}
d337f35e 15612+
4bf69007
AM
15613+int vc_set_umask(struct vx_info *vxi, void __user *data)
15614+{
15615+ struct vcmd_umask vc_data;
d337f35e 15616+
4bf69007
AM
15617+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15618+ return -EFAULT;
7e46296a 15619+
4bf69007
AM
15620+ vxi->vx_umask = vs_mask_flags(vxi->vx_umask,
15621+ vc_data.umask, vc_data.mask);
15622+ return 0;
15623+}
7e46296a 15624+
d337f35e 15625+
4bf69007
AM
15626+int vc_get_wmask(struct vx_info *vxi, void __user *data)
15627+{
15628+ struct vcmd_wmask vc_data;
d337f35e 15629+
4bf69007
AM
15630+ vc_data.wmask = vxi->vx_wmask;
15631+ vc_data.mask = ~0ULL;
d337f35e 15632+
4bf69007
AM
15633+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15634+ return -EFAULT;
15635+ return 0;
d337f35e
JR
15636+}
15637+
4bf69007 15638+int vc_set_wmask(struct vx_info *vxi, void __user *data)
d337f35e 15639+{
4bf69007 15640+ struct vcmd_wmask vc_data;
d337f35e 15641+
2380c486 15642+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
15643+ return -EFAULT;
15644+
4bf69007
AM
15645+ vxi->vx_wmask = vs_mask_flags(vxi->vx_wmask,
15646+ vc_data.wmask, vc_data.mask);
15647+ return 0;
d337f35e
JR
15648+}
15649+
d337f35e 15650+
4bf69007 15651+int vc_get_badness(struct vx_info *vxi, void __user *data)
d337f35e 15652+{
4bf69007
AM
15653+ struct vcmd_badness_v0 vc_data;
15654+
15655+ vc_data.bias = vxi->vx_badness_bias;
15656+
15657+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15658+ return -EFAULT;
15659+ return 0;
15660+}
15661+
15662+int vc_set_badness(struct vx_info *vxi, void __user *data)
15663+{
15664+ struct vcmd_badness_v0 vc_data;
d337f35e 15665+
2380c486 15666+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
15667+ return -EFAULT;
15668+
4bf69007
AM
15669+ vxi->vx_badness_bias = vc_data.bias;
15670+ return 0;
d337f35e
JR
15671+}
15672+
4bf69007 15673+#include <linux/module.h>
d337f35e 15674+
4bf69007 15675+EXPORT_SYMBOL_GPL(free_vx_info);
d337f35e 15676+
8931d859
AM
15677diff -NurpP --minimal linux-4.4.161/kernel/vserver/cvirt.c linux-4.4.161-vs2.3.9.8/kernel/vserver/cvirt.c
15678--- linux-4.4.161/kernel/vserver/cvirt.c 1970-01-01 00:00:00.000000000 +0000
15679+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/cvirt.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
15680@@ -0,0 +1,313 @@
15681+/*
15682+ * linux/kernel/vserver/cvirt.c
15683+ *
15684+ * Virtual Server: Context Virtualization
15685+ *
d6221c00 15686+ * Copyright (C) 2004-2007 Herbert Pötzl
4bf69007
AM
15687+ *
15688+ * V0.01 broken out from limit.c
15689+ * V0.02 added utsname stuff
15690+ * V0.03 changed vcmds to vxi arg
15691+ *
15692+ */
d337f35e 15693+
4bf69007
AM
15694+#include <linux/types.h>
15695+#include <linux/utsname.h>
15696+#include <linux/vs_cvirt.h>
15697+#include <linux/vserver/switch.h>
15698+#include <linux/vserver/cvirt_cmd.h>
d337f35e 15699+
4bf69007 15700+#include <asm/uaccess.h>
d337f35e 15701+
d337f35e 15702+
4bf69007
AM
15703+void vx_vsi_boottime(struct timespec *boottime)
15704+{
15705+ struct vx_info *vxi = current_vx_info();
d337f35e 15706+
4bf69007
AM
15707+ set_normalized_timespec(boottime,
15708+ boottime->tv_sec + vxi->cvirt.bias_uptime.tv_sec,
15709+ boottime->tv_nsec + vxi->cvirt.bias_uptime.tv_nsec);
15710+ return;
d337f35e
JR
15711+}
15712+
4bf69007 15713+void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
d337f35e 15714+{
4bf69007 15715+ struct vx_info *vxi = current_vx_info();
d337f35e 15716+
4bf69007
AM
15717+ set_normalized_timespec(uptime,
15718+ uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec,
15719+ uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec);
15720+ if (!idle)
15721+ return;
15722+ set_normalized_timespec(idle,
15723+ idle->tv_sec - vxi->cvirt.bias_idle.tv_sec,
15724+ idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec);
15725+ return;
d337f35e
JR
15726+}
15727+
4bf69007 15728+uint64_t vx_idle_jiffies(void)
d337f35e 15729+{
4bf69007 15730+ return init_task.utime + init_task.stime;
d337f35e
JR
15731+}
15732+
d337f35e
JR
15733+
15734+
4bf69007
AM
15735+static inline uint32_t __update_loadavg(uint32_t load,
15736+ int wsize, int delta, int n)
d337f35e 15737+{
4bf69007 15738+ unsigned long long calc, prev;
d337f35e 15739+
4bf69007
AM
15740+ /* just set it to n */
15741+ if (unlikely(delta >= wsize))
15742+ return (n << FSHIFT);
d337f35e 15743+
4bf69007
AM
15744+ calc = delta * n;
15745+ calc <<= FSHIFT;
15746+ prev = (wsize - delta);
15747+ prev *= load;
15748+ calc += prev;
15749+ do_div(calc, wsize);
15750+ return calc;
15751+}
d337f35e 15752+
d337f35e 15753+
4bf69007
AM
15754+void vx_update_load(struct vx_info *vxi)
15755+{
15756+ uint32_t now, last, delta;
15757+ unsigned int nr_running, nr_uninterruptible;
15758+ unsigned int total;
15759+ unsigned long flags;
d337f35e 15760+
4bf69007 15761+ spin_lock_irqsave(&vxi->cvirt.load_lock, flags);
d337f35e 15762+
4bf69007
AM
15763+ now = jiffies;
15764+ last = vxi->cvirt.load_last;
15765+ delta = now - last;
d337f35e 15766+
4bf69007
AM
15767+ if (delta < 5*HZ)
15768+ goto out;
d337f35e 15769+
4bf69007
AM
15770+ nr_running = atomic_read(&vxi->cvirt.nr_running);
15771+ nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible);
15772+ total = nr_running + nr_uninterruptible;
d337f35e 15773+
4bf69007
AM
15774+ vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0],
15775+ 60*HZ, delta, total);
15776+ vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1],
15777+ 5*60*HZ, delta, total);
15778+ vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2],
15779+ 15*60*HZ, delta, total);
d337f35e 15780+
4bf69007
AM
15781+ vxi->cvirt.load_last = now;
15782+out:
15783+ atomic_inc(&vxi->cvirt.load_updates);
15784+ spin_unlock_irqrestore(&vxi->cvirt.load_lock, flags);
d337f35e
JR
15785+}
15786+
d337f35e 15787+
d337f35e 15788+/*
4bf69007 15789+ * Commands to do_syslog:
d337f35e 15790+ *
4bf69007
AM
15791+ * 0 -- Close the log. Currently a NOP.
15792+ * 1 -- Open the log. Currently a NOP.
15793+ * 2 -- Read from the log.
15794+ * 3 -- Read all messages remaining in the ring buffer.
15795+ * 4 -- Read and clear all messages remaining in the ring buffer
15796+ * 5 -- Clear ring buffer.
15797+ * 6 -- Disable printk's to console
15798+ * 7 -- Enable printk's to console
15799+ * 8 -- Set level of messages printed to console
15800+ * 9 -- Return number of unread characters in the log buffer
15801+ * 10 -- Return size of the log buffer
d337f35e 15802+ */
4bf69007
AM
15803+int vx_do_syslog(int type, char __user *buf, int len)
15804+{
15805+ int error = 0;
15806+ int do_clear = 0;
15807+ struct vx_info *vxi = current_vx_info();
15808+ struct _vx_syslog *log;
d337f35e 15809+
4bf69007
AM
15810+ if (!vxi)
15811+ return -EINVAL;
15812+ log = &vxi->cvirt.syslog;
15813+
15814+ switch (type) {
15815+ case 0: /* Close log */
15816+ case 1: /* Open log */
15817+ break;
15818+ case 2: /* Read from log */
15819+ error = wait_event_interruptible(log->log_wait,
15820+ (log->log_start - log->log_end));
15821+ if (error)
15822+ break;
15823+ spin_lock_irq(&log->logbuf_lock);
15824+ spin_unlock_irq(&log->logbuf_lock);
15825+ break;
15826+ case 4: /* Read/clear last kernel messages */
15827+ do_clear = 1;
15828+ /* fall through */
15829+ case 3: /* Read last kernel messages */
15830+ return 0;
d337f35e 15831+
4bf69007
AM
15832+ case 5: /* Clear ring buffer */
15833+ return 0;
d337f35e 15834+
4bf69007
AM
15835+ case 6: /* Disable logging to console */
15836+ case 7: /* Enable logging to console */
15837+ case 8: /* Set level of messages printed to console */
15838+ break;
d337f35e 15839+
4bf69007
AM
15840+ case 9: /* Number of chars in the log buffer */
15841+ return 0;
15842+ case 10: /* Size of the log buffer */
15843+ return 0;
15844+ default:
15845+ error = -EINVAL;
15846+ break;
15847+ }
15848+ return error;
1e8b8f9b 15849+}
d337f35e 15850+
4bf69007
AM
15851+
15852+/* virtual host info names */
15853+
15854+static char *vx_vhi_name(struct vx_info *vxi, int id)
d337f35e 15855+{
4bf69007
AM
15856+ struct nsproxy *nsproxy;
15857+ struct uts_namespace *uts;
d337f35e 15858+
4bf69007
AM
15859+ if (id == VHIN_CONTEXT)
15860+ return vxi->vx_name;
15861+
15862+ nsproxy = vxi->space[0].vx_nsproxy;
15863+ if (!nsproxy)
15864+ return NULL;
15865+
15866+ uts = nsproxy->uts_ns;
15867+ if (!uts)
15868+ return NULL;
15869+
15870+ switch (id) {
15871+ case VHIN_SYSNAME:
15872+ return uts->name.sysname;
15873+ case VHIN_NODENAME:
15874+ return uts->name.nodename;
15875+ case VHIN_RELEASE:
15876+ return uts->name.release;
15877+ case VHIN_VERSION:
15878+ return uts->name.version;
15879+ case VHIN_MACHINE:
15880+ return uts->name.machine;
15881+ case VHIN_DOMAINNAME:
15882+ return uts->name.domainname;
15883+ default:
15884+ return NULL;
d337f35e 15885+ }
4bf69007 15886+ return NULL;
d337f35e
JR
15887+}
15888+
4bf69007 15889+int vc_set_vhi_name(struct vx_info *vxi, void __user *data)
d337f35e 15890+{
4bf69007
AM
15891+ struct vcmd_vhi_name_v0 vc_data;
15892+ char *name;
d337f35e 15893+
4bf69007
AM
15894+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15895+ return -EFAULT;
d337f35e 15896+
4bf69007
AM
15897+ name = vx_vhi_name(vxi, vc_data.field);
15898+ if (!name)
15899+ return -EINVAL;
d337f35e 15900+
4bf69007
AM
15901+ memcpy(name, vc_data.name, 65);
15902+ return 0;
15903+}
d337f35e 15904+
4bf69007
AM
15905+int vc_get_vhi_name(struct vx_info *vxi, void __user *data)
15906+{
15907+ struct vcmd_vhi_name_v0 vc_data;
15908+ char *name;
d337f35e 15909+
4bf69007
AM
15910+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15911+ return -EFAULT;
d337f35e 15912+
4bf69007
AM
15913+ name = vx_vhi_name(vxi, vc_data.field);
15914+ if (!name)
15915+ return -EINVAL;
d337f35e 15916+
4bf69007
AM
15917+ memcpy(vc_data.name, name, 65);
15918+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15919+ return -EFAULT;
15920+ return 0;
15921+}
d337f35e 15922+
d337f35e 15923+
4bf69007
AM
15924+int vc_virt_stat(struct vx_info *vxi, void __user *data)
15925+{
15926+ struct vcmd_virt_stat_v0 vc_data;
15927+ struct _vx_cvirt *cvirt = &vxi->cvirt;
15928+ struct timespec uptime;
99a884b4 15929+
927ca606 15930+ ktime_get_ts(&uptime);
4bf69007
AM
15931+ set_normalized_timespec(&uptime,
15932+ uptime.tv_sec - cvirt->bias_uptime.tv_sec,
15933+ uptime.tv_nsec - cvirt->bias_uptime.tv_nsec);
d337f35e 15934+
4bf69007
AM
15935+ vc_data.offset = timespec_to_ns(&cvirt->bias_ts);
15936+ vc_data.uptime = timespec_to_ns(&uptime);
15937+ vc_data.nr_threads = atomic_read(&cvirt->nr_threads);
15938+ vc_data.nr_running = atomic_read(&cvirt->nr_running);
15939+ vc_data.nr_uninterruptible = atomic_read(&cvirt->nr_uninterruptible);
15940+ vc_data.nr_onhold = atomic_read(&cvirt->nr_onhold);
15941+ vc_data.nr_forks = atomic_read(&cvirt->total_forks);
15942+ vc_data.load[0] = cvirt->load[0];
15943+ vc_data.load[1] = cvirt->load[1];
15944+ vc_data.load[2] = cvirt->load[2];
15945+
15946+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15947+ return -EFAULT;
15948+ return 0;
d337f35e
JR
15949+}
15950+
15951+
4bf69007
AM
15952+#ifdef CONFIG_VSERVER_VTIME
15953+
15954+/* virtualized time base */
15955+
15956+void vx_adjust_timespec(struct timespec *ts)
d337f35e 15957+{
4bf69007 15958+ struct vx_info *vxi;
d337f35e 15959+
4bf69007
AM
15960+ if (!vx_flags(VXF_VIRT_TIME, 0))
15961+ return;
d337f35e 15962+
4bf69007
AM
15963+ vxi = current_vx_info();
15964+ ts->tv_sec += vxi->cvirt.bias_ts.tv_sec;
15965+ ts->tv_nsec += vxi->cvirt.bias_ts.tv_nsec;
d337f35e 15966+
4bf69007
AM
15967+ if (ts->tv_nsec >= NSEC_PER_SEC) {
15968+ ts->tv_sec++;
15969+ ts->tv_nsec -= NSEC_PER_SEC;
15970+ } else if (ts->tv_nsec < 0) {
15971+ ts->tv_sec--;
15972+ ts->tv_nsec += NSEC_PER_SEC;
d337f35e 15973+ }
d337f35e
JR
15974+}
15975+
4bf69007 15976+int vx_settimeofday(const struct timespec *ts)
99a884b4 15977+{
4bf69007
AM
15978+ struct timespec ats, delta;
15979+ struct vx_info *vxi;
99a884b4 15980+
4bf69007
AM
15981+ if (!vx_flags(VXF_VIRT_TIME, 0))
15982+ return do_settimeofday(ts);
99a884b4 15983+
4bf69007
AM
15984+ getnstimeofday(&ats);
15985+ delta = timespec_sub(*ts, ats);
99a884b4 15986+
4bf69007
AM
15987+ vxi = current_vx_info();
15988+ vxi->cvirt.bias_ts = timespec_add(vxi->cvirt.bias_ts, delta);
99a884b4
AM
15989+ return 0;
15990+}
d337f35e 15991+
4bf69007 15992+#endif
d337f35e 15993+
8931d859
AM
15994diff -NurpP --minimal linux-4.4.161/kernel/vserver/cvirt_init.h linux-4.4.161-vs2.3.9.8/kernel/vserver/cvirt_init.h
15995--- linux-4.4.161/kernel/vserver/cvirt_init.h 1970-01-01 00:00:00.000000000 +0000
15996+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/cvirt_init.h 2018-10-20 04:57:21.000000000 +0000
4bf69007 15997@@ -0,0 +1,70 @@
d337f35e 15998+
d337f35e 15999+
4bf69007 16000+extern uint64_t vx_idle_jiffies(void);
d337f35e 16001+
4bf69007
AM
16002+static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
16003+{
16004+ uint64_t idle_jiffies = vx_idle_jiffies();
16005+ uint64_t nsuptime;
d337f35e 16006+
927ca606 16007+ ktime_get_ts(&cvirt->bias_uptime);
4bf69007
AM
16008+ nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec
16009+ * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;
16010+ cvirt->bias_clock = nsec_to_clock_t(nsuptime);
16011+ cvirt->bias_ts.tv_sec = 0;
16012+ cvirt->bias_ts.tv_nsec = 0;
d337f35e 16013+
4bf69007
AM
16014+ jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
16015+ atomic_set(&cvirt->nr_threads, 0);
16016+ atomic_set(&cvirt->nr_running, 0);
16017+ atomic_set(&cvirt->nr_uninterruptible, 0);
16018+ atomic_set(&cvirt->nr_onhold, 0);
d337f35e 16019+
4bf69007
AM
16020+ spin_lock_init(&cvirt->load_lock);
16021+ cvirt->load_last = jiffies;
16022+ atomic_set(&cvirt->load_updates, 0);
16023+ cvirt->load[0] = 0;
16024+ cvirt->load[1] = 0;
16025+ cvirt->load[2] = 0;
16026+ atomic_set(&cvirt->total_forks, 0);
d337f35e 16027+
4bf69007
AM
16028+ spin_lock_init(&cvirt->syslog.logbuf_lock);
16029+ init_waitqueue_head(&cvirt->syslog.log_wait);
16030+ cvirt->syslog.log_start = 0;
16031+ cvirt->syslog.log_end = 0;
16032+ cvirt->syslog.con_start = 0;
16033+ cvirt->syslog.logged_chars = 0;
16034+}
16035+
/*
 * vx_info_init_cvirt_pc - per-cpu counterpart of vx_info_init_cvirt()
 *
 * Currently performs no initialisation at all; the original only carried
 * a disabled "cvirt_pc->cpustat = { 0 }" statement here.
 */
static inline
void vx_info_init_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
{
	/* intentionally empty */
}
d337f35e 16041+
4bf69007
AM
16042+static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
16043+{
16044+#ifdef CONFIG_VSERVER_WARN
16045+ int value;
16046+#endif
16047+ vxwprintk_xid((value = atomic_read(&cvirt->nr_threads)),
16048+ "!!! cvirt: %p[nr_threads] = %d on exit.",
16049+ cvirt, value);
16050+ vxwprintk_xid((value = atomic_read(&cvirt->nr_running)),
16051+ "!!! cvirt: %p[nr_running] = %d on exit.",
16052+ cvirt, value);
16053+ vxwprintk_xid((value = atomic_read(&cvirt->nr_uninterruptible)),
16054+ "!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
16055+ cvirt, value);
16056+ vxwprintk_xid((value = atomic_read(&cvirt->nr_onhold)),
16057+ "!!! cvirt: %p[nr_onhold] = %d on exit.",
16058+ cvirt, value);
16059+ return;
16060+}
d337f35e 16061+
4bf69007
AM
/*
 * vx_info_exit_cvirt_pc - per-cpu counterpart of vx_info_exit_cvirt()
 *
 * Nothing needs to be released or checked for the per-cpu data.
 */
static inline
void vx_info_exit_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
{
	/* intentionally empty */
}
d337f35e 16067+
8931d859
AM
16068diff -NurpP --minimal linux-4.4.161/kernel/vserver/cvirt_proc.h linux-4.4.161-vs2.3.9.8/kernel/vserver/cvirt_proc.h
16069--- linux-4.4.161/kernel/vserver/cvirt_proc.h 1970-01-01 00:00:00.000000000 +0000
16070+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/cvirt_proc.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
16071@@ -0,0 +1,123 @@
16072+#ifndef _VX_CVIRT_PROC_H
16073+#define _VX_CVIRT_PROC_H
d337f35e 16074+
4bf69007
AM
16075+#include <linux/nsproxy.h>
16076+#include <linux/mnt_namespace.h>
16077+#include <linux/ipc_namespace.h>
16078+#include <linux/utsname.h>
16079+#include <linux/ipc.h>
d337f35e 16080+
4bf69007 16081+extern int vx_info_mnt_namespace(struct mnt_namespace *, char *);
d337f35e 16082+
4bf69007
AM
16083+static inline
16084+int vx_info_proc_nsproxy(struct nsproxy *nsproxy, char *buffer)
16085+{
16086+ struct mnt_namespace *ns;
16087+ struct uts_namespace *uts;
16088+ struct ipc_namespace *ipc;
16089+ int length = 0;
d337f35e 16090+
4bf69007
AM
16091+ if (!nsproxy)
16092+ goto out;
d337f35e 16093+
4bf69007
AM
16094+ length += sprintf(buffer + length,
16095+ "NSProxy:\t%p [%p,%p,%p]\n",
16096+ nsproxy, nsproxy->mnt_ns,
16097+ nsproxy->uts_ns, nsproxy->ipc_ns);
d337f35e 16098+
4bf69007
AM
16099+ ns = nsproxy->mnt_ns;
16100+ if (!ns)
16101+ goto skip_ns;
d337f35e 16102+
4bf69007 16103+ length += vx_info_mnt_namespace(ns, buffer + length);
d337f35e 16104+
4bf69007 16105+skip_ns:
d337f35e 16106+
4bf69007
AM
16107+ uts = nsproxy->uts_ns;
16108+ if (!uts)
16109+ goto skip_uts;
d337f35e 16110+
4bf69007
AM
16111+ length += sprintf(buffer + length,
16112+ "SysName:\t%.*s\n"
16113+ "NodeName:\t%.*s\n"
16114+ "Release:\t%.*s\n"
16115+ "Version:\t%.*s\n"
16116+ "Machine:\t%.*s\n"
16117+ "DomainName:\t%.*s\n",
16118+ __NEW_UTS_LEN, uts->name.sysname,
16119+ __NEW_UTS_LEN, uts->name.nodename,
16120+ __NEW_UTS_LEN, uts->name.release,
16121+ __NEW_UTS_LEN, uts->name.version,
16122+ __NEW_UTS_LEN, uts->name.machine,
16123+ __NEW_UTS_LEN, uts->name.domainname);
16124+skip_uts:
d337f35e 16125+
4bf69007
AM
16126+ ipc = nsproxy->ipc_ns;
16127+ if (!ipc)
16128+ goto skip_ipc;
d337f35e 16129+
4bf69007
AM
16130+ length += sprintf(buffer + length,
16131+ "SEMS:\t\t%d %d %d %d %d\n"
16132+ "MSG:\t\t%d %d %d\n"
b00e13aa 16133+ "SHM:\t\t%lu %lu %d %ld\n",
4bf69007
AM
16134+ ipc->sem_ctls[0], ipc->sem_ctls[1],
16135+ ipc->sem_ctls[2], ipc->sem_ctls[3],
16136+ ipc->used_sems,
16137+ ipc->msg_ctlmax, ipc->msg_ctlmnb, ipc->msg_ctlmni,
16138+ (unsigned long)ipc->shm_ctlmax,
16139+ (unsigned long)ipc->shm_ctlall,
16140+ ipc->shm_ctlmni, ipc->shm_tot);
16141+skip_ipc:
16142+out:
16143+ return length;
16144+}
d337f35e
JR
16145+
16146+
4bf69007 16147+#include <linux/sched.h>
d337f35e 16148+
4bf69007
AM
/* integer part of a fixed-point load value (FSHIFT fractional bits) */
#define LOAD_INT(x)	((x) >> FSHIFT)
/* fractional part of a fixed-point load value, scaled to hundredths */
#define LOAD_FRAC(x)	((((x) & (FIXED_1 - 1)) * 100) >> FSHIFT)
d337f35e 16151+
4bf69007
AM
16152+static inline
16153+int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
d337f35e 16154+{
4bf69007
AM
16155+ int length = 0;
16156+ int a, b, c;
d337f35e 16157+
4bf69007
AM
16158+ length += sprintf(buffer + length,
16159+ "BiasUptime:\t%lu.%02lu\n",
16160+ (unsigned long)cvirt->bias_uptime.tv_sec,
16161+ (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
d337f35e 16162+
4bf69007
AM
16163+ a = cvirt->load[0] + (FIXED_1 / 200);
16164+ b = cvirt->load[1] + (FIXED_1 / 200);
16165+ c = cvirt->load[2] + (FIXED_1 / 200);
16166+ length += sprintf(buffer + length,
16167+ "nr_threads:\t%d\n"
16168+ "nr_running:\t%d\n"
16169+ "nr_unintr:\t%d\n"
16170+ "nr_onhold:\t%d\n"
16171+ "load_updates:\t%d\n"
16172+ "loadavg:\t%d.%02d %d.%02d %d.%02d\n"
16173+ "total_forks:\t%d\n",
16174+ atomic_read(&cvirt->nr_threads),
16175+ atomic_read(&cvirt->nr_running),
16176+ atomic_read(&cvirt->nr_uninterruptible),
16177+ atomic_read(&cvirt->nr_onhold),
16178+ atomic_read(&cvirt->load_updates),
16179+ LOAD_INT(a), LOAD_FRAC(a),
16180+ LOAD_INT(b), LOAD_FRAC(b),
16181+ LOAD_INT(c), LOAD_FRAC(c),
16182+ atomic_read(&cvirt->total_forks));
16183+ return length;
d337f35e
JR
16184+}
16185+
4bf69007
AM
/*
 * vx_info_proc_cvirt_pc - per-cpu counterpart of vx_info_proc_cvirt()
 *
 * Produces no output; always returns 0 characters written.
 */
static inline
int vx_info_proc_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc,
	char *buffer, int cpu)
{
	return 0;
}
d337f35e 16193+
4bf69007 16194+#endif /* _VX_CVIRT_PROC_H */
8931d859
AM
16195diff -NurpP --minimal linux-4.4.161/kernel/vserver/debug.c linux-4.4.161-vs2.3.9.8/kernel/vserver/debug.c
16196--- linux-4.4.161/kernel/vserver/debug.c 1970-01-01 00:00:00.000000000 +0000
16197+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/debug.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
16198@@ -0,0 +1,32 @@
16199+/*
16200+ * kernel/vserver/debug.c
16201+ *
d6221c00 16202+ * Copyright (C) 2005-2007 Herbert Pötzl
4bf69007
AM
16203+ *
16204+ * V0.01 vx_info dump support
16205+ *
16206+ */
d337f35e 16207+
4bf69007 16208+#include <linux/module.h>
d337f35e 16209+
4bf69007 16210+#include <linux/vserver/context.h>
d337f35e 16211+
d337f35e 16212+
4bf69007 16213+void dump_vx_info(struct vx_info *vxi, int level)
d337f35e 16214+{
4bf69007
AM
16215+ printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id,
16216+ atomic_read(&vxi->vx_usecnt),
16217+ atomic_read(&vxi->vx_tasks),
16218+ vxi->vx_state);
16219+ if (level > 0) {
16220+ __dump_vx_limit(&vxi->limit);
16221+ __dump_vx_sched(&vxi->sched);
16222+ __dump_vx_cvirt(&vxi->cvirt);
16223+ __dump_vx_cacct(&vxi->cacct);
16224+ }
16225+ printk("---\n");
16226+}
d337f35e 16227+
d337f35e 16228+
4bf69007 16229+EXPORT_SYMBOL_GPL(dump_vx_info);
d337f35e 16230+
8931d859
AM
16231diff -NurpP --minimal linux-4.4.161/kernel/vserver/device.c linux-4.4.161-vs2.3.9.8/kernel/vserver/device.c
16232--- linux-4.4.161/kernel/vserver/device.c 1970-01-01 00:00:00.000000000 +0000
16233+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/device.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
16234@@ -0,0 +1,443 @@
16235+/*
16236+ * linux/kernel/vserver/device.c
16237+ *
16238+ * Linux-VServer: Device Support
16239+ *
d6221c00 16240+ * Copyright (C) 2006 Herbert Pötzl
4bf69007
AM
16241+ * Copyright (C) 2007 Daniel Hokka Zakrisson
16242+ *
16243+ * V0.01 device mapping basics
16244+ * V0.02 added defaults
16245+ *
16246+ */
d337f35e 16247+
4bf69007
AM
16248+#include <linux/slab.h>
16249+#include <linux/rcupdate.h>
16250+#include <linux/fs.h>
16251+#include <linux/namei.h>
16252+#include <linux/hash.h>
d337f35e 16253+
4bf69007
AM
16254+#include <asm/errno.h>
16255+#include <asm/uaccess.h>
16256+#include <linux/vserver/base.h>
16257+#include <linux/vserver/debug.h>
16258+#include <linux/vserver/context.h>
16259+#include <linux/vserver/device.h>
16260+#include <linux/vserver/device_cmd.h>
d337f35e 16261+
d337f35e 16262+
4bf69007 16263+#define DMAP_HASH_BITS 4
d337f35e 16264+
d337f35e 16265+
4bf69007
AM
16266+struct vs_mapping {
16267+ union {
16268+ struct hlist_node hlist;
16269+ struct list_head list;
16270+ } u;
16271+#define dm_hlist u.hlist
16272+#define dm_list u.list
61333608 16273+ vxid_t xid;
4bf69007
AM
16274+ dev_t device;
16275+ struct vx_dmap_target target;
16276+};
d337f35e 16277+
d337f35e 16278+
4bf69007 16279+static struct hlist_head dmap_main_hash[1 << DMAP_HASH_BITS];
d337f35e 16280+
4bf69007 16281+static DEFINE_SPINLOCK(dmap_main_hash_lock);
d337f35e 16282+
4bf69007
AM
16283+static struct vx_dmap_target dmap_defaults[2] = {
16284+ { .flags = DATTR_OPEN },
16285+ { .flags = DATTR_OPEN },
16286+};
d337f35e
JR
16287+
16288+
4bf69007 16289+struct kmem_cache *dmap_cachep __read_mostly;
d337f35e 16290+
4bf69007
AM
16291+int __init dmap_cache_init(void)
16292+{
16293+ dmap_cachep = kmem_cache_create("dmap_cache",
16294+ sizeof(struct vs_mapping), 0,
16295+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
d337f35e
JR
16296+ return 0;
16297+}
16298+
4bf69007 16299+__initcall(dmap_cache_init);
d337f35e 16300+
4bf69007
AM
16301+
16302+static inline unsigned int __hashval(dev_t dev, int bits)
d337f35e 16303+{
4bf69007
AM
16304+ return hash_long((unsigned long)dev, bits);
16305+}
d337f35e 16306+
d337f35e 16307+
4bf69007
AM
16308+/* __hash_mapping()
16309+ * add the mapping to the hash table
16310+ */
16311+static inline void __hash_mapping(struct vx_info *vxi, struct vs_mapping *vdm)
16312+{
16313+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16314+ struct hlist_head *head, *hash = dmap_main_hash;
16315+ int device = vdm->device;
d337f35e 16316+
4bf69007
AM
16317+ spin_lock(hash_lock);
16318+ vxdprintk(VXD_CBIT(misc, 8), "__hash_mapping: %p[#%d] %08x:%08x",
16319+ vxi, vxi ? vxi->vx_id : 0, device, vdm->target.target);
d337f35e 16320+
4bf69007
AM
16321+ head = &hash[__hashval(device, DMAP_HASH_BITS)];
16322+ hlist_add_head(&vdm->dm_hlist, head);
16323+ spin_unlock(hash_lock);
16324+}
16325+
16326+
16327+static inline int __mode_to_default(umode_t mode)
16328+{
16329+ switch (mode) {
16330+ case S_IFBLK:
16331+ return 0;
16332+ case S_IFCHR:
16333+ return 1;
16334+ default:
16335+ BUG();
d337f35e 16336+ }
d337f35e
JR
16337+}
16338+
4bf69007
AM
16339+
16340+/* __set_default()
16341+ * set a default
16342+ */
16343+static inline void __set_default(struct vx_info *vxi, umode_t mode,
16344+ struct vx_dmap_target *vdmt)
d337f35e 16345+{
4bf69007
AM
16346+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16347+ spin_lock(hash_lock);
d337f35e 16348+
4bf69007
AM
16349+ if (vxi)
16350+ vxi->dmap.targets[__mode_to_default(mode)] = *vdmt;
16351+ else
16352+ dmap_defaults[__mode_to_default(mode)] = *vdmt;
d337f35e 16353+
d337f35e 16354+
4bf69007 16355+ spin_unlock(hash_lock);
d337f35e 16356+
4bf69007
AM
16357+ vxdprintk(VXD_CBIT(misc, 8), "__set_default: %p[#%u] %08x %04x",
16358+ vxi, vxi ? vxi->vx_id : 0, vdmt->target, vdmt->flags);
d337f35e
JR
16359+}
16360+
d337f35e 16361+
4bf69007
AM
16362+/* __remove_default()
16363+ * remove a default
16364+ */
16365+static inline int __remove_default(struct vx_info *vxi, umode_t mode)
d337f35e 16366+{
4bf69007
AM
16367+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16368+ spin_lock(hash_lock);
d337f35e 16369+
4bf69007
AM
16370+ if (vxi)
16371+ vxi->dmap.targets[__mode_to_default(mode)].flags = 0;
16372+ else /* remove == reset */
16373+ dmap_defaults[__mode_to_default(mode)].flags = DATTR_OPEN | mode;
d337f35e 16374+
4bf69007
AM
16375+ spin_unlock(hash_lock);
16376+ return 0;
d337f35e
JR
16377+}
16378+
d337f35e 16379+
4bf69007
AM
16380+/* __find_mapping()
16381+ * find a mapping in the hash table
16382+ *
16383+ * caller must hold hash_lock
16384+ */
61333608 16385+static inline int __find_mapping(vxid_t xid, dev_t device, umode_t mode,
4bf69007
AM
16386+ struct vs_mapping **local, struct vs_mapping **global)
16387+{
16388+ struct hlist_head *hash = dmap_main_hash;
16389+ struct hlist_head *head = &hash[__hashval(device, DMAP_HASH_BITS)];
16390+ struct hlist_node *pos;
16391+ struct vs_mapping *vdm;
d337f35e 16392+
4bf69007
AM
16393+ *local = NULL;
16394+ if (global)
16395+ *global = NULL;
d337f35e 16396+
4bf69007
AM
16397+ hlist_for_each(pos, head) {
16398+ vdm = hlist_entry(pos, struct vs_mapping, dm_hlist);
d337f35e 16399+
4bf69007
AM
16400+ if ((vdm->device == device) &&
16401+ !((vdm->target.flags ^ mode) & S_IFMT)) {
16402+ if (vdm->xid == xid) {
16403+ *local = vdm;
16404+ return 1;
16405+ } else if (global && vdm->xid == 0)
16406+ *global = vdm;
2380c486
JR
16407+ }
16408+ }
16409+
4bf69007
AM
16410+ if (global && *global)
16411+ return 0;
16412+ else
16413+ return -ENOENT;
2380c486
JR
16414+}
16415+
16416+
4bf69007
AM
16417+/* __lookup_mapping()
16418+ * find a mapping and store the result in target and flags
16419+ */
16420+static inline int __lookup_mapping(struct vx_info *vxi,
16421+ dev_t device, dev_t *target, int *flags, umode_t mode)
2380c486 16422+{
4bf69007
AM
16423+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16424+ struct vs_mapping *vdm, *global;
16425+ struct vx_dmap_target *vdmt;
2380c486 16426+ int ret = 0;
61333608 16427+ vxid_t xid = vxi->vx_id;
4bf69007 16428+ int index;
2380c486 16429+
4bf69007
AM
16430+ spin_lock(hash_lock);
16431+ if (__find_mapping(xid, device, mode, &vdm, &global) > 0) {
2380c486 16432+ ret = 1;
4bf69007
AM
16433+ vdmt = &vdm->target;
16434+ goto found;
16435+ }
2380c486 16436+
4bf69007
AM
16437+ index = __mode_to_default(mode);
16438+ if (vxi && vxi->dmap.targets[index].flags) {
16439+ ret = 2;
16440+ vdmt = &vxi->dmap.targets[index];
16441+ } else if (global) {
16442+ ret = 3;
16443+ vdmt = &global->target;
16444+ goto found;
16445+ } else {
16446+ ret = 4;
16447+ vdmt = &dmap_defaults[index];
d337f35e 16448+ }
2380c486 16449+
4bf69007
AM
16450+found:
16451+ if (target && (vdmt->flags & DATTR_REMAP))
16452+ *target = vdmt->target;
16453+ else if (target)
16454+ *target = device;
16455+ if (flags)
16456+ *flags = vdmt->flags;
16457+
16458+ spin_unlock(hash_lock);
2380c486
JR
16459+
16460+ return ret;
d337f35e
JR
16461+}
16462+
16463+
4bf69007
AM
16464+/* __remove_mapping()
16465+ * remove a mapping from the hash table
16466+ */
16467+static inline int __remove_mapping(struct vx_info *vxi, dev_t device,
16468+ umode_t mode)
d337f35e 16469+{
4bf69007
AM
16470+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16471+ struct vs_mapping *vdm = NULL;
d337f35e
JR
16472+ int ret = 0;
16473+
4bf69007
AM
16474+ spin_lock(hash_lock);
16475+
16476+ ret = __find_mapping((vxi ? vxi->vx_id : 0), device, mode, &vdm,
16477+ NULL);
16478+ vxdprintk(VXD_CBIT(misc, 8), "__remove_mapping: %p[#%d] %08x %04x",
16479+ vxi, vxi ? vxi->vx_id : 0, device, mode);
16480+ if (ret < 0)
2380c486 16481+ goto out;
4bf69007 16482+ hlist_del(&vdm->dm_hlist);
2380c486 16483+
2380c486 16484+out:
4bf69007
AM
16485+ spin_unlock(hash_lock);
16486+ if (vdm)
16487+ kmem_cache_free(dmap_cachep, vdm);
2380c486
JR
16488+ return ret;
16489+}
16490+
16491+
2380c486 16492+
4bf69007
AM
16493+int vs_map_device(struct vx_info *vxi,
16494+ dev_t device, dev_t *target, umode_t mode)
2380c486 16495+{
4bf69007 16496+ int ret, flags = DATTR_MASK;
2380c486 16497+
4bf69007
AM
16498+ if (!vxi) {
16499+ if (target)
16500+ *target = device;
2380c486 16501+ goto out;
2380c486 16502+ }
4bf69007
AM
16503+ ret = __lookup_mapping(vxi, device, target, &flags, mode);
16504+ vxdprintk(VXD_CBIT(misc, 8), "vs_map_device: %08x target: %08x flags: %04x mode: %04x mapped=%d",
16505+ device, target ? *target : 0, flags, mode, ret);
2380c486 16506+out:
4bf69007 16507+ return (flags & DATTR_MASK);
2380c486
JR
16508+}
16509+
2380c486 16510+
4bf69007
AM
16511+
16512+static int do_set_mapping(struct vx_info *vxi,
16513+ dev_t device, dev_t target, int flags, umode_t mode)
2380c486 16514+{
4bf69007
AM
16515+ if (device) {
16516+ struct vs_mapping *new;
2380c486 16517+
4bf69007
AM
16518+ new = kmem_cache_alloc(dmap_cachep, GFP_KERNEL);
16519+ if (!new)
16520+ return -ENOMEM;
16521+
16522+ INIT_HLIST_NODE(&new->dm_hlist);
16523+ new->device = device;
16524+ new->target.target = target;
16525+ new->target.flags = flags | mode;
16526+ new->xid = (vxi ? vxi->vx_id : 0);
16527+
16528+ vxdprintk(VXD_CBIT(misc, 8), "do_set_mapping: %08x target: %08x flags: %04x", device, target, flags);
16529+ __hash_mapping(vxi, new);
16530+ } else {
16531+ struct vx_dmap_target new = {
16532+ .target = target,
16533+ .flags = flags | mode,
16534+ };
16535+ __set_default(vxi, mode, &new);
16536+ }
16537+ return 0;
2380c486
JR
16538+}
16539+
4bf69007
AM
16540+
16541+static int do_unset_mapping(struct vx_info *vxi,
16542+ dev_t device, dev_t target, int flags, umode_t mode)
2380c486 16543+{
4bf69007 16544+ int ret = -EINVAL;
763640ca 16545+
4bf69007
AM
16546+ if (device) {
16547+ ret = __remove_mapping(vxi, device, mode);
16548+ if (ret < 0)
16549+ goto out;
16550+ } else {
16551+ ret = __remove_default(vxi, mode);
16552+ if (ret < 0)
16553+ goto out;
16554+ }
2380c486 16555+
4bf69007
AM
16556+out:
16557+ return ret;
16558+}
2380c486 16559+
2380c486 16560+
4bf69007
AM
16561+static inline int __user_device(const char __user *name, dev_t *dev,
16562+ umode_t *mode)
16563+{
927ca606 16564+ struct path path;
4bf69007 16565+ int ret;
2380c486 16566+
4bf69007
AM
16567+ if (!name) {
16568+ *dev = 0;
16569+ return 0;
16570+ }
927ca606 16571+ ret = user_lpath(name, &path);
4bf69007
AM
16572+ if (ret)
16573+ return ret;
927ca606
AM
16574+ if (path.dentry->d_inode) {
16575+ *dev = path.dentry->d_inode->i_rdev;
16576+ *mode = path.dentry->d_inode->i_mode;
4bf69007 16577+ }
927ca606 16578+ path_put(&path);
4bf69007
AM
16579+ return 0;
16580+}
2380c486 16581+
4bf69007
AM
16582+static inline int __mapping_mode(dev_t device, dev_t target,
16583+ umode_t device_mode, umode_t target_mode, umode_t *mode)
16584+{
16585+ if (device)
16586+ *mode = device_mode & S_IFMT;
16587+ else if (target)
16588+ *mode = target_mode & S_IFMT;
16589+ else
16590+ return -EINVAL;
2380c486 16591+
4bf69007
AM
16592+ /* if both given, device and target mode have to match */
16593+ if (device && target &&
16594+ ((device_mode ^ target_mode) & S_IFMT))
16595+ return -EINVAL;
16596+ return 0;
16597+}
d337f35e 16598+
d337f35e 16599+
4bf69007
AM
16600+static inline int do_mapping(struct vx_info *vxi, const char __user *device_path,
16601+ const char __user *target_path, int flags, int set)
16602+{
16603+ dev_t device = ~0, target = ~0;
16604+ umode_t device_mode = 0, target_mode = 0, mode;
16605+ int ret;
2380c486 16606+
4bf69007
AM
16607+ ret = __user_device(device_path, &device, &device_mode);
16608+ if (ret)
16609+ return ret;
16610+ ret = __user_device(target_path, &target, &target_mode);
16611+ if (ret)
16612+ return ret;
2380c486 16613+
4bf69007
AM
16614+ ret = __mapping_mode(device, target,
16615+ device_mode, target_mode, &mode);
16616+ if (ret)
16617+ return ret;
2380c486 16618+
4bf69007
AM
16619+ if (set)
16620+ return do_set_mapping(vxi, device, target,
16621+ flags, mode);
16622+ else
16623+ return do_unset_mapping(vxi, device, target,
16624+ flags, mode);
d337f35e
JR
16625+}
16626+
d337f35e 16627+
4bf69007
AM
16628+int vc_set_mapping(struct vx_info *vxi, void __user *data)
16629+{
16630+ struct vcmd_set_mapping_v0 vc_data;
d337f35e 16631+
4bf69007
AM
16632+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16633+ return -EFAULT;
d337f35e 16634+
4bf69007
AM
16635+ return do_mapping(vxi, vc_data.device, vc_data.target,
16636+ vc_data.flags, 1);
16637+}
d337f35e 16638+
4bf69007 16639+int vc_unset_mapping(struct vx_info *vxi, void __user *data)
d337f35e 16640+{
4bf69007 16641+ struct vcmd_set_mapping_v0 vc_data;
d337f35e 16642+
4bf69007
AM
16643+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16644+ return -EFAULT;
16645+
16646+ return do_mapping(vxi, vc_data.device, vc_data.target,
16647+ vc_data.flags, 0);
d337f35e
JR
16648+}
16649+
16650+
4bf69007
AM
16651+#ifdef CONFIG_COMPAT
16652+
16653+int vc_set_mapping_x32(struct vx_info *vxi, void __user *data)
d337f35e 16654+{
4bf69007 16655+ struct vcmd_set_mapping_v0_x32 vc_data;
d337f35e 16656+
4bf69007
AM
16657+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16658+ return -EFAULT;
16659+
16660+ return do_mapping(vxi, compat_ptr(vc_data.device_ptr),
16661+ compat_ptr(vc_data.target_ptr), vc_data.flags, 1);
d337f35e
JR
16662+}
16663+
4bf69007
AM
16664+int vc_unset_mapping_x32(struct vx_info *vxi, void __user *data)
16665+{
16666+ struct vcmd_set_mapping_v0_x32 vc_data;
16667+
16668+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16669+ return -EFAULT;
d337f35e 16670+
4bf69007
AM
16671+ return do_mapping(vxi, compat_ptr(vc_data.device_ptr),
16672+ compat_ptr(vc_data.target_ptr), vc_data.flags, 0);
16673+}
d337f35e 16674+
4bf69007 16675+#endif /* CONFIG_COMPAT */
d337f35e 16676+
4bf69007 16677+
8931d859
AM
16678diff -NurpP --minimal linux-4.4.161/kernel/vserver/dlimit.c linux-4.4.161-vs2.3.9.8/kernel/vserver/dlimit.c
16679--- linux-4.4.161/kernel/vserver/dlimit.c 1970-01-01 00:00:00.000000000 +0000
16680+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/dlimit.c 2018-10-20 04:57:21.000000000 +0000
b00e13aa 16681@@ -0,0 +1,528 @@
d337f35e 16682+/*
4bf69007 16683+ * linux/kernel/vserver/dlimit.c
d337f35e 16684+ *
4bf69007 16685+ * Virtual Server: Context Disk Limits
d337f35e 16686+ *
d6221c00 16687+ * Copyright (C) 2004-2009 Herbert Pötzl
d337f35e 16688+ *
4bf69007
AM
16689+ * V0.01 initial version
16690+ * V0.02 compat32 splitup
16691+ * V0.03 extended interface
d337f35e
JR
16692+ *
16693+ */
16694+
4bf69007
AM
16695+#include <linux/statfs.h>
16696+#include <linux/sched.h>
2380c486 16697+#include <linux/namei.h>
d337f35e 16698+#include <linux/vs_tag.h>
4bf69007
AM
16699+#include <linux/vs_dlimit.h>
16700+#include <linux/vserver/dlimit_cmd.h>
16701+#include <linux/slab.h>
16702+// #include <linux/gfp.h>
d337f35e 16703+
d337f35e
JR
16704+#include <asm/uaccess.h>
16705+
4bf69007 16706+/* __alloc_dl_info()
d337f35e 16707+
4bf69007
AM
16708+ * allocate an initialized dl_info struct
16709+ * doesn't make it visible (hash) */
d337f35e 16710+
61333608 16711+static struct dl_info *__alloc_dl_info(struct super_block *sb, vtag_t tag)
4bf69007
AM
16712+{
16713+ struct dl_info *new = NULL;
d337f35e 16714+
4bf69007
AM
16715+ vxdprintk(VXD_CBIT(dlim, 5),
16716+ "alloc_dl_info(%p,%d)*", sb, tag);
d337f35e 16717+
4bf69007
AM
16718+ /* would this benefit from a slab cache? */
16719+ new = kmalloc(sizeof(struct dl_info), GFP_KERNEL);
16720+ if (!new)
16721+ return 0;
d337f35e 16722+
4bf69007
AM
16723+ memset(new, 0, sizeof(struct dl_info));
16724+ new->dl_tag = tag;
16725+ new->dl_sb = sb;
16726+ // INIT_RCU_HEAD(&new->dl_rcu);
16727+ INIT_HLIST_NODE(&new->dl_hlist);
16728+ spin_lock_init(&new->dl_lock);
16729+ atomic_set(&new->dl_refcnt, 0);
16730+ atomic_set(&new->dl_usecnt, 0);
d337f35e 16731+
4bf69007 16732+ /* rest of init goes here */
d337f35e 16733+
4bf69007
AM
16734+ vxdprintk(VXD_CBIT(dlim, 4),
16735+ "alloc_dl_info(%p,%d) = %p", sb, tag, new);
16736+ return new;
16737+}
d4263eb0 16738+
4bf69007 16739+/* __dealloc_dl_info()
d337f35e 16740+
4bf69007 16741+ * final disposal of dl_info */
d337f35e 16742+
4bf69007 16743+static void __dealloc_dl_info(struct dl_info *dli)
adc1caaa 16744+{
4bf69007
AM
16745+ vxdprintk(VXD_CBIT(dlim, 4),
16746+ "dealloc_dl_info(%p)", dli);
2380c486 16747+
4bf69007
AM
16748+ dli->dl_hlist.next = LIST_POISON1;
16749+ dli->dl_tag = -1;
16750+ dli->dl_sb = 0;
2380c486 16751+
4bf69007
AM
16752+ BUG_ON(atomic_read(&dli->dl_usecnt));
16753+ BUG_ON(atomic_read(&dli->dl_refcnt));
2380c486 16754+
4bf69007 16755+ kfree(dli);
adc1caaa 16756+}
2380c486 16757+
2380c486 16758+
4bf69007 16759+/* hash table for dl_info hash */
2380c486 16760+
4bf69007 16761+#define DL_HASH_SIZE 13
2380c486 16762+
4bf69007 16763+struct hlist_head dl_info_hash[DL_HASH_SIZE];
2380c486 16764+
4bf69007 16765+static DEFINE_SPINLOCK(dl_info_hash_lock);
2380c486 16766+
d33d7b00 16767+
61333608 16768+static inline unsigned int __hashval(struct super_block *sb, vtag_t tag)
adc1caaa 16769+{
4bf69007
AM
16770+ return ((tag ^ (unsigned long)sb) % DL_HASH_SIZE);
16771+}
2380c486 16772+
2380c486 16773+
2380c486 16774+
4bf69007 16775+/* __hash_dl_info()
2380c486 16776+
4bf69007
AM
16777+ * add the dli to the global hash table
16778+ * requires the hash_lock to be held */
2380c486 16779+
4bf69007
AM
16780+static inline void __hash_dl_info(struct dl_info *dli)
16781+{
16782+ struct hlist_head *head;
d337f35e 16783+
4bf69007
AM
16784+ vxdprintk(VXD_CBIT(dlim, 6),
16785+ "__hash_dl_info: %p[#%d]", dli, dli->dl_tag);
16786+ get_dl_info(dli);
16787+ head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_tag)];
16788+ hlist_add_head_rcu(&dli->dl_hlist, head);
16789+}
d337f35e 16790+
4bf69007 16791+/* __unhash_dl_info()
3bac966d 16792+
4bf69007
AM
16793+ * remove the dli from the global hash table
16794+ * requires the hash_lock to be held */
3bac966d 16795+
4bf69007
AM
16796+static inline void __unhash_dl_info(struct dl_info *dli)
16797+{
16798+ vxdprintk(VXD_CBIT(dlim, 6),
16799+ "__unhash_dl_info: %p[#%d]", dli, dli->dl_tag);
16800+ hlist_del_rcu(&dli->dl_hlist);
16801+ put_dl_info(dli);
16802+}
3bac966d 16803+
3bac966d 16804+
4bf69007 16805+/* __lookup_dl_info()
3bac966d 16806+
4bf69007
AM
16807+ * requires the rcu_read_lock()
16808+ * doesn't increment the dl_refcnt */
3bac966d 16809+
61333608 16810+static inline struct dl_info *__lookup_dl_info(struct super_block *sb, vtag_t tag)
4bf69007
AM
16811+{
16812+ struct hlist_head *head = &dl_info_hash[__hashval(sb, tag)];
4bf69007 16813+ struct dl_info *dli;
3bac966d 16814+
b00e13aa
AM
16815+ hlist_for_each_entry_rcu(dli, head, dl_hlist) {
16816+ if (dli->dl_tag == tag && dli->dl_sb == sb)
4bf69007 16817+ return dli;
d33d7b00 16818+ }
4bf69007
AM
16819+ return NULL;
16820+}
3bac966d 16821+
3bac966d 16822+
61333608 16823+struct dl_info *locate_dl_info(struct super_block *sb, vtag_t tag)
4bf69007
AM
16824+{
16825+ struct dl_info *dli;
16826+
16827+ rcu_read_lock();
16828+ dli = get_dl_info(__lookup_dl_info(sb, tag));
16829+ vxdprintk(VXD_CBIT(dlim, 7),
16830+ "locate_dl_info(%p,#%d) = %p", sb, tag, dli);
16831+ rcu_read_unlock();
16832+ return dli;
d33d7b00 16833+}
3bac966d 16834+
4bf69007 16835+void rcu_free_dl_info(struct rcu_head *head)
d33d7b00 16836+{
4bf69007
AM
16837+ struct dl_info *dli = container_of(head, struct dl_info, dl_rcu);
16838+ int usecnt, refcnt;
3bac966d 16839+
4bf69007 16840+ BUG_ON(!dli || !head);
3bac966d 16841+
4bf69007
AM
16842+ usecnt = atomic_read(&dli->dl_usecnt);
16843+ BUG_ON(usecnt < 0);
3bac966d 16844+
4bf69007
AM
16845+ refcnt = atomic_read(&dli->dl_refcnt);
16846+ BUG_ON(refcnt < 0);
16847+
16848+ vxdprintk(VXD_CBIT(dlim, 3),
16849+ "rcu_free_dl_info(%p)", dli);
16850+ if (!usecnt)
16851+ __dealloc_dl_info(dli);
16852+ else
16853+ printk("!!! rcu didn't free\n");
d33d7b00 16854+}
3bac966d 16855+
3bac966d 16856+
4bf69007
AM
16857+
16858+
16859+static int do_addrem_dlimit(uint32_t id, const char __user *name,
16860+ uint32_t flags, int add)
d33d7b00
AM
16861+{
16862+ struct path path;
d33d7b00 16863+ int ret;
3bac966d 16864+
4bf69007 16865+ ret = user_lpath(name, &path);
d33d7b00 16866+ if (!ret) {
4bf69007
AM
16867+ struct super_block *sb;
16868+ struct dl_info *dli;
16869+
16870+ ret = -EINVAL;
16871+ if (!path.dentry->d_inode)
16872+ goto out_release;
16873+ if (!(sb = path.dentry->d_inode->i_sb))
16874+ goto out_release;
16875+
16876+ if (add) {
16877+ dli = __alloc_dl_info(sb, id);
16878+ spin_lock(&dl_info_hash_lock);
16879+
16880+ ret = -EEXIST;
16881+ if (__lookup_dl_info(sb, id))
16882+ goto out_unlock;
16883+ __hash_dl_info(dli);
16884+ dli = NULL;
16885+ } else {
16886+ spin_lock(&dl_info_hash_lock);
16887+ dli = __lookup_dl_info(sb, id);
16888+
16889+ ret = -ESRCH;
16890+ if (!dli)
16891+ goto out_unlock;
16892+ __unhash_dl_info(dli);
16893+ }
16894+ ret = 0;
16895+ out_unlock:
16896+ spin_unlock(&dl_info_hash_lock);
16897+ if (add && dli)
16898+ __dealloc_dl_info(dli);
16899+ out_release:
d33d7b00
AM
16900+ path_put(&path);
16901+ }
d33d7b00
AM
16902+ return ret;
16903+}
3bac966d 16904+
4bf69007 16905+int vc_add_dlimit(uint32_t id, void __user *data)
d33d7b00 16906+{
4bf69007 16907+ struct vcmd_ctx_dlimit_base_v0 vc_data;
3bac966d 16908+
d33d7b00
AM
16909+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16910+ return -EFAULT;
3bac966d 16911+
4bf69007
AM
16912+ return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1);
16913+}
3bac966d 16914+
4bf69007
AM
16915+int vc_rem_dlimit(uint32_t id, void __user *data)
16916+{
16917+ struct vcmd_ctx_dlimit_base_v0 vc_data;
3bac966d 16918+
4bf69007 16919+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d33d7b00 16920+ return -EFAULT;
4bf69007
AM
16921+
16922+ return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0);
d33d7b00 16923+}
3bac966d 16924+
4bf69007 16925+#ifdef CONFIG_COMPAT
3bac966d 16926+
4bf69007
AM
16927+int vc_add_dlimit_x32(uint32_t id, void __user *data)
16928+{
16929+ struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
3bac966d 16930+
4bf69007
AM
16931+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16932+ return -EFAULT;
d337f35e 16933+
4bf69007
AM
16934+ return do_addrem_dlimit(id,
16935+ compat_ptr(vc_data.name_ptr), vc_data.flags, 1);
16936+}
d337f35e 16937+
4bf69007 16938+int vc_rem_dlimit_x32(uint32_t id, void __user *data)
d33d7b00 16939+{
4bf69007 16940+ struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
d337f35e 16941+
4bf69007
AM
16942+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16943+ return -EFAULT;
16944+
16945+ return do_addrem_dlimit(id,
16946+ compat_ptr(vc_data.name_ptr), vc_data.flags, 0);
d33d7b00 16947+}
d337f35e 16948+
4bf69007
AM
16949+#endif /* CONFIG_COMPAT */
16950+
16951+
16952+static inline
16953+int do_set_dlimit(uint32_t id, const char __user *name,
16954+ uint32_t space_used, uint32_t space_total,
16955+ uint32_t inodes_used, uint32_t inodes_total,
16956+ uint32_t reserved, uint32_t flags)
d33d7b00 16957+{
4bf69007
AM
16958+ struct path path;
16959+ int ret;
ba86f833 16960+
4bf69007
AM
16961+ ret = user_lpath(name, &path);
16962+ if (!ret) {
16963+ struct super_block *sb;
16964+ struct dl_info *dli;
d337f35e 16965+
4bf69007
AM
16966+ ret = -EINVAL;
16967+ if (!path.dentry->d_inode)
16968+ goto out_release;
16969+ if (!(sb = path.dentry->d_inode->i_sb))
16970+ goto out_release;
d337f35e 16971+
4bf69007
AM
16972+ /* sanity checks */
16973+ if ((reserved != CDLIM_KEEP &&
16974+ reserved > 100) ||
16975+ (inodes_used != CDLIM_KEEP &&
16976+ inodes_used > inodes_total) ||
16977+ (space_used != CDLIM_KEEP &&
16978+ space_used > space_total))
16979+ goto out_release;
d337f35e 16980+
4bf69007
AM
16981+ ret = -ESRCH;
16982+ dli = locate_dl_info(sb, id);
16983+ if (!dli)
16984+ goto out_release;
ba86f833 16985+
4bf69007 16986+ spin_lock(&dli->dl_lock);
d337f35e 16987+
4bf69007
AM
16988+ if (inodes_used != CDLIM_KEEP)
16989+ dli->dl_inodes_used = inodes_used;
16990+ if (inodes_total != CDLIM_KEEP)
16991+ dli->dl_inodes_total = inodes_total;
16992+ if (space_used != CDLIM_KEEP)
16993+ dli->dl_space_used = dlimit_space_32to64(
16994+ space_used, flags, DLIMS_USED);
d337f35e 16995+
4bf69007
AM
16996+ if (space_total == CDLIM_INFINITY)
16997+ dli->dl_space_total = DLIM_INFINITY;
16998+ else if (space_total != CDLIM_KEEP)
16999+ dli->dl_space_total = dlimit_space_32to64(
17000+ space_total, flags, DLIMS_TOTAL);
78865d5b 17001+
4bf69007
AM
17002+ if (reserved != CDLIM_KEEP)
17003+ dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100;
78865d5b 17004+
4bf69007 17005+ spin_unlock(&dli->dl_lock);
d337f35e 17006+
4bf69007
AM
17007+ put_dl_info(dli);
17008+ ret = 0;
d337f35e 17009+
4bf69007
AM
17010+ out_release:
17011+ path_put(&path);
17012+ }
17013+ return ret;
17014+}
d337f35e 17015+
4bf69007
AM
17016+int vc_set_dlimit(uint32_t id, void __user *data)
17017+{
17018+ struct vcmd_ctx_dlimit_v0 vc_data;
d337f35e 17019+
4bf69007
AM
17020+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17021+ return -EFAULT;
d337f35e 17022+
4bf69007
AM
17023+ return do_set_dlimit(id, vc_data.name,
17024+ vc_data.space_used, vc_data.space_total,
17025+ vc_data.inodes_used, vc_data.inodes_total,
17026+ vc_data.reserved, vc_data.flags);
17027+}
d337f35e 17028+
4bf69007 17029+#ifdef CONFIG_COMPAT
d337f35e 17030+
4bf69007
AM
17031+int vc_set_dlimit_x32(uint32_t id, void __user *data)
17032+{
17033+ struct vcmd_ctx_dlimit_v0_x32 vc_data;
d337f35e 17034+
4bf69007
AM
17035+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17036+ return -EFAULT;
d337f35e 17037+
4bf69007
AM
17038+ return do_set_dlimit(id, compat_ptr(vc_data.name_ptr),
17039+ vc_data.space_used, vc_data.space_total,
17040+ vc_data.inodes_used, vc_data.inodes_total,
17041+ vc_data.reserved, vc_data.flags);
17042+}
d337f35e 17043+
4bf69007 17044+#endif /* CONFIG_COMPAT */
d337f35e 17045+
d337f35e 17046+
4bf69007
AM
17047+static inline
17048+int do_get_dlimit(uint32_t id, const char __user *name,
17049+ uint32_t *space_used, uint32_t *space_total,
17050+ uint32_t *inodes_used, uint32_t *inodes_total,
17051+ uint32_t *reserved, uint32_t *flags)
17052+{
17053+ struct path path;
17054+ int ret;
d337f35e 17055+
4bf69007
AM
17056+ ret = user_lpath(name, &path);
17057+ if (!ret) {
17058+ struct super_block *sb;
17059+ struct dl_info *dli;
d337f35e 17060+
4bf69007
AM
17061+ ret = -EINVAL;
17062+ if (!path.dentry->d_inode)
17063+ goto out_release;
17064+ if (!(sb = path.dentry->d_inode->i_sb))
17065+ goto out_release;
d337f35e 17066+
4bf69007
AM
17067+ ret = -ESRCH;
17068+ dli = locate_dl_info(sb, id);
17069+ if (!dli)
17070+ goto out_release;
d337f35e 17071+
4bf69007
AM
17072+ spin_lock(&dli->dl_lock);
17073+ *inodes_used = dli->dl_inodes_used;
17074+ *inodes_total = dli->dl_inodes_total;
d337f35e 17075+
4bf69007
AM
17076+ *space_used = dlimit_space_64to32(
17077+ dli->dl_space_used, flags, DLIMS_USED);
d337f35e 17078+
4bf69007
AM
17079+ if (dli->dl_space_total == DLIM_INFINITY)
17080+ *space_total = CDLIM_INFINITY;
17081+ else
17082+ *space_total = dlimit_space_64to32(
17083+ dli->dl_space_total, flags, DLIMS_TOTAL);
d337f35e 17084+
4bf69007
AM
17085+ *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);
17086+ spin_unlock(&dli->dl_lock);
d337f35e 17087+
4bf69007
AM
17088+ put_dl_info(dli);
17089+ ret = -EFAULT;
d337f35e 17090+
4bf69007
AM
17091+ ret = 0;
17092+ out_release:
17093+ path_put(&path);
17094+ }
17095+ return ret;
d337f35e
JR
17096+}
17097+
4bf69007
AM
17098+
17099+int vc_get_dlimit(uint32_t id, void __user *data)
d337f35e 17100+{
4bf69007 17101+ struct vcmd_ctx_dlimit_v0 vc_data;
d337f35e
JR
17102+ int ret;
17103+
2380c486 17104+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
17105+ return -EFAULT;
17106+
4bf69007
AM
17107+ ret = do_get_dlimit(id, vc_data.name,
17108+ &vc_data.space_used, &vc_data.space_total,
17109+ &vc_data.inodes_used, &vc_data.inodes_total,
17110+ &vc_data.reserved, &vc_data.flags);
d337f35e
JR
17111+ if (ret)
17112+ return ret;
17113+
2380c486 17114+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
d337f35e
JR
17115+ return -EFAULT;
17116+ return 0;
17117+}
17118+
4bf69007 17119+#ifdef CONFIG_COMPAT
d337f35e 17120+
4bf69007 17121+int vc_get_dlimit_x32(uint32_t id, void __user *data)
d337f35e 17122+{
4bf69007 17123+ struct vcmd_ctx_dlimit_v0_x32 vc_data;
d337f35e
JR
17124+ int ret;
17125+
2380c486 17126+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
17127+ return -EFAULT;
17128+
4bf69007
AM
17129+ ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr),
17130+ &vc_data.space_used, &vc_data.space_total,
17131+ &vc_data.inodes_used, &vc_data.inodes_total,
17132+ &vc_data.reserved, &vc_data.flags);
d337f35e
JR
17133+ if (ret)
17134+ return ret;
17135+
2380c486 17136+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
d337f35e
JR
17137+ return -EFAULT;
17138+ return 0;
17139+}
17140+
4bf69007 17141+#endif /* CONFIG_COMPAT */
ec22aa5c
AM
17142+
17143+
4bf69007 17144+void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
ec22aa5c 17145+{
4bf69007
AM
17146+ struct dl_info *dli;
17147+ __u64 blimit, bfree, bavail;
17148+ __u32 ifree;
ec22aa5c 17149+
4bf69007
AM
17150+ dli = locate_dl_info(sb, dx_current_tag());
17151+ if (!dli)
17152+ return;
ec22aa5c 17153+
4bf69007
AM
17154+ spin_lock(&dli->dl_lock);
17155+ if (dli->dl_inodes_total == (unsigned long)DLIM_INFINITY)
17156+ goto no_ilim;
ec22aa5c 17157+
4bf69007
AM
17158+ /* reduce max inodes available to limit */
17159+ if (buf->f_files > dli->dl_inodes_total)
17160+ buf->f_files = dli->dl_inodes_total;
ec22aa5c 17161+
4bf69007
AM
17162+ ifree = dli->dl_inodes_total - dli->dl_inodes_used;
17163+ /* reduce free inodes to min */
17164+ if (ifree < buf->f_ffree)
17165+ buf->f_ffree = ifree;
b2252bc2 17166+
4bf69007
AM
17167+no_ilim:
17168+ if (dli->dl_space_total == DLIM_INFINITY)
17169+ goto no_blim;
d337f35e 17170+
4bf69007 17171+ blimit = dli->dl_space_total >> sb->s_blocksize_bits;
d337f35e 17172+
4bf69007
AM
17173+ if (dli->dl_space_total < dli->dl_space_used)
17174+ bfree = 0;
17175+ else
17176+ bfree = (dli->dl_space_total - dli->dl_space_used)
17177+ >> sb->s_blocksize_bits;
d337f35e 17178+
4bf69007
AM
17179+ bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);
17180+ if (bavail < dli->dl_space_used)
17181+ bavail = 0;
17182+ else
17183+ bavail = (bavail - dli->dl_space_used)
17184+ >> sb->s_blocksize_bits;
d337f35e 17185+
4bf69007
AM
17186+ /* reduce max space available to limit */
17187+ if (buf->f_blocks > blimit)
17188+ buf->f_blocks = blimit;
d337f35e 17189+
4bf69007
AM
17190+ /* reduce free space to min */
17191+ if (bfree < buf->f_bfree)
17192+ buf->f_bfree = bfree;
d337f35e 17193+
4bf69007
AM
17194+ /* reduce avail space to min */
17195+ if (bavail < buf->f_bavail)
17196+ buf->f_bavail = bavail;
d337f35e 17197+
4bf69007
AM
17198+no_blim:
17199+ spin_unlock(&dli->dl_lock);
17200+ put_dl_info(dli);
d337f35e 17201+
4bf69007 17202+ return;
d337f35e
JR
17203+}
17204+
4bf69007 17205+#include <linux/module.h>
d337f35e 17206+
4bf69007
AM
17207+EXPORT_SYMBOL_GPL(locate_dl_info);
17208+EXPORT_SYMBOL_GPL(rcu_free_dl_info);
e3afe727 17209+
8931d859
AM
17210diff -NurpP --minimal linux-4.4.161/kernel/vserver/helper.c linux-4.4.161-vs2.3.9.8/kernel/vserver/helper.c
17211--- linux-4.4.161/kernel/vserver/helper.c 1970-01-01 00:00:00.000000000 +0000
17212+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/helper.c 2018-10-20 04:57:21.000000000 +0000
09be7631 17213@@ -0,0 +1,242 @@
4bf69007
AM
17214+/*
17215+ * linux/kernel/vserver/helper.c
17216+ *
17217+ * Virtual Context Support
17218+ *
d6221c00 17219+ * Copyright (C) 2004-2007 Herbert Pötzl
4bf69007
AM
17220+ *
17221+ * V0.01 basic helper
17222+ *
17223+ */
e3afe727 17224+
4bf69007
AM
17225+#include <linux/kmod.h>
17226+#include <linux/reboot.h>
17227+#include <linux/vs_context.h>
17228+#include <linux/vs_network.h>
17229+#include <linux/vserver/signal.h>
e3afe727 17230+
4bf69007
AM
17231+
17232+char vshelper_path[255] = "/sbin/vshelper";
17233+
17234+static int vshelper_init(struct subprocess_info *info, struct cred *new_cred)
17235+{
09be7631 17236+ current->flags &= ~PF_NO_SETAFFINITY;
4bf69007 17237+ return 0;
d337f35e
JR
17238+}
17239+
09be7631
JR
17240+static int vs_call_usermodehelper(char *path, char **argv, char **envp, int wait)
17241+{
17242+ struct subprocess_info *info;
17243+ gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
17244+
17245+ info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
17246+ vshelper_init, NULL, NULL);
17247+ if (info == NULL)
17248+ return -ENOMEM;
17249+
17250+ return call_usermodehelper_exec(info, wait);
17251+}
17252+
4bf69007 17253+static int do_vshelper(char *name, char *argv[], char *envp[], int sync)
d337f35e 17254+{
4bf69007 17255+ int ret;
e3afe727 17256+
09be7631
JR
17257+ if ((ret = vs_call_usermodehelper(name, argv, envp,
17258+ sync ? UMH_WAIT_PROC : UMH_WAIT_EXEC))) {
4bf69007
AM
17259+ printk(KERN_WARNING "%s: (%s %s) returned %s with %d\n",
17260+ name, argv[1], argv[2],
17261+ sync ? "sync" : "async", ret);
17262+ }
17263+ vxdprintk(VXD_CBIT(switch, 4),
17264+ "%s: (%s %s) returned %s with %d",
17265+ name, argv[1], argv[2], sync ? "sync" : "async", ret);
17266+ return ret;
17267+}
e3afe727 17268+
4bf69007
AM
17269+/*
17270+ * vshelper path is set via /proc/sys
17271+ * invoked by vserver sys_reboot(), with
17272+ * the following arguments
17273+ *
17274+ * argv [0] = vshelper_path;
17275+ * argv [1] = action: "restart", "halt", "poweroff", ...
17276+ * argv [2] = context identifier
17277+ *
17278+ * envp [*] = type-specific parameters
17279+ */
e3afe727 17280+
4bf69007
AM
17281+long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg)
17282+{
17283+ char id_buf[8], cmd_buf[16];
17284+ char uid_buf[16], pid_buf[16];
17285+ int ret;
e3afe727 17286+
4bf69007
AM
17287+ char *argv[] = {vshelper_path, NULL, id_buf, 0};
17288+ char *envp[] = {"HOME=/", "TERM=linux",
17289+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
17290+ uid_buf, pid_buf, cmd_buf, 0};
e3afe727 17291+
4bf69007
AM
17292+ if (vx_info_state(vxi, VXS_HELPER))
17293+ return -EAGAIN;
17294+ vxi->vx_state |= VXS_HELPER;
7b17263b 17295+
4bf69007 17296+ snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
d337f35e 17297+
4bf69007 17298+ snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
8ce283e1
AM
17299+ snprintf(uid_buf, sizeof(uid_buf), "VS_UID=%d",
17300+ from_kuid(&init_user_ns, current_uid()));
4bf69007 17301+ snprintf(pid_buf, sizeof(pid_buf), "VS_PID=%d", current->pid);
e3afe727 17302+
4bf69007
AM
17303+ switch (cmd) {
17304+ case LINUX_REBOOT_CMD_RESTART:
17305+ argv[1] = "restart";
17306+ break;
07a627a5 17307+
4bf69007
AM
17308+ case LINUX_REBOOT_CMD_HALT:
17309+ argv[1] = "halt";
17310+ break;
e3afe727 17311+
4bf69007
AM
17312+ case LINUX_REBOOT_CMD_POWER_OFF:
17313+ argv[1] = "poweroff";
17314+ break;
d337f35e 17315+
4bf69007
AM
17316+ case LINUX_REBOOT_CMD_SW_SUSPEND:
17317+ argv[1] = "swsusp";
17318+ break;
d337f35e 17319+
4bf69007
AM
17320+ case LINUX_REBOOT_CMD_OOM:
17321+ argv[1] = "oom";
17322+ break;
d337f35e 17323+
4bf69007
AM
17324+ default:
17325+ vxi->vx_state &= ~VXS_HELPER;
17326+ return 0;
d337f35e 17327+ }
4bf69007
AM
17328+
17329+ ret = do_vshelper(vshelper_path, argv, envp, 0);
17330+ vxi->vx_state &= ~VXS_HELPER;
17331+ __wakeup_vx_info(vxi);
17332+ return (ret) ? -EPERM : 0;
d337f35e
JR
17333+}
17334+
4bf69007
AM
17335+
17336+long vs_reboot(unsigned int cmd, void __user *arg)
d337f35e 17337+{
4bf69007
AM
17338+ struct vx_info *vxi = current_vx_info();
17339+ long ret = 0;
d337f35e 17340+
4bf69007
AM
17341+ vxdprintk(VXD_CBIT(misc, 5),
17342+ "vs_reboot(%p[#%d],%u)",
17343+ vxi, vxi ? vxi->vx_id : 0, cmd);
17344+
17345+ ret = vs_reboot_helper(vxi, cmd, arg);
17346+ if (ret)
17347+ return ret;
17348+
17349+ vxi->reboot_cmd = cmd;
17350+ if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
17351+ switch (cmd) {
17352+ case LINUX_REBOOT_CMD_RESTART:
17353+ case LINUX_REBOOT_CMD_HALT:
17354+ case LINUX_REBOOT_CMD_POWER_OFF:
17355+ vx_info_kill(vxi, 0, SIGKILL);
17356+ vx_info_kill(vxi, 1, SIGKILL);
17357+ default:
17358+ break;
17359+ }
d337f35e 17360+ }
4bf69007 17361+ return 0;
d337f35e
JR
17362+}
17363+
4bf69007
AM
17364+long vs_oom_action(unsigned int cmd)
17365+{
17366+ struct vx_info *vxi = current_vx_info();
17367+ long ret = 0;
d337f35e 17368+
4bf69007
AM
17369+ vxdprintk(VXD_CBIT(misc, 5),
17370+ "vs_oom_action(%p[#%d],%u)",
17371+ vxi, vxi ? vxi->vx_id : 0, cmd);
d337f35e 17372+
4bf69007
AM
17373+ ret = vs_reboot_helper(vxi, cmd, NULL);
17374+ if (ret)
17375+ return ret;
d337f35e 17376+
4bf69007
AM
17377+ vxi->reboot_cmd = cmd;
17378+ if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
17379+ vx_info_kill(vxi, 0, SIGKILL);
17380+ vx_info_kill(vxi, 1, SIGKILL);
17381+ }
17382+ return 0;
17383+}
d337f35e 17384+
4bf69007
AM
17385+/*
17386+ * argv [0] = vshelper_path;
17387+ * argv [1] = action: "startup", "shutdown"
17388+ * argv [2] = context identifier
17389+ *
17390+ * envp [*] = type-specific parameters
17391+ */
d337f35e 17392+
4bf69007 17393+long vs_state_change(struct vx_info *vxi, unsigned int cmd)
d337f35e 17394+{
4bf69007
AM
17395+ char id_buf[8], cmd_buf[16];
17396+ char *argv[] = {vshelper_path, NULL, id_buf, 0};
17397+ char *envp[] = {"HOME=/", "TERM=linux",
17398+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
17399+
17400+ if (!vx_info_flags(vxi, VXF_SC_HELPER, 0))
17401+ return 0;
17402+
17403+ snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
17404+ snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
17405+
17406+ switch (cmd) {
17407+ case VSC_STARTUP:
17408+ argv[1] = "startup";
17409+ break;
17410+ case VSC_SHUTDOWN:
17411+ argv[1] = "shutdown";
17412+ break;
17413+ default:
17414+ return 0;
17415+ }
17416+
17417+ return do_vshelper(vshelper_path, argv, envp, 1);
d337f35e
JR
17418+}
17419+
d337f35e 17420+
4bf69007
AM
17421+/*
17422+ * argv [0] = vshelper_path;
17423+ * argv [1] = action: "netup", "netdown"
17424+ * argv [2] = context identifier
17425+ *
17426+ * envp [*] = type-specific parameters
17427+ */
17428+
17429+long vs_net_change(struct nx_info *nxi, unsigned int cmd)
17430+{
17431+ char id_buf[8], cmd_buf[16];
17432+ char *argv[] = {vshelper_path, NULL, id_buf, 0};
17433+ char *envp[] = {"HOME=/", "TERM=linux",
17434+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
17435+
17436+ if (!nx_info_flags(nxi, NXF_SC_HELPER, 0))
17437+ return 0;
17438+
17439+ snprintf(id_buf, sizeof(id_buf), "%d", nxi->nx_id);
17440+ snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
17441+
17442+ switch (cmd) {
17443+ case VSC_NETUP:
17444+ argv[1] = "netup";
17445+ break;
17446+ case VSC_NETDOWN:
17447+ argv[1] = "netdown";
17448+ break;
17449+ default:
17450+ return 0;
17451+ }
17452+
17453+ return do_vshelper(vshelper_path, argv, envp, 1);
17454+}
d337f35e 17455+
8931d859
AM
17456diff -NurpP --minimal linux-4.4.161/kernel/vserver/history.c linux-4.4.161-vs2.3.9.8/kernel/vserver/history.c
17457--- linux-4.4.161/kernel/vserver/history.c 1970-01-01 00:00:00.000000000 +0000
17458+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/history.c 2018-10-20 04:57:21.000000000 +0000
4bf69007 17459@@ -0,0 +1,258 @@
d337f35e 17460+/*
4bf69007 17461+ * kernel/vserver/history.c
d337f35e 17462+ *
4bf69007 17463+ * Virtual Context History Backtrace
d337f35e 17464+ *
d6221c00 17465+ * Copyright (C) 2004-2007 Herbert Pötzl
d337f35e 17466+ *
4bf69007
AM
17467+ * V0.01 basic structure
17468+ * V0.02 hash/unhash and trace
17469+ * V0.03 preemption fixes
d337f35e
JR
17470+ *
17471+ */
17472+
4bf69007
AM
17473+#include <linux/module.h>
17474+#include <asm/uaccess.h>
d337f35e 17475+
4bf69007
AM
17476+#include <linux/vserver/context.h>
17477+#include <linux/vserver/debug.h>
17478+#include <linux/vserver/debug_cmd.h>
17479+#include <linux/vserver/history.h>
d337f35e
JR
17480+
17481+
4bf69007
AM
17482+#ifdef CONFIG_VSERVER_HISTORY
17483+#define VXH_SIZE CONFIG_VSERVER_HISTORY_SIZE
17484+#else
17485+#define VXH_SIZE 64
17486+#endif
d337f35e 17487+
4bf69007
AM
17488+struct _vx_history {
17489+ unsigned int counter;
2380c486 17490+
4bf69007
AM
17491+ struct _vx_hist_entry entry[VXH_SIZE + 1];
17492+};
2380c486 17493+
2380c486 17494+
4bf69007 17495+DEFINE_PER_CPU(struct _vx_history, vx_history_buffer);
2380c486 17496+
4bf69007 17497+unsigned volatile int vxh_active = 1;
2380c486 17498+
4bf69007 17499+static atomic_t sequence = ATOMIC_INIT(0);
2380c486 17500+
2380c486 17501+
4bf69007 17502+/* vxh_advance()
2380c486 17503+
4bf69007
AM
17504+ * requires disabled preemption */
17505+
17506+struct _vx_hist_entry *vxh_advance(void *loc)
2380c486 17507+{
4bf69007
AM
17508+ unsigned int cpu = smp_processor_id();
17509+ struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
17510+ struct _vx_hist_entry *entry;
17511+ unsigned int index;
17512+
17513+ index = vxh_active ? (hist->counter++ % VXH_SIZE) : VXH_SIZE;
17514+ entry = &hist->entry[index];
17515+
17516+ entry->seq = atomic_inc_return(&sequence);
17517+ entry->loc = loc;
17518+ return entry;
2380c486
JR
17519+}
17520+
4bf69007 17521+EXPORT_SYMBOL_GPL(vxh_advance);
2380c486 17522+
2380c486 17523+
4bf69007 17524+#define VXH_LOC_FMTS "(#%04x,*%d):%p"
2380c486 17525+
4bf69007 17526+#define VXH_LOC_ARGS(e) (e)->seq, cpu, (e)->loc
2380c486 17527+
2380c486 17528+
4bf69007 17529+#define VXH_VXI_FMTS "%p[#%d,%d.%d]"
2380c486 17530+
4bf69007
AM
17531+#define VXH_VXI_ARGS(e) (e)->vxi.ptr, \
17532+ (e)->vxi.ptr ? (e)->vxi.xid : 0, \
17533+ (e)->vxi.ptr ? (e)->vxi.usecnt : 0, \
17534+ (e)->vxi.ptr ? (e)->vxi.tasks : 0
17535+
17536+void vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu)
2380c486 17537+{
4bf69007
AM
17538+ switch (e->type) {
17539+ case VXH_THROW_OOPS:
17540+ printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e));
17541+ break;
2380c486 17542+
4bf69007
AM
17543+ case VXH_GET_VX_INFO:
17544+ case VXH_PUT_VX_INFO:
17545+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
17546+ VXH_LOC_ARGS(e),
17547+ (e->type == VXH_GET_VX_INFO) ? "get" : "put",
17548+ VXH_VXI_ARGS(e));
17549+ break;
2380c486 17550+
4bf69007
AM
17551+ case VXH_INIT_VX_INFO:
17552+ case VXH_SET_VX_INFO:
17553+ case VXH_CLR_VX_INFO:
17554+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
17555+ VXH_LOC_ARGS(e),
17556+ (e->type == VXH_INIT_VX_INFO) ? "init" :
17557+ ((e->type == VXH_SET_VX_INFO) ? "set" : "clr"),
17558+ VXH_VXI_ARGS(e), e->sc.data);
17559+ break;
2380c486 17560+
4bf69007
AM
17561+ case VXH_CLAIM_VX_INFO:
17562+ case VXH_RELEASE_VX_INFO:
17563+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
17564+ VXH_LOC_ARGS(e),
17565+ (e->type == VXH_CLAIM_VX_INFO) ? "claim" : "release",
17566+ VXH_VXI_ARGS(e), e->sc.data);
17567+ break;
2380c486 17568+
4bf69007
AM
17569+ case VXH_ALLOC_VX_INFO:
17570+ case VXH_DEALLOC_VX_INFO:
17571+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
17572+ VXH_LOC_ARGS(e),
17573+ (e->type == VXH_ALLOC_VX_INFO) ? "alloc" : "dealloc",
17574+ VXH_VXI_ARGS(e));
17575+ break;
2380c486 17576+
4bf69007
AM
17577+ case VXH_HASH_VX_INFO:
17578+ case VXH_UNHASH_VX_INFO:
17579+ printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n",
17580+ VXH_LOC_ARGS(e),
17581+ (e->type == VXH_HASH_VX_INFO) ? "hash" : "unhash",
17582+ VXH_VXI_ARGS(e));
17583+ break;
2380c486 17584+
4bf69007
AM
17585+ case VXH_LOC_VX_INFO:
17586+ case VXH_LOOKUP_VX_INFO:
17587+ case VXH_CREATE_VX_INFO:
17588+ printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n",
17589+ VXH_LOC_ARGS(e),
17590+ (e->type == VXH_CREATE_VX_INFO) ? "create" :
17591+ ((e->type == VXH_LOC_VX_INFO) ? "loc" : "lookup"),
17592+ e->ll.arg, VXH_VXI_ARGS(e));
17593+ break;
2380c486
JR
17594+ }
17595+}
17596+
4bf69007
AM
17597+static void __vxh_dump_history(void)
17598+{
17599+ unsigned int i, cpu;
d337f35e 17600+
4bf69007
AM
17601+ printk("History:\tSEQ: %8x\tNR_CPUS: %d\n",
17602+ atomic_read(&sequence), NR_CPUS);
d337f35e 17603+
4bf69007
AM
17604+ for (i = 0; i < VXH_SIZE; i++) {
17605+ for_each_online_cpu(cpu) {
17606+ struct _vx_history *hist =
17607+ &per_cpu(vx_history_buffer, cpu);
17608+ unsigned int index = (hist->counter - i) % VXH_SIZE;
17609+ struct _vx_hist_entry *entry = &hist->entry[index];
d337f35e 17610+
4bf69007
AM
17611+ vxh_dump_entry(entry, cpu);
17612+ }
17613+ }
17614+}
d337f35e 17615+
4bf69007
AM
17616+void vxh_dump_history(void)
17617+{
17618+ vxh_active = 0;
17619+#ifdef CONFIG_SMP
17620+ local_irq_enable();
17621+ smp_send_stop();
17622+ local_irq_disable();
17623+#endif
17624+ __vxh_dump_history();
17625+}
d337f35e 17626+
d337f35e 17627+
4bf69007 17628+/* vserver syscall commands below here */
d337f35e 17629+
d337f35e 17630+
4bf69007
AM
17631+int vc_dump_history(uint32_t id)
17632+{
17633+ vxh_active = 0;
17634+ __vxh_dump_history();
17635+ vxh_active = 1;
2380c486 17636+
4bf69007 17637+ return 0;
d337f35e
JR
17638+}
17639+
d337f35e 17640+
4bf69007
AM
17641+int do_read_history(struct __user _vx_hist_entry *data,
17642+ int cpu, uint32_t *index, uint32_t *count)
d337f35e 17643+{
4bf69007
AM
17644+ int pos, ret = 0;
17645+ struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
17646+ int end = hist->counter;
17647+ int start = end - VXH_SIZE + 2;
17648+ int idx = *index;
d337f35e 17649+
4bf69007
AM
17650+ /* special case: get current pos */
17651+ if (!*count) {
17652+ *index = end;
17653+ return 0;
17654+ }
d337f35e 17655+
4bf69007
AM
17656+ /* have we lost some data? */
17657+ if (idx < start)
17658+ idx = start;
d337f35e 17659+
4bf69007
AM
17660+ for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) {
17661+ struct _vx_hist_entry *entry =
17662+ &hist->entry[idx % VXH_SIZE];
2380c486 17663+
4bf69007
AM
17664+ /* send entry to userspace */
17665+ ret = copy_to_user(&data[pos], entry, sizeof(*entry));
17666+ if (ret)
17667+ break;
17668+ }
17669+ /* save new index and count */
17670+ *index = idx;
17671+ *count = pos;
17672+ return ret ? ret : (*index < end);
d337f35e
JR
17673+}
17674+
4bf69007 17675+int vc_read_history(uint32_t id, void __user *data)
d337f35e 17676+{
4bf69007
AM
17677+ struct vcmd_read_history_v0 vc_data;
17678+ int ret;
d337f35e 17679+
4bf69007
AM
17680+ if (id >= NR_CPUS)
17681+ return -EINVAL;
d337f35e 17682+
4bf69007
AM
17683+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17684+ return -EFAULT;
d337f35e 17685+
4bf69007
AM
17686+ ret = do_read_history((struct __user _vx_hist_entry *)vc_data.data,
17687+ id, &vc_data.index, &vc_data.count);
d337f35e 17688+
4bf69007
AM
17689+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17690+ return -EFAULT;
17691+ return ret;
d337f35e
JR
17692+}
17693+
4bf69007 17694+#ifdef CONFIG_COMPAT
d337f35e 17695+
4bf69007 17696+int vc_read_history_x32(uint32_t id, void __user *data)
d337f35e 17697+{
4bf69007
AM
17698+ struct vcmd_read_history_v0_x32 vc_data;
17699+ int ret;
d337f35e 17700+
4bf69007
AM
17701+ if (id >= NR_CPUS)
17702+ return -EINVAL;
d337f35e 17703+
4bf69007
AM
17704+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17705+ return -EFAULT;
2380c486 17706+
4bf69007
AM
17707+ ret = do_read_history((struct __user _vx_hist_entry *)
17708+ compat_ptr(vc_data.data_ptr),
17709+ id, &vc_data.index, &vc_data.count);
d337f35e 17710+
4bf69007
AM
17711+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17712+ return -EFAULT;
17713+ return ret;
17714+}
d337f35e 17715+
4bf69007 17716+#endif /* CONFIG_COMPAT */
d337f35e 17717+
8931d859
AM
17718diff -NurpP --minimal linux-4.4.161/kernel/vserver/inet.c linux-4.4.161-vs2.3.9.8/kernel/vserver/inet.c
17719--- linux-4.4.161/kernel/vserver/inet.c 1970-01-01 00:00:00.000000000 +0000
17720+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/inet.c 2018-10-20 04:57:21.000000000 +0000
7a9e40b8 17721@@ -0,0 +1,236 @@
d337f35e 17722+
4bf69007
AM
17723+#include <linux/in.h>
17724+#include <linux/inetdevice.h>
17725+#include <linux/export.h>
17726+#include <linux/vs_inet.h>
17727+#include <linux/vs_inet6.h>
17728+#include <linux/vserver/debug.h>
17729+#include <net/route.h>
17730+#include <net/addrconf.h>
d337f35e
JR
17731+
17732+
4bf69007 17733+int nx_v4_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
d337f35e 17734+{
4bf69007
AM
17735+ int ret = 0;
17736+
17737+ if (!nxi1 || !nxi2 || nxi1 == nxi2)
17738+ ret = 1;
17739+ else {
17740+ struct nx_addr_v4 *ptr;
7a9e40b8 17741+ unsigned long irqflags;
d337f35e 17742+
7a9e40b8 17743+ spin_lock_irqsave(&nxi1->addr_lock, irqflags);
4bf69007
AM
17744+ for (ptr = &nxi1->v4; ptr; ptr = ptr->next) {
17745+ if (v4_nx_addr_in_nx_info(nxi2, ptr, -1)) {
17746+ ret = 1;
17747+ break;
17748+ }
17749+ }
7a9e40b8 17750+ spin_unlock_irqrestore(&nxi1->addr_lock, irqflags);
4bf69007 17751+ }
d337f35e 17752+
4bf69007
AM
17753+ vxdprintk(VXD_CBIT(net, 2),
17754+ "nx_v4_addr_conflict(%p,%p): %d",
17755+ nxi1, nxi2, ret);
d337f35e 17756+
4bf69007
AM
17757+ return ret;
17758+}
d337f35e 17759+
d337f35e 17760+
4bf69007
AM
17761+#ifdef CONFIG_IPV6
17762+
17763+int nx_v6_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
d337f35e 17764+{
4bf69007 17765+ int ret = 0;
d337f35e 17766+
4bf69007
AM
17767+ if (!nxi1 || !nxi2 || nxi1 == nxi2)
17768+ ret = 1;
17769+ else {
17770+ struct nx_addr_v6 *ptr;
7a9e40b8 17771+ unsigned long irqflags;
d337f35e 17772+
7a9e40b8 17773+ spin_lock_irqsave(&nxi1->addr_lock, irqflags);
4bf69007
AM
17774+ for (ptr = &nxi1->v6; ptr; ptr = ptr->next) {
17775+ if (v6_nx_addr_in_nx_info(nxi2, ptr, -1)) {
17776+ ret = 1;
17777+ break;
17778+ }
17779+ }
7a9e40b8 17780+ spin_unlock_irqrestore(&nxi1->addr_lock, irqflags);
4bf69007 17781+ }
d337f35e 17782+
4bf69007
AM
17783+ vxdprintk(VXD_CBIT(net, 2),
17784+ "nx_v6_addr_conflict(%p,%p): %d",
17785+ nxi1, nxi2, ret);
d337f35e 17786+
4bf69007
AM
17787+ return ret;
17788+}
d337f35e 17789+
4bf69007 17790+#endif
d337f35e 17791+
4bf69007 17792+int v4_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
d337f35e 17793+{
4bf69007
AM
17794+ struct in_device *in_dev;
17795+ struct in_ifaddr **ifap;
17796+ struct in_ifaddr *ifa;
17797+ int ret = 0;
d337f35e 17798+
4bf69007
AM
17799+ if (!dev)
17800+ goto out;
17801+ in_dev = in_dev_get(dev);
17802+ if (!in_dev)
17803+ goto out;
d337f35e 17804+
4bf69007
AM
17805+ for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
17806+ ifap = &ifa->ifa_next) {
17807+ if (v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW)) {
17808+ ret = 1;
17809+ break;
17810+ }
17811+ }
17812+ in_dev_put(in_dev);
17813+out:
17814+ return ret;
d337f35e
JR
17815+}
17816+
17817+
4bf69007 17818+#ifdef CONFIG_IPV6
d337f35e 17819+
4bf69007 17820+int v6_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
d337f35e 17821+{
4bf69007
AM
17822+ struct inet6_dev *in_dev;
17823+ struct inet6_ifaddr *ifa;
17824+ int ret = 0;
d337f35e 17825+
4bf69007
AM
17826+ if (!dev)
17827+ goto out;
17828+ in_dev = in6_dev_get(dev);
17829+ if (!in_dev)
17830+ goto out;
d337f35e 17831+
4bf69007
AM
17832+ // for (ifap = &in_dev->addr_list; (ifa = *ifap) != NULL;
17833+ list_for_each_entry(ifa, &in_dev->addr_list, if_list) {
17834+ if (v6_addr_in_nx_info(nxi, &ifa->addr, -1)) {
17835+ ret = 1;
17836+ break;
17837+ }
d337f35e 17838+ }
4bf69007
AM
17839+ in6_dev_put(in_dev);
17840+out:
17841+ return ret;
d337f35e
JR
17842+}
17843+
4bf69007 17844+#endif
d337f35e 17845+
4bf69007
AM
17846+int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
17847+{
17848+ int ret = 1;
d337f35e 17849+
4bf69007
AM
17850+ if (!nxi)
17851+ goto out;
17852+ if (nxi->v4.type && v4_dev_in_nx_info(dev, nxi))
17853+ goto out;
17854+#ifdef CONFIG_IPV6
17855+ ret = 2;
17856+ if (nxi->v6.type && v6_dev_in_nx_info(dev, nxi))
17857+ goto out;
17858+#endif
17859+ ret = 0;
17860+out:
17861+ vxdprintk(VXD_CBIT(net, 3),
17862+ "dev_in_nx_info(%p,%p[#%d]) = %d",
17863+ dev, nxi, nxi ? nxi->nx_id : 0, ret);
17864+ return ret;
17865+}
d337f35e 17866+
4bf69007
AM
17867+struct rtable *ip_v4_find_src(struct net *net, struct nx_info *nxi,
17868+ struct flowi4 *fl4)
d337f35e 17869+{
4bf69007 17870+ struct rtable *rt;
d337f35e 17871+
4bf69007
AM
17872+ if (!nxi)
17873+ return NULL;
d337f35e 17874+
4bf69007
AM
17875+ /* FIXME: handle lback only case */
17876+ if (!NX_IPV4(nxi))
17877+ return ERR_PTR(-EPERM);
d337f35e 17878+
4bf69007
AM
17879+ vxdprintk(VXD_CBIT(net, 4),
17880+ "ip_v4_find_src(%p[#%u]) " NIPQUAD_FMT " -> " NIPQUAD_FMT,
17881+ nxi, nxi ? nxi->nx_id : 0,
17882+ NIPQUAD(fl4->saddr), NIPQUAD(fl4->daddr));
d337f35e 17883+
4bf69007
AM
17884+ /* single IP is unconditional */
17885+ if (nx_info_flags(nxi, NXF_SINGLE_IP, 0) &&
17886+ (fl4->saddr == INADDR_ANY))
17887+ fl4->saddr = nxi->v4.ip[0].s_addr;
d337f35e 17888+
4bf69007
AM
17889+ if (fl4->saddr == INADDR_ANY) {
17890+ struct nx_addr_v4 *ptr;
17891+ __be32 found = 0;
17892+
17893+ rt = __ip_route_output_key(net, fl4);
17894+ if (!IS_ERR(rt)) {
17895+ found = fl4->saddr;
17896+ ip_rt_put(rt);
17897+ vxdprintk(VXD_CBIT(net, 4),
17898+ "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
17899+ nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(found));
17900+ if (v4_addr_in_nx_info(nxi, found, NXA_MASK_BIND))
17901+ goto found;
17902+ }
d337f35e 17903+
8d50a2ea 17904+ WARN_ON_ONCE(in_irq());
b00e13aa 17905+ spin_lock_bh(&nxi->addr_lock);
4bf69007
AM
17906+ for (ptr = &nxi->v4; ptr; ptr = ptr->next) {
17907+ __be32 primary = ptr->ip[0].s_addr;
17908+ __be32 mask = ptr->mask.s_addr;
17909+ __be32 neta = primary & mask;
d337f35e 17910+
4bf69007
AM
17911+ vxdprintk(VXD_CBIT(net, 4), "ip_v4_find_src(%p[#%u]) chk: "
17912+ NIPQUAD_FMT "/" NIPQUAD_FMT "/" NIPQUAD_FMT,
17913+ nxi, nxi ? nxi->nx_id : 0, NIPQUAD(primary),
17914+ NIPQUAD(mask), NIPQUAD(neta));
17915+ if ((found & mask) != neta)
17916+ continue;
d337f35e 17917+
4bf69007
AM
17918+ fl4->saddr = primary;
17919+ rt = __ip_route_output_key(net, fl4);
17920+ vxdprintk(VXD_CBIT(net, 4),
17921+ "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
17922+ nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(primary));
17923+ if (!IS_ERR(rt)) {
17924+ found = fl4->saddr;
17925+ ip_rt_put(rt);
17926+ if (found == primary)
5cb1760b 17927+ goto found_unlock;
4bf69007
AM
17928+ }
17929+ }
17930+ /* still no source ip? */
17931+ found = ipv4_is_loopback(fl4->daddr)
17932+ ? IPI_LOOPBACK : nxi->v4.ip[0].s_addr;
5cb1760b 17933+ found_unlock:
b00e13aa 17934+ spin_unlock_bh(&nxi->addr_lock);
4bf69007
AM
17935+ found:
17936+ /* assign src ip to flow */
17937+ fl4->saddr = found;
17938+
17939+ } else {
17940+ if (!v4_addr_in_nx_info(nxi, fl4->saddr, NXA_MASK_BIND))
17941+ return ERR_PTR(-EPERM);
17942+ }
d337f35e 17943+
4bf69007
AM
17944+ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) {
17945+ if (ipv4_is_loopback(fl4->daddr))
17946+ fl4->daddr = nxi->v4_lback.s_addr;
17947+ if (ipv4_is_loopback(fl4->saddr))
17948+ fl4->saddr = nxi->v4_lback.s_addr;
17949+ } else if (ipv4_is_loopback(fl4->daddr) &&
17950+ !nx_info_flags(nxi, NXF_LBACK_ALLOW, 0))
17951+ return ERR_PTR(-EPERM);
d337f35e 17952+
4bf69007 17953+ return NULL;
d337f35e
JR
17954+}
17955+
4bf69007 17956+EXPORT_SYMBOL_GPL(ip_v4_find_src);
d337f35e 17957+
8931d859
AM
17958diff -NurpP --minimal linux-4.4.161/kernel/vserver/init.c linux-4.4.161-vs2.3.9.8/kernel/vserver/init.c
17959--- linux-4.4.161/kernel/vserver/init.c 1970-01-01 00:00:00.000000000 +0000
17960+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/init.c 2018-10-20 04:57:21.000000000 +0000
927ca606 17961@@ -0,0 +1,46 @@
4bf69007
AM
17962+/*
17963+ * linux/kernel/vserver/init.c
17964+ *
17965+ * Virtual Server Init
17966+ *
d6221c00 17967+ * Copyright (C) 2004-2007 Herbert Pötzl
4bf69007
AM
17968+ *
17969+ * V0.01 basic structure
17970+ *
17971+ */
d337f35e 17972+
4bf69007 17973+#include <linux/init.h>
927ca606 17974+#include <linux/module.h>
4bf69007
AM
17975+
17976+int vserver_register_sysctl(void);
17977+void vserver_unregister_sysctl(void);
17978+
17979+
17980+static int __init init_vserver(void)
d337f35e 17981+{
4bf69007 17982+ int ret = 0;
d337f35e 17983+
4bf69007
AM
17984+#ifdef CONFIG_VSERVER_DEBUG
17985+ vserver_register_sysctl();
17986+#endif
17987+ return ret;
d337f35e
JR
17988+}
17989+
d337f35e 17990+
4bf69007 17991+static void __exit exit_vserver(void)
d337f35e 17992+{
d337f35e 17993+
4bf69007
AM
17994+#ifdef CONFIG_VSERVER_DEBUG
17995+ vserver_unregister_sysctl();
17996+#endif
17997+ return;
d337f35e
JR
17998+}
17999+
4bf69007
AM
18000+/* FIXME: GFP_ZONETYPES gone
18001+long vx_slab[GFP_ZONETYPES]; */
18002+long vx_area;
d337f35e 18003+
d337f35e 18004+
4bf69007
AM
18005+module_init(init_vserver);
18006+module_exit(exit_vserver);
d337f35e 18007+
8931d859
AM
18008diff -NurpP --minimal linux-4.4.161/kernel/vserver/inode.c linux-4.4.161-vs2.3.9.8/kernel/vserver/inode.c
18009--- linux-4.4.161/kernel/vserver/inode.c 1970-01-01 00:00:00.000000000 +0000
18010+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/inode.c 2018-10-20 04:57:21.000000000 +0000
09be7631 18011@@ -0,0 +1,440 @@
4bf69007
AM
18012+/*
18013+ * linux/kernel/vserver/inode.c
18014+ *
18015+ * Virtual Server: File System Support
18016+ *
d6221c00 18017+ * Copyright (C) 2004-2007 Herbert Pötzl
4bf69007
AM
18018+ *
18019+ * V0.01 separated from vcontext V0.05
18020+ * V0.02 moved to tag (instead of xid)
18021+ *
18022+ */
d337f35e 18023+
4bf69007
AM
18024+#include <linux/tty.h>
18025+#include <linux/proc_fs.h>
18026+#include <linux/devpts_fs.h>
18027+#include <linux/fs.h>
18028+#include <linux/file.h>
18029+#include <linux/mount.h>
18030+#include <linux/parser.h>
18031+#include <linux/namei.h>
09be7631
JR
18032+#include <linux/magic.h>
18033+#include <linux/slab.h>
4bf69007
AM
18034+#include <linux/vserver/inode.h>
18035+#include <linux/vserver/inode_cmd.h>
18036+#include <linux/vs_base.h>
18037+#include <linux/vs_tag.h>
d337f35e 18038+
4bf69007 18039+#include <asm/uaccess.h>
09be7631 18040+#include <../../fs/proc/internal.h>
d337f35e 18041+
d337f35e 18042+
4bf69007 18043+static int __vc_get_iattr(struct inode *in, uint32_t *tag, uint32_t *flags, uint32_t *mask)
d337f35e 18044+{
4bf69007 18045+ struct proc_dir_entry *entry;
d337f35e 18046+
4bf69007
AM
18047+ if (!in || !in->i_sb)
18048+ return -ESRCH;
d337f35e 18049+
4bf69007
AM
18050+ *flags = IATTR_TAG
18051+ | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0)
18052+ | (IS_IXUNLINK(in) ? IATTR_IXUNLINK : 0)
18053+ | (IS_BARRIER(in) ? IATTR_BARRIER : 0)
18054+ | (IS_COW(in) ? IATTR_COW : 0);
18055+ *mask = IATTR_IXUNLINK | IATTR_IMMUTABLE | IATTR_COW;
d337f35e 18056+
4bf69007
AM
18057+ if (S_ISDIR(in->i_mode))
18058+ *mask |= IATTR_BARRIER;
d337f35e 18059+
4bf69007
AM
18060+ if (IS_TAGGED(in)) {
18061+ *tag = i_tag_read(in);
18062+ *mask |= IATTR_TAG;
18063+ }
2380c486 18064+
4bf69007
AM
18065+ switch (in->i_sb->s_magic) {
18066+ case PROC_SUPER_MAGIC:
18067+ entry = PROC_I(in)->pde;
d337f35e 18068+
4bf69007
AM
18069+ /* check for specific inodes? */
18070+ if (entry)
18071+ *mask |= IATTR_FLAGS;
18072+ if (entry)
18073+ *flags |= (entry->vx_flags & IATTR_FLAGS);
18074+ else
18075+ *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
18076+ break;
d337f35e 18077+
4bf69007
AM
18078+ case DEVPTS_SUPER_MAGIC:
18079+ *tag = i_tag_read(in);
18080+ *mask |= IATTR_TAG;
18081+ break;
d337f35e 18082+
4bf69007
AM
18083+ default:
18084+ break;
18085+ }
18086+ return 0;
d337f35e
JR
18087+}
18088+
4bf69007 18089+int vc_get_iattr(void __user *data)
d337f35e 18090+{
4bf69007
AM
18091+ struct path path;
18092+ struct vcmd_ctx_iattr_v1 vc_data = { .tag = -1 };
18093+ int ret;
d337f35e 18094+
4bf69007
AM
18095+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18096+ return -EFAULT;
d337f35e 18097+
4bf69007
AM
18098+ ret = user_lpath(vc_data.name, &path);
18099+ if (!ret) {
18100+ ret = __vc_get_iattr(path.dentry->d_inode,
18101+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
18102+ path_put(&path);
18103+ }
18104+ if (ret)
18105+ return ret;
d337f35e 18106+
4bf69007
AM
18107+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18108+ ret = -EFAULT;
18109+ return ret;
d337f35e
JR
18110+}
18111+
4bf69007 18112+#ifdef CONFIG_COMPAT
d337f35e 18113+
4bf69007 18114+int vc_get_iattr_x32(void __user *data)
d337f35e 18115+{
4bf69007
AM
18116+ struct path path;
18117+ struct vcmd_ctx_iattr_v1_x32 vc_data = { .tag = -1 };
18118+ int ret;
d337f35e 18119+
4bf69007
AM
18120+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18121+ return -EFAULT;
d337f35e 18122+
4bf69007
AM
18123+ ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
18124+ if (!ret) {
18125+ ret = __vc_get_iattr(path.dentry->d_inode,
18126+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
18127+ path_put(&path);
18128+ }
18129+ if (ret)
18130+ return ret;
d337f35e 18131+
2380c486 18132+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
4bf69007
AM
18133+ ret = -EFAULT;
18134+ return ret;
d337f35e
JR
18135+}
18136+
4bf69007 18137+#endif /* CONFIG_COMPAT */
d337f35e 18138+
d337f35e 18139+
4bf69007 18140+int vc_fget_iattr(uint32_t fd, void __user *data)
d337f35e 18141+{
4bf69007
AM
18142+ struct file *filp;
18143+ struct vcmd_ctx_fiattr_v0 vc_data = { .tag = -1 };
d337f35e
JR
18144+ int ret;
18145+
4bf69007 18146+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
18147+ return -EFAULT;
18148+
4bf69007 18149+ filp = fget(fd);
927ca606 18150+ if (!filp || !filp->f_path.dentry || !filp->f_path.dentry->d_inode)
4bf69007 18151+ return -EBADF;
2380c486 18152+
927ca606 18153+ ret = __vc_get_iattr(filp->f_path.dentry->d_inode,
4bf69007 18154+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
2380c486 18155+
4bf69007 18156+ fput(filp);
2380c486 18157+
4bf69007
AM
18158+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18159+ ret = -EFAULT;
d337f35e
JR
18160+ return ret;
18161+}
18162+
18163+
4bf69007 18164+static int __vc_set_iattr(struct dentry *de, uint32_t *tag, uint32_t *flags, uint32_t *mask)
2380c486 18165+{
4bf69007
AM
18166+ struct inode *in = de->d_inode;
18167+ int error = 0, is_proc = 0, has_tag = 0;
18168+ struct iattr attr = { 0 };
2380c486 18169+
4bf69007
AM
18170+ if (!in || !in->i_sb)
18171+ return -ESRCH;
2380c486 18172+
4bf69007
AM
18173+ is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
18174+ if ((*mask & IATTR_FLAGS) && !is_proc)
18175+ return -EINVAL;
2380c486 18176+
4bf69007
AM
18177+ has_tag = IS_TAGGED(in) ||
18178+ (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC);
18179+ if ((*mask & IATTR_TAG) && !has_tag)
18180+ return -EINVAL;
2380c486 18181+
4bf69007
AM
18182+ mutex_lock(&in->i_mutex);
18183+ if (*mask & IATTR_TAG) {
8ce283e1 18184+ attr.ia_tag = make_ktag(&init_user_ns, *tag);
4bf69007 18185+ attr.ia_valid |= ATTR_TAG;
2380c486
JR
18186+ }
18187+
4bf69007
AM
18188+ if (*mask & IATTR_FLAGS) {
18189+ struct proc_dir_entry *entry = PROC_I(in)->pde;
18190+ unsigned int iflags = PROC_I(in)->vx_flags;
2380c486 18191+
4bf69007
AM
18192+ iflags = (iflags & ~(*mask & IATTR_FLAGS))
18193+ | (*flags & IATTR_FLAGS);
18194+ PROC_I(in)->vx_flags = iflags;
18195+ if (entry)
18196+ entry->vx_flags = iflags;
18197+ }
9f7054f1 18198+
4bf69007
AM
18199+ if (*mask & (IATTR_IMMUTABLE | IATTR_IXUNLINK |
18200+ IATTR_BARRIER | IATTR_COW)) {
18201+ int iflags = in->i_flags;
18202+ int vflags = in->i_vflags;
9f7054f1 18203+
4bf69007
AM
18204+ if (*mask & IATTR_IMMUTABLE) {
18205+ if (*flags & IATTR_IMMUTABLE)
18206+ iflags |= S_IMMUTABLE;
18207+ else
18208+ iflags &= ~S_IMMUTABLE;
18209+ }
18210+ if (*mask & IATTR_IXUNLINK) {
18211+ if (*flags & IATTR_IXUNLINK)
18212+ iflags |= S_IXUNLINK;
18213+ else
18214+ iflags &= ~S_IXUNLINK;
18215+ }
18216+ if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) {
18217+ if (*flags & IATTR_BARRIER)
18218+ vflags |= V_BARRIER;
18219+ else
18220+ vflags &= ~V_BARRIER;
18221+ }
18222+ if (S_ISREG(in->i_mode) && (*mask & IATTR_COW)) {
18223+ if (*flags & IATTR_COW)
18224+ vflags |= V_COW;
18225+ else
18226+ vflags &= ~V_COW;
18227+ }
18228+ if (in->i_op && in->i_op->sync_flags) {
18229+ error = in->i_op->sync_flags(in, iflags, vflags);
18230+ if (error)
18231+ goto out;
18232+ }
18233+ }
9f7054f1 18234+
4bf69007
AM
18235+ if (attr.ia_valid) {
18236+ if (in->i_op && in->i_op->setattr)
18237+ error = in->i_op->setattr(de, &attr);
18238+ else {
18239+ error = inode_change_ok(in, &attr);
18240+ if (!error) {
18241+ setattr_copy(in, &attr);
18242+ mark_inode_dirty(in);
18243+ }
18244+ }
9f7054f1 18245+ }
9f7054f1 18246+
4bf69007
AM
18247+out:
18248+ mutex_unlock(&in->i_mutex);
18249+ return error;
18250+}
2380c486 18251+
4bf69007 18252+int vc_set_iattr(void __user *data)
d337f35e 18253+{
4bf69007
AM
18254+ struct path path;
18255+ struct vcmd_ctx_iattr_v1 vc_data;
18256+ int ret;
d337f35e 18257+
4bf69007
AM
18258+ if (!capable(CAP_LINUX_IMMUTABLE))
18259+ return -EPERM;
18260+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
18261+ return -EFAULT;
18262+
4bf69007
AM
18263+ ret = user_lpath(vc_data.name, &path);
18264+ if (!ret) {
18265+ ret = __vc_set_iattr(path.dentry,
18266+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
18267+ path_put(&path);
d337f35e 18268+ }
4bf69007
AM
18269+
18270+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18271+ ret = -EFAULT;
d337f35e
JR
18272+ return ret;
18273+}
18274+
4bf69007
AM
18275+#ifdef CONFIG_COMPAT
18276+
18277+int vc_set_iattr_x32(void __user *data)
d337f35e 18278+{
4bf69007
AM
18279+ struct path path;
18280+ struct vcmd_ctx_iattr_v1_x32 vc_data;
18281+ int ret;
d337f35e 18282+
4bf69007
AM
18283+ if (!capable(CAP_LINUX_IMMUTABLE))
18284+ return -EPERM;
18285+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
2380c486
JR
18286+ return -EFAULT;
18287+
4bf69007
AM
18288+ ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
18289+ if (!ret) {
18290+ ret = __vc_set_iattr(path.dentry,
18291+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
18292+ path_put(&path);
2380c486 18293+ }
4bf69007
AM
18294+
18295+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18296+ ret = -EFAULT;
18297+ return ret;
2380c486
JR
18298+}
18299+
4bf69007 18300+#endif /* CONFIG_COMPAT */
2380c486 18301+
4bf69007 18302+int vc_fset_iattr(uint32_t fd, void __user *data)
2380c486 18303+{
4bf69007
AM
18304+ struct file *filp;
18305+ struct vcmd_ctx_fiattr_v0 vc_data;
18306+ int ret;
2380c486 18307+
4bf69007
AM
18308+ if (!capable(CAP_LINUX_IMMUTABLE))
18309+ return -EPERM;
18310+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
2380c486
JR
18311+ return -EFAULT;
18312+
4bf69007 18313+ filp = fget(fd);
927ca606 18314+ if (!filp || !filp->f_path.dentry || !filp->f_path.dentry->d_inode)
4bf69007 18315+ return -EBADF;
2380c486 18316+
927ca606 18317+ ret = __vc_set_iattr(filp->f_path.dentry, &vc_data.tag,
4bf69007 18318+ &vc_data.flags, &vc_data.mask);
2380c486 18319+
4bf69007 18320+ fput(filp);
2380c486 18321+
4bf69007
AM
18322+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18323+ return -EFAULT;
18324+ return ret;
2380c486
JR
18325+}
18326+
2380c486 18327+
4bf69007 18328+enum { Opt_notagcheck, Opt_tag, Opt_notag, Opt_tagid, Opt_err };
2380c486 18329+
4bf69007
AM
18330+static match_table_t tokens = {
18331+ {Opt_notagcheck, "notagcheck"},
18332+#ifdef CONFIG_PROPAGATE
18333+ {Opt_notag, "notag"},
18334+ {Opt_tag, "tag"},
18335+ {Opt_tagid, "tagid=%u"},
18336+#endif
18337+ {Opt_err, NULL}
18338+};
2380c486 18339+
9f7054f1 18340+
4bf69007
AM
/*
 * Strip the first occurrence of mount option @opt from the comma
 * separated option list in @string.
 *
 * A match has to start at the beginning of the list or right after a ','
 * and has to be followed by ',', '=' or the end of the list, so "tag" does
 * not match inside "notag", "notagcheck" or "tagid=5".
 *
 * The option text, including any "=value" part, is removed up to but not
 * including the next ','. The rest of the list is shifted down and the
 * freed tail of the buffer is filled with '\0'. Separating commas are
 * left in place.
 */
static void __dx_parse_remove(char *string, char *opt)
{
	size_t len = strlen(opt);
	char *p = string;
	char *q;

	if (!len)
		return;

	/* find an occurrence that is a whole option, not a substring */
	while ((p = strstr(p, opt)) != NULL) {
		char next = p[len];

		if ((p == string || p[-1] == ',') &&
		    (next == '\0' || next == ',' || next == '='))
			break;
		p++;
	}
	if (!p)
		return;

	/* skip to the end of this option */
	q = p + len;
	while (*q != '\0' && *q != ',')
		q++;
	/* close the gap */
	while (*q)
		*p++ = *q++;
	/* clear what is left of the old tail */
	while (*p)
		*p++ = '\0';
}
18355+
61333608 18356+int dx_parse_tag(char *string, vtag_t *tag, int remove, int *mnt_flags,
4bf69007 18357+ unsigned long *flags)
9f7054f1 18358+{
4bf69007
AM
18359+ int set = 0;
18360+ substring_t args[MAX_OPT_ARGS];
18361+ int token;
18362+ char *s, *p, *opts;
18363+#if defined(CONFIG_PROPAGATE) || defined(CONFIG_VSERVER_DEBUG)
18364+ int option = 0;
18365+#endif
9f7054f1 18366+
4bf69007
AM
18367+ if (!string)
18368+ return 0;
18369+ s = kstrdup(string, GFP_KERNEL | GFP_ATOMIC);
18370+ if (!s)
18371+ return 0;
9f7054f1 18372+
4bf69007
AM
18373+ opts = s;
18374+ while ((p = strsep(&opts, ",")) != NULL) {
18375+ token = match_token(p, tokens, args);
9f7054f1 18376+
4bf69007
AM
18377+ switch (token) {
18378+#ifdef CONFIG_PROPAGATE
18379+ case Opt_tag:
18380+ if (tag)
18381+ *tag = 0;
18382+ if (remove)
18383+ __dx_parse_remove(s, "tag");
18384+ *mnt_flags |= MNT_TAGID;
18385+ set |= MNT_TAGID;
18386+ break;
18387+ case Opt_notag:
18388+ if (remove)
18389+ __dx_parse_remove(s, "notag");
18390+ *mnt_flags |= MNT_NOTAG;
18391+ set |= MNT_NOTAG;
18392+ break;
18393+ case Opt_tagid:
18394+ if (tag && !match_int(args, &option))
18395+ *tag = option;
18396+ if (remove)
18397+ __dx_parse_remove(s, "tagid");
18398+ *mnt_flags |= MNT_TAGID;
18399+ set |= MNT_TAGID;
18400+ break;
18401+#endif /* CONFIG_PROPAGATE */
18402+ case Opt_notagcheck:
18403+ if (remove)
18404+ __dx_parse_remove(s, "notagcheck");
18405+ *flags |= MS_NOTAGCHECK;
18406+ set |= MS_NOTAGCHECK;
18407+ break;
18408+ }
18409+ vxdprintk(VXD_CBIT(tag, 7),
18410+ "dx_parse_tag(" VS_Q("%s") "): %d:#%d",
18411+ p, token, option);
18412+ }
18413+ if (set)
18414+ strcpy(string, s);
18415+ kfree(s);
18416+ return set;
9f7054f1 18417+}
2380c486 18418+
4bf69007 18419+#ifdef CONFIG_PROPAGATE
2380c486 18420+
4bf69007 18421+void __dx_propagate_tag(struct nameidata *nd, struct inode *inode)
2380c486 18422+{
61333608 18423+ vtag_t new_tag = 0;
4bf69007
AM
18424+ struct vfsmount *mnt;
18425+ int propagate;
2380c486 18426+
4bf69007
AM
18427+ if (!nd)
18428+ return;
18429+ mnt = nd->path.mnt;
18430+ if (!mnt)
18431+ return;
2380c486 18432+
4bf69007
AM
18433+ propagate = (mnt->mnt_flags & MNT_TAGID);
18434+ if (propagate)
18435+ new_tag = mnt->mnt_tag;
2380c486 18436+
4bf69007
AM
18437+ vxdprintk(VXD_CBIT(tag, 7),
18438+ "dx_propagate_tag(%p[#%lu.%d]): %d,%d",
18439+ inode, inode->i_ino, inode->i_tag,
18440+ new_tag, (propagate) ? 1 : 0);
18441+
18442+ if (propagate)
18443+ i_tag_write(inode, new_tag);
2380c486
JR
18444+}
18445+
4bf69007 18446+#include <linux/module.h>
2380c486 18447+
4bf69007 18448+EXPORT_SYMBOL_GPL(__dx_propagate_tag);
2380c486 18449+
4bf69007 18450+#endif /* CONFIG_PROPAGATE */
2380c486 18451+
8931d859
AM
18452diff -NurpP --minimal linux-4.4.161/kernel/vserver/limit.c linux-4.4.161-vs2.3.9.8/kernel/vserver/limit.c
18453--- linux-4.4.161/kernel/vserver/limit.c 1970-01-01 00:00:00.000000000 +0000
18454+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/limit.c 2018-10-20 04:57:21.000000000 +0000
1d9ad342 18455@@ -0,0 +1,386 @@
4bf69007
AM
18456+/*
18457+ * linux/kernel/vserver/limit.c
18458+ *
18459+ * Virtual Server: Context Limits
18460+ *
d6221c00 18461+ * Copyright (C) 2004-2010 Herbert Pötzl
4bf69007
AM
18462+ *
18463+ * V0.01 broken out from vcontext V0.05
18464+ * V0.02 changed vcmds to vxi arg
18465+ * V0.03 added memory cgroup support
18466+ *
18467+ */
2380c486 18468+
4bf69007
AM
18469+#include <linux/sched.h>
18470+#include <linux/module.h>
18471+#include <linux/memcontrol.h>
927ca606 18472+#include <linux/page_counter.h>
4bf69007
AM
18473+#include <linux/vs_limit.h>
18474+#include <linux/vserver/limit.h>
18475+#include <linux/vserver/limit_cmd.h>
2380c486 18476+
4bf69007 18477+#include <asm/uaccess.h>
d337f35e 18478+
d337f35e 18479+
4bf69007
AM
18480+const char *vlimit_name[NUM_LIMITS] = {
18481+ [RLIMIT_CPU] = "CPU",
18482+ [RLIMIT_NPROC] = "NPROC",
18483+ [RLIMIT_NOFILE] = "NOFILE",
18484+ [RLIMIT_LOCKS] = "LOCKS",
18485+ [RLIMIT_SIGPENDING] = "SIGP",
18486+ [RLIMIT_MSGQUEUE] = "MSGQ",
d337f35e 18487+
4bf69007
AM
18488+ [VLIMIT_NSOCK] = "NSOCK",
18489+ [VLIMIT_OPENFD] = "OPENFD",
18490+ [VLIMIT_SHMEM] = "SHMEM",
18491+ [VLIMIT_DENTRY] = "DENTRY",
18492+};
2380c486 18493+
4bf69007 18494+EXPORT_SYMBOL_GPL(vlimit_name);
2380c486 18495+
4bf69007 18496+#define MASK_ENTRY(x) (1 << (x))
d337f35e 18497+
4bf69007
AM
18498+const struct vcmd_ctx_rlimit_mask_v0 vlimit_mask = {
18499+ /* minimum */
18500+ 0
18501+ , /* softlimit */
18502+ 0
18503+ , /* maximum */
18504+ MASK_ENTRY( RLIMIT_NPROC ) |
18505+ MASK_ENTRY( RLIMIT_NOFILE ) |
18506+ MASK_ENTRY( RLIMIT_LOCKS ) |
18507+ MASK_ENTRY( RLIMIT_MSGQUEUE ) |
d337f35e 18508+
4bf69007
AM
18509+ MASK_ENTRY( VLIMIT_NSOCK ) |
18510+ MASK_ENTRY( VLIMIT_OPENFD ) |
18511+ MASK_ENTRY( VLIMIT_SHMEM ) |
18512+ MASK_ENTRY( VLIMIT_DENTRY ) |
18513+ 0
18514+};
18515+ /* accounting only */
18516+uint32_t account_mask =
18517+ MASK_ENTRY( VLIMIT_SEMARY ) |
18518+ MASK_ENTRY( VLIMIT_NSEMS ) |
18519+ MASK_ENTRY( VLIMIT_MAPPED ) |
18520+ 0;
d337f35e 18521+
4bf69007
AM
18522+
18523+static int is_valid_vlimit(int id)
18524+{
18525+ uint32_t mask = vlimit_mask.minimum |
18526+ vlimit_mask.softlimit | vlimit_mask.maximum;
18527+ return mask & (1 << id);
d337f35e
JR
18528+}
18529+
4bf69007 18530+static int is_accounted_vlimit(int id)
d337f35e 18531+{
4bf69007
AM
18532+ if (is_valid_vlimit(id))
18533+ return 1;
18534+ return account_mask & (1 << id);
18535+}
d337f35e 18536+
d337f35e 18537+
4bf69007
AM
18538+static inline uint64_t vc_get_soft(struct vx_info *vxi, int id)
18539+{
18540+ rlim_t limit = __rlim_soft(&vxi->limit, id);
18541+ return VX_VLIM(limit);
18542+}
d337f35e 18543+
4bf69007
AM
18544+static inline uint64_t vc_get_hard(struct vx_info *vxi, int id)
18545+{
18546+ rlim_t limit = __rlim_hard(&vxi->limit, id);
18547+ return VX_VLIM(limit);
18548+}
d337f35e 18549+
4bf69007
AM
18550+static int do_get_rlimit(struct vx_info *vxi, uint32_t id,
18551+ uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum)
18552+{
18553+ if (!is_valid_vlimit(id))
18554+ return -EINVAL;
18555+
18556+ if (minimum)
18557+ *minimum = CRLIM_UNSET;
18558+ if (softlimit)
18559+ *softlimit = vc_get_soft(vxi, id);
18560+ if (maximum)
18561+ *maximum = vc_get_hard(vxi, id);
d337f35e
JR
18562+ return 0;
18563+}
18564+
4bf69007 18565+int vc_get_rlimit(struct vx_info *vxi, void __user *data)
d337f35e 18566+{
4bf69007
AM
18567+ struct vcmd_ctx_rlimit_v0 vc_data;
18568+ int ret;
d337f35e 18569+
4bf69007
AM
18570+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18571+ return -EFAULT;
18572+
18573+ ret = do_get_rlimit(vxi, vc_data.id,
18574+ &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
18575+ if (ret)
18576+ return ret;
d337f35e 18577+
2380c486 18578+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
d337f35e
JR
18579+ return -EFAULT;
18580+ return 0;
18581+}
18582+
4bf69007
AM
18583+static int do_set_rlimit(struct vx_info *vxi, uint32_t id,
18584+ uint64_t minimum, uint64_t softlimit, uint64_t maximum)
d337f35e 18585+{
4bf69007
AM
18586+ if (!is_valid_vlimit(id))
18587+ return -EINVAL;
d337f35e 18588+
4bf69007
AM
18589+ if (maximum != CRLIM_KEEP)
18590+ __rlim_hard(&vxi->limit, id) = VX_RLIM(maximum);
18591+ if (softlimit != CRLIM_KEEP)
18592+ __rlim_soft(&vxi->limit, id) = VX_RLIM(softlimit);
18593+
18594+ /* clamp soft limit */
18595+ if (__rlim_soft(&vxi->limit, id) > __rlim_hard(&vxi->limit, id))
18596+ __rlim_soft(&vxi->limit, id) = __rlim_hard(&vxi->limit, id);
d337f35e 18597+
d337f35e
JR
18598+ return 0;
18599+}
18600+
4bf69007
AM
18601+int vc_set_rlimit(struct vx_info *vxi, void __user *data)
18602+{
18603+ struct vcmd_ctx_rlimit_v0 vc_data;
d337f35e 18604+
4bf69007
AM
18605+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18606+ return -EFAULT;
d337f35e 18607+
4bf69007
AM
18608+ return do_set_rlimit(vxi, vc_data.id,
18609+ vc_data.minimum, vc_data.softlimit, vc_data.maximum);
18610+}
d337f35e 18611+
4bf69007 18612+#ifdef CONFIG_IA32_EMULATION
2380c486 18613+
4bf69007
AM
18614+int vc_set_rlimit_x32(struct vx_info *vxi, void __user *data)
18615+{
18616+ struct vcmd_ctx_rlimit_v0_x32 vc_data;
d337f35e 18617+
4bf69007
AM
18618+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18619+ return -EFAULT;
d337f35e 18620+
4bf69007
AM
18621+ return do_set_rlimit(vxi, vc_data.id,
18622+ vc_data.minimum, vc_data.softlimit, vc_data.maximum);
18623+}
d337f35e 18624+
4bf69007
AM
18625+int vc_get_rlimit_x32(struct vx_info *vxi, void __user *data)
18626+{
18627+ struct vcmd_ctx_rlimit_v0_x32 vc_data;
18628+ int ret;
d337f35e 18629+
4bf69007
AM
18630+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18631+ return -EFAULT;
d337f35e 18632+
4bf69007
AM
18633+ ret = do_get_rlimit(vxi, vc_data.id,
18634+ &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
18635+ if (ret)
18636+ return ret;
2380c486 18637+
4bf69007
AM
18638+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18639+ return -EFAULT;
18640+ return 0;
2380c486 18641+}
d337f35e 18642+
4bf69007 18643+#endif /* CONFIG_IA32_EMULATION */
d337f35e
JR
18644+
18645+
4bf69007
AM
18646+int vc_get_rlimit_mask(uint32_t id, void __user *data)
18647+{
18648+ if (copy_to_user(data, &vlimit_mask, sizeof(vlimit_mask)))
18649+ return -EFAULT;
18650+ return 0;
18651+}
d337f35e
JR
18652+
18653+
4bf69007
AM
18654+static inline void vx_reset_hits(struct _vx_limit *limit)
18655+{
18656+ int lim;
d337f35e 18657+
4bf69007
AM
18658+ for (lim = 0; lim < NUM_LIMITS; lim++) {
18659+ atomic_set(&__rlim_lhit(limit, lim), 0);
18660+ }
18661+}
d337f35e 18662+
4bf69007 18663+int vc_reset_hits(struct vx_info *vxi, void __user *data)
d337f35e 18664+{
4bf69007
AM
18665+ vx_reset_hits(&vxi->limit);
18666+ return 0;
d337f35e
JR
18667+}
18668+
4bf69007 18669+static inline void vx_reset_minmax(struct _vx_limit *limit)
d337f35e 18670+{
4bf69007
AM
18671+ rlim_t value;
18672+ int lim;
18673+
18674+ for (lim = 0; lim < NUM_LIMITS; lim++) {
18675+ value = __rlim_get(limit, lim);
18676+ __rlim_rmax(limit, lim) = value;
18677+ __rlim_rmin(limit, lim) = value;
18678+ }
d337f35e
JR
18679+}
18680+
4bf69007 18681+int vc_reset_minmax(struct vx_info *vxi, void __user *data)
d337f35e 18682+{
4bf69007
AM
18683+ vx_reset_minmax(&vxi->limit);
18684+ return 0;
d337f35e
JR
18685+}
18686+
18687+
4bf69007 18688+int vc_rlimit_stat(struct vx_info *vxi, void __user *data)
d337f35e 18689+{
4bf69007
AM
18690+ struct vcmd_rlimit_stat_v0 vc_data;
18691+ struct _vx_limit *limit = &vxi->limit;
18692+ int id;
d337f35e 18693+
4bf69007
AM
18694+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18695+ return -EFAULT;
d337f35e 18696+
4bf69007
AM
18697+ id = vc_data.id;
18698+ if (!is_accounted_vlimit(id))
18699+ return -EINVAL;
2380c486 18700+
4bf69007
AM
18701+ vx_limit_fixup(limit, id);
18702+ vc_data.hits = atomic_read(&__rlim_lhit(limit, id));
18703+ vc_data.value = __rlim_get(limit, id);
18704+ vc_data.minimum = __rlim_rmin(limit, id);
18705+ vc_data.maximum = __rlim_rmax(limit, id);
2380c486 18706+
4bf69007
AM
18707+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18708+ return -EFAULT;
18709+ return 0;
d337f35e
JR
18710+}
18711+
d337f35e 18712+
927ca606 18713+#ifdef CONFIG_MEMCG
1d9ad342
AM
18714+
18715+void dump_sysinfo(struct sysinfo *si)
18716+{
18717+ printk(KERN_INFO "sysinfo: memunit=%u\n"
18718+ "\ttotalram:\t%lu\n"
18719+ "\tfreeram:\t%lu\n"
18720+ "\tsharedram:\t%lu\n"
18721+ "\tbufferram:\t%lu\n"
18722+ "\ttotalswap:\t%lu\n"
18723+ "\tfreeswap:\t%lu\n"
18724+ "\ttotalhigh:\t%lu\n"
18725+ "\tfreehigh:\t%lu\n",
18726+ si->mem_unit,
18727+ si->totalram,
18728+ si->freeram,
18729+ si->sharedram,
18730+ si->bufferram,
18731+ si->totalswap,
18732+ si->freeswap,
18733+ si->totalhigh,
18734+ si->freehigh);
18735+}
18736+
4bf69007 18737+void vx_vsi_meminfo(struct sysinfo *val)
d337f35e 18738+{
4bf69007 18739+ struct mem_cgroup *mcg;
1d9ad342
AM
18740+ unsigned long res_limit, res_usage;
18741+ unsigned shift;
18742+
18743+ if (VXD_CBIT(cvirt, 4))
18744+ dump_sysinfo(val);
d337f35e 18745+
4bf69007
AM
18746+ rcu_read_lock();
18747+ mcg = mem_cgroup_from_task(current);
1d9ad342
AM
18748+ if (VXD_CBIT(cvirt, 5))
18749+ dump_mem_cgroup(mcg);
4bf69007
AM
18750+ rcu_read_unlock();
18751+ if (!mcg)
18752+ goto out;
d337f35e 18753+
927ca606
AM
18754+ res_limit = mem_cgroup_mem_limit_pages(mcg);
18755+ res_usage = mem_cgroup_mem_usage_pages(mcg);
1d9ad342 18756+ shift = val->mem_unit == 1 ? PAGE_SHIFT : 0;
2380c486 18757+
927ca606 18758+ if (res_limit != PAGE_COUNTER_MAX)
1d9ad342
AM
18759+ val->totalram = res_limit << shift;
18760+ val->freeram = val->totalram - (res_usage << shift);
4bf69007
AM
18761+ val->bufferram = 0;
18762+ val->totalhigh = 0;
18763+ val->freehigh = 0;
18764+out:
4bf69007 18765+ return;
d337f35e
JR
18766+}
18767+
4bf69007 18768+void vx_vsi_swapinfo(struct sysinfo *val)
d337f35e 18769+{
4bf69007
AM
18770+#ifdef CONFIG_MEMCG_SWAP
18771+ struct mem_cgroup *mcg;
1d9ad342
AM
18772+ unsigned long res_limit, res_usage, memsw_limit, memsw_usage;
18773+ signed long swap_limit, swap_usage;
18774+ unsigned shift;
18775+
18776+ if (VXD_CBIT(cvirt, 6))
18777+ dump_sysinfo(val);
d337f35e 18778+
4bf69007
AM
18779+ rcu_read_lock();
18780+ mcg = mem_cgroup_from_task(current);
1d9ad342
AM
18781+ if (VXD_CBIT(cvirt, 7))
18782+ dump_mem_cgroup(mcg);
4bf69007
AM
18783+ rcu_read_unlock();
18784+ if (!mcg)
18785+ goto out;
d337f35e 18786+
927ca606 18787+ res_limit = mem_cgroup_mem_limit_pages(mcg);
d337f35e 18788+
4bf69007 18789+ /* memory unlimited */
927ca606 18790+ if (res_limit == PAGE_COUNTER_MAX)
4bf69007 18791+ goto out;
d337f35e 18792+
1d9ad342
AM
18793+ res_usage = mem_cgroup_mem_usage_pages(mcg);
18794+ memsw_limit = mem_cgroup_memsw_limit_pages(mcg);
18795+ memsw_usage = mem_cgroup_memsw_usage_pages(mcg);
18796+ shift = val->mem_unit == 1 ? PAGE_SHIFT : 0;
18797+
4bf69007
AM
18798+ swap_limit = memsw_limit - res_limit;
18799+ /* we have a swap limit? */
927ca606 18800+ if (memsw_limit != PAGE_COUNTER_MAX)
1d9ad342 18801+ val->totalswap = swap_limit << shift;
d337f35e 18802+
4bf69007
AM
18803+ /* calculate swap part */
18804+ swap_usage = (memsw_usage > res_usage) ?
18805+ memsw_usage - res_usage : 0;
18806+
18807+ /* total shown minus usage gives free swap */
18808+ val->freeswap = (swap_usage < swap_limit) ?
1d9ad342 18809+ val->totalswap - (swap_usage << shift) : 0;
4bf69007
AM
18810+out:
18811+#else /* !CONFIG_MEMCG_SWAP */
18812+ val->totalswap = 0;
18813+ val->freeswap = 0;
18814+#endif /* !CONFIG_MEMCG_SWAP */
4bf69007 18815+ return;
d337f35e
JR
18816+}
18817+
/*
 * Page cache size to report for the current context.
 *
 * The cgroup based lookup is compiled only under CONFIG_MEMCG_BROKEN and
 * even there the statistic read is commented out, so the function
 * returns 0 in every configuration.
 */
long vx_vsi_cached(struct sysinfo *val)
{
	long cached = 0;
#ifdef CONFIG_MEMCG_BROKEN
	struct mem_cgroup *mcg;

	if (VXD_CBIT(cvirt, 8))
		dump_sysinfo(val);

	rcu_read_lock();
	mcg = mem_cgroup_from_task(current);
	if (VXD_CBIT(cvirt, 9))
		dump_mem_cgroup(mcg);
	rcu_read_unlock();
	if (!mcg)
		goto done;

	// cached = mem_cgroup_stat_read_cache(mcg);
done:
#endif
	return cached;
}
927ca606 18840+#endif /* !CONFIG_MEMCG */
d337f35e 18841+
8931d859
AM
18842diff -NurpP --minimal linux-4.4.161/kernel/vserver/limit_init.h linux-4.4.161-vs2.3.9.8/kernel/vserver/limit_init.h
18843--- linux-4.4.161/kernel/vserver/limit_init.h 1970-01-01 00:00:00.000000000 +0000
18844+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/limit_init.h 2018-10-20 04:57:21.000000000 +0000
4bf69007 18845@@ -0,0 +1,31 @@
d337f35e
JR
18846+
18847+
4bf69007
AM
18848+static inline void vx_info_init_limit(struct _vx_limit *limit)
18849+{
18850+ int lim;
d337f35e 18851+
4bf69007
AM
18852+ for (lim = 0; lim < NUM_LIMITS; lim++) {
18853+ __rlim_soft(limit, lim) = RLIM_INFINITY;
18854+ __rlim_hard(limit, lim) = RLIM_INFINITY;
18855+ __rlim_set(limit, lim, 0);
18856+ atomic_set(&__rlim_lhit(limit, lim), 0);
18857+ __rlim_rmin(limit, lim) = 0;
18858+ __rlim_rmax(limit, lim) = 0;
18859+ }
18860+}
d337f35e 18861+
4bf69007 18862+static inline void vx_info_exit_limit(struct _vx_limit *limit)
d337f35e 18863+{
4bf69007
AM
18864+ rlim_t value;
18865+ int lim;
d337f35e 18866+
4bf69007
AM
18867+ for (lim = 0; lim < NUM_LIMITS; lim++) {
18868+ if ((1 << lim) & VLIM_NOCHECK)
18869+ continue;
18870+ value = __rlim_get(limit, lim);
18871+ vxwprintk_xid(value,
18872+ "!!! limit: %p[%s,%d] = %ld on exit.",
18873+ limit, vlimit_name[lim], lim, (long)value);
18874+ }
18875+}
d337f35e 18876+
8931d859
AM
18877diff -NurpP --minimal linux-4.4.161/kernel/vserver/limit_proc.h linux-4.4.161-vs2.3.9.8/kernel/vserver/limit_proc.h
18878--- linux-4.4.161/kernel/vserver/limit_proc.h 1970-01-01 00:00:00.000000000 +0000
18879+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/limit_proc.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
18880@@ -0,0 +1,57 @@
18881+#ifndef _VX_LIMIT_PROC_H
18882+#define _VX_LIMIT_PROC_H
d337f35e 18883+
4bf69007 18884+#include <linux/vserver/limit_int.h>
d337f35e 18885+
d337f35e 18886+
4bf69007
AM
18887+#define VX_LIMIT_FMT ":\t%8ld\t%8ld/%8ld\t%8lld/%8lld\t%6d\n"
18888+#define VX_LIMIT_TOP \
18889+ "Limit\t current\t min/max\t\t soft/hard\t\thits\n"
d337f35e 18890+
4bf69007
AM
18891+#define VX_LIMIT_ARG(r) \
18892+ (unsigned long)__rlim_get(limit, r), \
18893+ (unsigned long)__rlim_rmin(limit, r), \
18894+ (unsigned long)__rlim_rmax(limit, r), \
18895+ VX_VLIM(__rlim_soft(limit, r)), \
18896+ VX_VLIM(__rlim_hard(limit, r)), \
18897+ atomic_read(&__rlim_lhit(limit, r))
d337f35e 18898+
4bf69007
AM
18899+static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
18900+{
18901+ vx_limit_fixup(limit, -1);
18902+ return sprintf(buffer, VX_LIMIT_TOP
18903+ "PROC" VX_LIMIT_FMT
18904+ "VM" VX_LIMIT_FMT
18905+ "VML" VX_LIMIT_FMT
18906+ "RSS" VX_LIMIT_FMT
18907+ "ANON" VX_LIMIT_FMT
18908+ "RMAP" VX_LIMIT_FMT
18909+ "FILES" VX_LIMIT_FMT
18910+ "OFD" VX_LIMIT_FMT
18911+ "LOCKS" VX_LIMIT_FMT
18912+ "SOCK" VX_LIMIT_FMT
18913+ "MSGQ" VX_LIMIT_FMT
18914+ "SHM" VX_LIMIT_FMT
18915+ "SEMA" VX_LIMIT_FMT
18916+ "SEMS" VX_LIMIT_FMT
18917+ "DENT" VX_LIMIT_FMT,
18918+ VX_LIMIT_ARG(RLIMIT_NPROC),
18919+ VX_LIMIT_ARG(RLIMIT_AS),
18920+ VX_LIMIT_ARG(RLIMIT_MEMLOCK),
18921+ VX_LIMIT_ARG(RLIMIT_RSS),
18922+ VX_LIMIT_ARG(VLIMIT_ANON),
18923+ VX_LIMIT_ARG(VLIMIT_MAPPED),
18924+ VX_LIMIT_ARG(RLIMIT_NOFILE),
18925+ VX_LIMIT_ARG(VLIMIT_OPENFD),
18926+ VX_LIMIT_ARG(RLIMIT_LOCKS),
18927+ VX_LIMIT_ARG(VLIMIT_NSOCK),
18928+ VX_LIMIT_ARG(RLIMIT_MSGQUEUE),
18929+ VX_LIMIT_ARG(VLIMIT_SHMEM),
18930+ VX_LIMIT_ARG(VLIMIT_SEMARY),
18931+ VX_LIMIT_ARG(VLIMIT_NSEMS),
18932+ VX_LIMIT_ARG(VLIMIT_DENTRY));
d337f35e
JR
18933+}
18934+
4bf69007 18935+#endif /* _VX_LIMIT_PROC_H */
d337f35e 18936+
d337f35e 18937+
8931d859
AM
18938diff -NurpP --minimal linux-4.4.161/kernel/vserver/network.c linux-4.4.161-vs2.3.9.8/kernel/vserver/network.c
18939--- linux-4.4.161/kernel/vserver/network.c 1970-01-01 00:00:00.000000000 +0000
18940+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/network.c 2018-10-20 04:57:21.000000000 +0000
5cb1760b 18941@@ -0,0 +1,1053 @@
d337f35e 18942+/*
4bf69007 18943+ * linux/kernel/vserver/network.c
d337f35e 18944+ *
4bf69007
AM
18945+ * Virtual Server: Network Support
18946+ *
d6221c00 18947+ * Copyright (C) 2003-2007 Herbert Pötzl
4bf69007
AM
18948+ *
18949+ * V0.01 broken out from vcontext V0.05
18950+ * V0.02 cleaned up implementation
18951+ * V0.03 added equiv nx commands
18952+ * V0.04 switch to RCU based hash
18953+ * V0.05 and back to locking again
18954+ * V0.06 changed vcmds to nxi arg
18955+ * V0.07 have __create claim() the nxi
d337f35e 18956+ *
d337f35e 18957+ */
d337f35e 18958+
4bf69007
AM
18959+#include <linux/err.h>
18960+#include <linux/slab.h>
18961+#include <linux/rcupdate.h>
18962+#include <net/ipv6.h>
d337f35e 18963+
4bf69007
AM
18964+#include <linux/vs_network.h>
18965+#include <linux/vs_pid.h>
18966+#include <linux/vserver/network_cmd.h>
d337f35e
JR
18967+
18968+
4bf69007
AM
18969+atomic_t nx_global_ctotal = ATOMIC_INIT(0);
18970+atomic_t nx_global_cactive = ATOMIC_INIT(0);
d337f35e 18971+
4bf69007
AM
18972+static struct kmem_cache *nx_addr_v4_cachep = NULL;
18973+static struct kmem_cache *nx_addr_v6_cachep = NULL;
d337f35e 18974+
d337f35e 18975+
4bf69007 18976+static int __init init_network(void)
d337f35e 18977+{
4bf69007
AM
18978+ nx_addr_v4_cachep = kmem_cache_create("nx_v4_addr_cache",
18979+ sizeof(struct nx_addr_v4), 0,
18980+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
18981+ nx_addr_v6_cachep = kmem_cache_create("nx_v6_addr_cache",
18982+ sizeof(struct nx_addr_v6), 0,
18983+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
d337f35e
JR
18984+ return 0;
18985+}
18986+
18987+
4bf69007 18988+/* __alloc_nx_addr_v4() */
d337f35e 18989+
4bf69007 18990+static inline struct nx_addr_v4 *__alloc_nx_addr_v4(void)
d337f35e 18991+{
4bf69007
AM
18992+ struct nx_addr_v4 *nxa = kmem_cache_alloc(
18993+ nx_addr_v4_cachep, GFP_KERNEL);
92598135 18994+
4bf69007
AM
18995+ if (!IS_ERR(nxa))
18996+ memset(nxa, 0, sizeof(*nxa));
18997+ return nxa;
d337f35e
JR
18998+}
18999+
4bf69007 19000+/* __dealloc_nx_addr_v4() */
d337f35e 19001+
4bf69007
AM
19002+static inline void __dealloc_nx_addr_v4(struct nx_addr_v4 *nxa)
19003+{
19004+ kmem_cache_free(nx_addr_v4_cachep, nxa);
19005+}
d337f35e 19006+
4bf69007 19007+/* __dealloc_nx_addr_v4_all() */
d337f35e 19008+
4bf69007 19009+static inline void __dealloc_nx_addr_v4_all(struct nx_addr_v4 *nxa)
d337f35e 19010+{
4bf69007
AM
19011+ while (nxa) {
19012+ struct nx_addr_v4 *next = nxa->next;
d337f35e 19013+
4bf69007
AM
19014+ __dealloc_nx_addr_v4(nxa);
19015+ nxa = next;
19016+ }
19017+}
d337f35e 19018+
d337f35e 19019+
4bf69007 19020+#ifdef CONFIG_IPV6
d337f35e 19021+
4bf69007 19022+/* __alloc_nx_addr_v6() */
d337f35e 19023+
4bf69007
AM
19024+static inline struct nx_addr_v6 *__alloc_nx_addr_v6(void)
19025+{
19026+ struct nx_addr_v6 *nxa = kmem_cache_alloc(
19027+ nx_addr_v6_cachep, GFP_KERNEL);
d337f35e 19028+
4bf69007
AM
19029+ if (!IS_ERR(nxa))
19030+ memset(nxa, 0, sizeof(*nxa));
19031+ return nxa;
d337f35e
JR
19032+}
19033+
4bf69007
AM
19034+/* __dealloc_nx_addr_v6() */
19035+
19036+static inline void __dealloc_nx_addr_v6(struct nx_addr_v6 *nxa)
d337f35e 19037+{
4bf69007
AM
19038+ kmem_cache_free(nx_addr_v6_cachep, nxa);
19039+}
d337f35e 19040+
4bf69007 19041+/* __dealloc_nx_addr_v6_all() */
d337f35e 19042+
4bf69007
AM
19043+static inline void __dealloc_nx_addr_v6_all(struct nx_addr_v6 *nxa)
19044+{
19045+ while (nxa) {
19046+ struct nx_addr_v6 *next = nxa->next;
d337f35e 19047+
4bf69007
AM
19048+ __dealloc_nx_addr_v6(nxa);
19049+ nxa = next;
19050+ }
19051+}
d337f35e 19052+
4bf69007 19053+#endif /* CONFIG_IPV6 */
d337f35e 19054+
4bf69007 19055+/* __alloc_nx_info()
d337f35e 19056+
4bf69007
AM
19057+ * allocate an initialized nx_info struct
19058+ * doesn't make it visible (hash) */
d337f35e 19059+
61333608 19060+static struct nx_info *__alloc_nx_info(vnid_t nid)
d337f35e 19061+{
4bf69007 19062+ struct nx_info *new = NULL;
d337f35e 19063+
4bf69007 19064+ vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
d337f35e 19065+
4bf69007
AM
19066+ /* would this benefit from a slab cache? */
19067+ new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
19068+ if (!new)
19069+ return 0;
d337f35e 19070+
4bf69007
AM
19071+ memset(new, 0, sizeof(struct nx_info));
19072+ new->nx_id = nid;
19073+ INIT_HLIST_NODE(&new->nx_hlist);
19074+ atomic_set(&new->nx_usecnt, 0);
19075+ atomic_set(&new->nx_tasks, 0);
19076+ spin_lock_init(&new->addr_lock);
19077+ new->nx_state = 0;
d337f35e 19078+
4bf69007 19079+ new->nx_flags = NXF_INIT_SET;
d337f35e 19080+
4bf69007 19081+ /* rest of init goes here */
d337f35e 19082+
4bf69007
AM
19083+ new->v4_lback.s_addr = htonl(INADDR_LOOPBACK);
19084+ new->v4_bcast.s_addr = htonl(INADDR_BROADCAST);
19085+
19086+ vxdprintk(VXD_CBIT(nid, 0),
19087+ "alloc_nx_info(%d) = %p", nid, new);
19088+ atomic_inc(&nx_global_ctotal);
19089+ return new;
d337f35e
JR
19090+}
19091+
4bf69007 19092+/* __dealloc_nx_info()
d337f35e 19093+
4bf69007 19094+ * final disposal of nx_info */
d337f35e 19095+
4bf69007
AM
19096+static void __dealloc_nx_info(struct nx_info *nxi)
19097+{
19098+ vxdprintk(VXD_CBIT(nid, 0),
19099+ "dealloc_nx_info(%p)", nxi);
d337f35e 19100+
4bf69007
AM
19101+ nxi->nx_hlist.next = LIST_POISON1;
19102+ nxi->nx_id = -1;
d337f35e 19103+
4bf69007
AM
19104+ BUG_ON(atomic_read(&nxi->nx_usecnt));
19105+ BUG_ON(atomic_read(&nxi->nx_tasks));
19106+
19107+ __dealloc_nx_addr_v4_all(nxi->v4.next);
19108+#ifdef CONFIG_IPV6
19109+ __dealloc_nx_addr_v6_all(nxi->v6.next);
19110+#endif
19111+
19112+ nxi->nx_state |= NXS_RELEASED;
19113+ kfree(nxi);
19114+ atomic_dec(&nx_global_ctotal);
d337f35e
JR
19115+}
19116+
4bf69007
AM
19117+static void __shutdown_nx_info(struct nx_info *nxi)
19118+{
19119+ nxi->nx_state |= NXS_SHUTDOWN;
19120+ vs_net_change(nxi, VSC_NETDOWN);
19121+}
d337f35e 19122+
4bf69007 19123+/* exported stuff */
d337f35e 19124+
4bf69007
AM
19125+void free_nx_info(struct nx_info *nxi)
19126+{
19127+ /* context shutdown is mandatory */
19128+ BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
d337f35e 19129+
4bf69007
AM
19130+ /* context must not be hashed */
19131+ BUG_ON(nxi->nx_state & NXS_HASHED);
d337f35e 19132+
4bf69007
AM
19133+ BUG_ON(atomic_read(&nxi->nx_usecnt));
19134+ BUG_ON(atomic_read(&nxi->nx_tasks));
d337f35e 19135+
4bf69007
AM
19136+ __dealloc_nx_info(nxi);
19137+}
d337f35e 19138+
d337f35e 19139+
4bf69007
AM
19140+void __nx_set_lback(struct nx_info *nxi)
19141+{
19142+ int nid = nxi->nx_id;
19143+ __be32 lback = htonl(INADDR_LOOPBACK ^ ((nid & 0xFFFF) << 8));
d337f35e 19144+
4bf69007
AM
19145+ nxi->v4_lback.s_addr = lback;
19146+}
d337f35e 19147+
4bf69007
AM
19148+extern int __nx_inet_add_lback(__be32 addr);
19149+extern int __nx_inet_del_lback(__be32 addr);
d337f35e
JR
19150+
19151+
4bf69007 19152+/* hash table for nx_info hash */
d337f35e 19153+
4bf69007 19154+#define NX_HASH_SIZE 13
d337f35e 19155+
4bf69007
AM
19156+struct hlist_head nx_info_hash[NX_HASH_SIZE];
19157+
19158+static DEFINE_SPINLOCK(nx_info_hash_lock);
19159+
19160+
61333608 19161+static inline unsigned int __hashval(vnid_t nid)
d337f35e 19162+{
4bf69007 19163+ return (nid % NX_HASH_SIZE);
d337f35e
JR
19164+}
19165+
d337f35e 19166+
d337f35e 19167+
4bf69007 19168+/* __hash_nx_info()
d337f35e 19169+
4bf69007
AM
19170+ * add the nxi to the global hash table
19171+ * requires the hash_lock to be held */
19172+
19173+static inline void __hash_nx_info(struct nx_info *nxi)
d337f35e 19174+{
4bf69007 19175+ struct hlist_head *head;
d337f35e 19176+
4bf69007
AM
19177+ vxd_assert_lock(&nx_info_hash_lock);
19178+ vxdprintk(VXD_CBIT(nid, 4),
19179+ "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
d337f35e 19180+
4bf69007
AM
19181+ /* context must not be hashed */
19182+ BUG_ON(nx_info_state(nxi, NXS_HASHED));
d337f35e 19183+
4bf69007
AM
19184+ nxi->nx_state |= NXS_HASHED;
19185+ head = &nx_info_hash[__hashval(nxi->nx_id)];
19186+ hlist_add_head(&nxi->nx_hlist, head);
19187+ atomic_inc(&nx_global_cactive);
19188+}
d337f35e 19189+
4bf69007 19190+/* __unhash_nx_info()
d337f35e 19191+
4bf69007
AM
19192+ * remove the nxi from the global hash table
19193+ * requires the hash_lock to be held */
d337f35e 19194+
4bf69007
AM
19195+static inline void __unhash_nx_info(struct nx_info *nxi)
19196+{
19197+ vxd_assert_lock(&nx_info_hash_lock);
19198+ vxdprintk(VXD_CBIT(nid, 4),
19199+ "__unhash_nx_info: %p[#%d.%d.%d]", nxi, nxi->nx_id,
19200+ atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_tasks));
d337f35e 19201+
4bf69007
AM
19202+ /* context must be hashed */
19203+ BUG_ON(!nx_info_state(nxi, NXS_HASHED));
19204+ /* but without tasks */
19205+ BUG_ON(atomic_read(&nxi->nx_tasks));
d337f35e 19206+
4bf69007
AM
19207+ nxi->nx_state &= ~NXS_HASHED;
19208+ hlist_del(&nxi->nx_hlist);
19209+ atomic_dec(&nx_global_cactive);
d337f35e
JR
19210+}
19211+
d337f35e 19212+
4bf69007 19213+/* __lookup_nx_info()
d337f35e 19214+
4bf69007
AM
19215+ * requires the hash_lock to be held
19216+ * doesn't increment the nx_refcnt */
d337f35e 19217+
61333608 19218+static inline struct nx_info *__lookup_nx_info(vnid_t nid)
d337f35e 19219+{
4bf69007
AM
19220+ struct hlist_head *head = &nx_info_hash[__hashval(nid)];
19221+ struct hlist_node *pos;
19222+ struct nx_info *nxi;
d337f35e 19223+
4bf69007
AM
19224+ vxd_assert_lock(&nx_info_hash_lock);
19225+ hlist_for_each(pos, head) {
19226+ nxi = hlist_entry(pos, struct nx_info, nx_hlist);
19227+
19228+ if (nxi->nx_id == nid)
19229+ goto found;
d337f35e 19230+ }
4bf69007
AM
19231+ nxi = NULL;
19232+found:
19233+ vxdprintk(VXD_CBIT(nid, 0),
19234+ "__lookup_nx_info(#%u): %p[#%u]",
19235+ nid, nxi, nxi ? nxi->nx_id : 0);
19236+ return nxi;
d337f35e
JR
19237+}
19238+
19239+
4bf69007 19240+/* __create_nx_info()
d337f35e 19241+
4bf69007
AM
19242+ * create the requested context
19243+ * get(), claim() and hash it */
d337f35e 19244+
4bf69007
AM
19245+static struct nx_info *__create_nx_info(int id)
19246+{
19247+ struct nx_info *new, *nxi = NULL;
d337f35e 19248+
4bf69007 19249+ vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);
d337f35e 19250+
4bf69007
AM
19251+ if (!(new = __alloc_nx_info(id)))
19252+ return ERR_PTR(-ENOMEM);
d337f35e 19253+
4bf69007
AM
19254+ /* required to make dynamic xids unique */
19255+ spin_lock(&nx_info_hash_lock);
d337f35e 19256+
4bf69007
AM
19257+ /* static context requested */
19258+ if ((nxi = __lookup_nx_info(id))) {
19259+ vxdprintk(VXD_CBIT(nid, 0),
19260+ "create_nx_info(%d) = %p (already there)", id, nxi);
19261+ if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
19262+ nxi = ERR_PTR(-EBUSY);
19263+ else
19264+ nxi = ERR_PTR(-EEXIST);
19265+ goto out_unlock;
19266+ }
19267+ /* new context */
19268+ vxdprintk(VXD_CBIT(nid, 0),
19269+ "create_nx_info(%d) = %p (new)", id, new);
19270+ claim_nx_info(new, NULL);
19271+ __nx_set_lback(new);
19272+ __hash_nx_info(get_nx_info(new));
19273+ nxi = new, new = NULL;
d337f35e 19274+
4bf69007
AM
19275+out_unlock:
19276+ spin_unlock(&nx_info_hash_lock);
19277+ if (new)
19278+ __dealloc_nx_info(new);
19279+ return nxi;
19280+}
d337f35e
JR
19281+
19282+
d337f35e 19283+
4bf69007 19284+/* exported stuff */
d337f35e 19285+
d337f35e 19286+
4bf69007
AM
19287+void unhash_nx_info(struct nx_info *nxi)
19288+{
19289+ __shutdown_nx_info(nxi);
19290+ spin_lock(&nx_info_hash_lock);
19291+ __unhash_nx_info(nxi);
19292+ spin_unlock(&nx_info_hash_lock);
d337f35e
JR
19293+}
19294+
4bf69007 19295+/* lookup_nx_info()
d337f35e 19296+
4bf69007
AM
19297+ * search for a nx_info and get() it
19298+ * negative id means current */
d337f35e 19299+
4bf69007 19300+struct nx_info *lookup_nx_info(int id)
d337f35e 19301+{
4bf69007 19302+ struct nx_info *nxi = NULL;
d337f35e 19303+
4bf69007
AM
19304+ if (id < 0) {
19305+ nxi = get_nx_info(current_nx_info());
19306+ } else if (id > 1) {
19307+ spin_lock(&nx_info_hash_lock);
19308+ nxi = get_nx_info(__lookup_nx_info(id));
19309+ spin_unlock(&nx_info_hash_lock);
d337f35e 19310+ }
4bf69007
AM
19311+ return nxi;
19312+}
d337f35e 19313+
4bf69007 19314+/* nid_is_hashed()
d337f35e 19315+
4bf69007
AM
19316+ * verify that nid is still hashed */
19317+
61333608 19318+int nid_is_hashed(vnid_t nid)
4bf69007
AM
19319+{
19320+ int hashed;
19321+
19322+ spin_lock(&nx_info_hash_lock);
19323+ hashed = (__lookup_nx_info(nid) != NULL);
19324+ spin_unlock(&nx_info_hash_lock);
19325+ return hashed;
d337f35e
JR
19326+}
19327+
19328+
4bf69007 19329+#ifdef CONFIG_PROC_FS
d337f35e 19330+
4bf69007
AM
19331+/* get_nid_list()
19332+
19333+ * get a subset of hashed nids for proc
19334+ * assumes size is at least one */
19335+
19336+int get_nid_list(int index, unsigned int *nids, int size)
d337f35e 19337+{
4bf69007 19338+ int hindex, nr_nids = 0;
d337f35e 19339+
4bf69007
AM
19340+ /* only show current and children */
19341+ if (!nx_check(0, VS_ADMIN | VS_WATCH)) {
19342+ if (index > 0)
19343+ return 0;
19344+ nids[nr_nids] = nx_current_nid();
19345+ return 1;
19346+ }
d337f35e 19347+
4bf69007
AM
19348+ for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
19349+ struct hlist_head *head = &nx_info_hash[hindex];
19350+ struct hlist_node *pos;
d337f35e 19351+
4bf69007
AM
19352+ spin_lock(&nx_info_hash_lock);
19353+ hlist_for_each(pos, head) {
19354+ struct nx_info *nxi;
19355+
19356+ if (--index > 0)
19357+ continue;
19358+
19359+ nxi = hlist_entry(pos, struct nx_info, nx_hlist);
19360+ nids[nr_nids] = nxi->nx_id;
19361+ if (++nr_nids >= size) {
19362+ spin_unlock(&nx_info_hash_lock);
d337f35e 19363+ goto out;
4bf69007 19364+ }
d337f35e 19365+ }
4bf69007
AM
19366+ /* keep the lock time short */
19367+ spin_unlock(&nx_info_hash_lock);
d337f35e
JR
19368+ }
19369+out:
4bf69007 19370+ return nr_nids;
d337f35e 19371+}
4bf69007 19372+#endif
d337f35e 19373+
4bf69007
AM
19374+
19375+/*
19376+ * migrate task to new network
19377+ * gets nxi, puts old_nxi on change
19378+ */
19379+
19380+int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
2380c486 19381+{
4bf69007
AM
19382+ struct nx_info *old_nxi;
19383+ int ret = 0;
2380c486 19384+
4bf69007
AM
19385+ if (!p || !nxi)
19386+ BUG();
d337f35e 19387+
4bf69007
AM
19388+ vxdprintk(VXD_CBIT(nid, 5),
19389+ "nx_migrate_task(%p,%p[#%d.%d.%d])",
19390+ p, nxi, nxi->nx_id,
19391+ atomic_read(&nxi->nx_usecnt),
19392+ atomic_read(&nxi->nx_tasks));
d337f35e 19393+
4bf69007
AM
19394+ if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) &&
19395+ !nx_info_flags(nxi, NXF_STATE_SETUP, 0))
19396+ return -EACCES;
d337f35e 19397+
4bf69007
AM
19398+ if (nx_info_state(nxi, NXS_SHUTDOWN))
19399+ return -EFAULT;
d337f35e 19400+
4bf69007
AM
19401+ /* maybe disallow this completely? */
19402+ old_nxi = task_get_nx_info(p);
19403+ if (old_nxi == nxi)
19404+ goto out;
d337f35e 19405+
4bf69007
AM
19406+ task_lock(p);
19407+ if (old_nxi)
19408+ clr_nx_info(&p->nx_info);
19409+ claim_nx_info(nxi, p);
19410+ set_nx_info(&p->nx_info, nxi);
19411+ p->nid = nxi->nx_id;
19412+ task_unlock(p);
d337f35e 19413+
4bf69007
AM
19414+ vxdprintk(VXD_CBIT(nid, 5),
19415+ "moved task %p into nxi:%p[#%d]",
19416+ p, nxi, nxi->nx_id);
d337f35e 19417+
4bf69007
AM
19418+ if (old_nxi)
19419+ release_nx_info(old_nxi, p);
19420+ ret = 0;
19421+out:
19422+ put_nx_info(old_nxi);
19423+ return ret;
19424+}
d337f35e 19425+
d337f35e 19426+
4bf69007
AM
19427+void nx_set_persistent(struct nx_info *nxi)
19428+{
19429+ vxdprintk(VXD_CBIT(nid, 6),
19430+ "nx_set_persistent(%p[#%d])", nxi, nxi->nx_id);
d337f35e 19431+
4bf69007
AM
19432+ get_nx_info(nxi);
19433+ claim_nx_info(nxi, NULL);
d337f35e
JR
19434+}
19435+
4bf69007 19436+void nx_clear_persistent(struct nx_info *nxi)
2380c486 19437+{
4bf69007
AM
19438+ vxdprintk(VXD_CBIT(nid, 6),
19439+ "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id);
2380c486 19440+
4bf69007
AM
19441+ release_nx_info(nxi, NULL);
19442+ put_nx_info(nxi);
2380c486 19443+}
d337f35e 19444+
4bf69007
AM
19445+void nx_update_persistent(struct nx_info *nxi)
19446+{
19447+ if (nx_info_flags(nxi, NXF_PERSISTENT, 0))
19448+ nx_set_persistent(nxi);
19449+ else
19450+ nx_clear_persistent(nxi);
19451+}
d337f35e 19452+
4bf69007
AM
19453+/* vserver syscall commands below here */
19454+
19455+/* task nid and nx_info functions */
d337f35e 19456+
4bf69007 19457+#include <asm/uaccess.h>
d337f35e
JR
19458+
19459+
4bf69007 19460+int vc_task_nid(uint32_t id)
d337f35e 19461+{
61333608 19462+ vnid_t nid;
d337f35e 19463+
4bf69007
AM
19464+ if (id) {
19465+ struct task_struct *tsk;
d337f35e 19466+
4bf69007
AM
19467+ rcu_read_lock();
19468+ tsk = find_task_by_real_pid(id);
19469+ nid = (tsk) ? tsk->nid : -ESRCH;
19470+ rcu_read_unlock();
19471+ } else
19472+ nid = nx_current_nid();
19473+ return nid;
d337f35e
JR
19474+}
19475+
19476+
4bf69007
AM
19477+int vc_nx_info(struct nx_info *nxi, void __user *data)
19478+{
19479+ struct vcmd_nx_info_v0 vc_data;
d337f35e 19480+
4bf69007 19481+ vc_data.nid = nxi->nx_id;
d337f35e 19482+
4bf69007
AM
19483+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19484+ return -EFAULT;
19485+ return 0;
19486+}
d337f35e 19487+
d337f35e 19488+
4bf69007 19489+/* network functions */
d337f35e 19490+
4bf69007
AM
19491+int vc_net_create(uint32_t nid, void __user *data)
19492+{
19493+ struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
19494+ struct nx_info *new_nxi;
19495+ int ret;
d337f35e 19496+
4bf69007
AM
19497+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19498+ return -EFAULT;
d337f35e 19499+
4bf69007
AM
19500+ if ((nid > MAX_S_CONTEXT) || (nid < 2))
19501+ return -EINVAL;
d337f35e 19502+
4bf69007
AM
19503+ new_nxi = __create_nx_info(nid);
19504+ if (IS_ERR(new_nxi))
19505+ return PTR_ERR(new_nxi);
d337f35e 19506+
4bf69007
AM
19507+ /* initial flags */
19508+ new_nxi->nx_flags = vc_data.flagword;
d337f35e 19509+
4bf69007
AM
19510+ ret = -ENOEXEC;
19511+ if (vs_net_change(new_nxi, VSC_NETUP))
19512+ goto out;
d337f35e 19513+
4bf69007
AM
19514+ ret = nx_migrate_task(current, new_nxi);
19515+ if (ret)
d337f35e
JR
19516+ goto out;
19517+
4bf69007
AM
19518+ /* return context id on success */
19519+ ret = new_nxi->nx_id;
d337f35e 19520+
4bf69007
AM
19521+ /* get a reference for persistent contexts */
19522+ if ((vc_data.flagword & NXF_PERSISTENT))
19523+ nx_set_persistent(new_nxi);
d337f35e 19524+out:
4bf69007
AM
19525+ release_nx_info(new_nxi, NULL);
19526+ put_nx_info(new_nxi);
19527+ return ret;
d337f35e
JR
19528+}
19529+
d337f35e 19530+
4bf69007
AM
19531+int vc_net_migrate(struct nx_info *nxi, void __user *data)
19532+{
19533+ return nx_migrate_task(current, nxi);
19534+}
d337f35e 19535+
2380c486 19536+
4bf69007
AM
19537+static inline
19538+struct nx_addr_v4 *__find_v4_addr(struct nx_info *nxi,
19539+ __be32 ip, __be32 ip2, __be32 mask, uint16_t type, uint16_t flags,
19540+ struct nx_addr_v4 **prev)
d337f35e 19541+{
4bf69007
AM
19542+ struct nx_addr_v4 *nxa = &nxi->v4;
19543+
19544+ for (; nxa; nxa = nxa->next) {
19545+ if ((nxa->ip[0].s_addr == ip) &&
19546+ (nxa->ip[1].s_addr == ip2) &&
19547+ (nxa->mask.s_addr == mask) &&
19548+ (nxa->type == type) &&
19549+ (nxa->flags == flags))
19550+ return nxa;
19551+
19552+ /* save previous entry */
19553+ if (prev)
19554+ *prev = nxa;
19555+ }
19556+ return NULL;
2380c486
JR
19557+}
19558+
4bf69007
AM
19559+int do_add_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
19560+ uint16_t type, uint16_t flags)
d337f35e 19561+{
4bf69007
AM
19562+ struct nx_addr_v4 *nxa = NULL;
19563+ struct nx_addr_v4 *new = __alloc_nx_addr_v4();
5cb1760b 19564+ unsigned long irqflags;
4bf69007 19565+ int ret = -EEXIST;
d337f35e 19566+
4bf69007
AM
19567+ if (IS_ERR(new))
19568+ return PTR_ERR(new);
d337f35e 19569+
5cb1760b 19570+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
4bf69007
AM
19571+ if (__find_v4_addr(nxi, ip, ip2, mask, type, flags, &nxa))
19572+ goto out_unlock;
2380c486 19573+
4bf69007
AM
19574+ if (NX_IPV4(nxi)) {
19575+ nxa->next = new;
19576+ nxa = new;
19577+ new = NULL;
19578+
19579+ /* remove single ip for ip list */
19580+ nxi->nx_flags &= ~NXF_SINGLE_IP;
19581+ }
19582+
19583+ nxa->ip[0].s_addr = ip;
19584+ nxa->ip[1].s_addr = ip2;
19585+ nxa->mask.s_addr = mask;
19586+ nxa->type = type;
19587+ nxa->flags = flags;
19588+ ret = 0;
19589+out_unlock:
5cb1760b 19590+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007
AM
19591+ if (new)
19592+ __dealloc_nx_addr_v4(new);
19593+ return ret;
d337f35e
JR
19594+}
19595+
4bf69007
AM
19596+int do_remove_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
19597+ uint16_t type, uint16_t flags)
2380c486 19598+{
4bf69007
AM
19599+ struct nx_addr_v4 *nxa = NULL;
19600+ struct nx_addr_v4 *old = NULL;
5cb1760b 19601+ unsigned long irqflags;
4bf69007 19602+ int ret = 0;
2380c486 19603+
5cb1760b 19604+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
4bf69007
AM
19605+ switch (type) {
19606+ case NXA_TYPE_ADDR:
19607+ old = __find_v4_addr(nxi, ip, ip2, mask, type, flags, &nxa);
19608+ if (old) {
19609+ if (nxa) {
19610+ nxa->next = old->next;
19611+ old->next = NULL;
19612+ } else {
19613+ if (old->next) {
19614+ nxa = old;
19615+ old = old->next;
19616+ *nxa = *old;
19617+ old->next = NULL;
19618+ } else {
19619+ memset(old, 0, sizeof(*old));
19620+ old = NULL;
19621+ }
19622+ }
19623+ } else
19624+ ret = -ESRCH;
19625+ break;
2380c486 19626+
4bf69007
AM
19627+ case NXA_TYPE_ANY:
19628+ nxa = &nxi->v4;
19629+ old = nxa->next;
19630+ memset(nxa, 0, sizeof(*nxa));
19631+ break;
19632+
19633+ default:
19634+ ret = -EINVAL;
19635+ }
5cb1760b 19636+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007
AM
19637+ __dealloc_nx_addr_v4_all(old);
19638+ return ret;
2380c486
JR
19639+}
19640+
4bf69007
AM
19641+
19642+int vc_net_add(struct nx_info *nxi, void __user *data)
2380c486 19643+{
4bf69007
AM
19644+ struct vcmd_net_addr_v0 vc_data;
19645+ int index, ret = 0;
2380c486 19646+
4bf69007 19647+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
2380c486
JR
19648+ return -EFAULT;
19649+
4bf69007
AM
19650+ switch (vc_data.type) {
19651+ case NXA_TYPE_IPV4:
19652+ if ((vc_data.count < 1) || (vc_data.count > 4))
19653+ return -EINVAL;
adc1caaa 19654+
4bf69007
AM
19655+ index = 0;
19656+ while (index < vc_data.count) {
19657+ ret = do_add_v4_addr(nxi, vc_data.ip[index].s_addr, 0,
19658+ vc_data.mask[index].s_addr, NXA_TYPE_ADDR, 0);
19659+ if (ret)
19660+ return ret;
19661+ index++;
19662+ }
19663+ ret = index;
19664+ break;
2380c486 19665+
4bf69007
AM
19666+ case NXA_TYPE_IPV4|NXA_MOD_BCAST:
19667+ nxi->v4_bcast = vc_data.ip[0];
19668+ ret = 1;
19669+ break;
2380c486 19670+
4bf69007
AM
19671+ case NXA_TYPE_IPV4|NXA_MOD_LBACK:
19672+ nxi->v4_lback = vc_data.ip[0];
19673+ ret = 1;
19674+ break;
19675+
19676+ default:
19677+ ret = -EINVAL;
19678+ break;
19679+ }
19680+ return ret;
19681+}
19682+
19683+int vc_net_remove(struct nx_info *nxi, void __user *data)
19684+{
19685+ struct vcmd_net_addr_v0 vc_data;
19686+
19687+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
2380c486 19688+ return -EFAULT;
4bf69007
AM
19689+
19690+ switch (vc_data.type) {
19691+ case NXA_TYPE_ANY:
19692+ return do_remove_v4_addr(nxi, 0, 0, 0, vc_data.type, 0);
19693+ default:
19694+ return -EINVAL;
19695+ }
2380c486
JR
19696+ return 0;
19697+}
19698+
d337f35e 19699+
4bf69007 19700+int vc_net_add_ipv4_v1(struct nx_info *nxi, void __user *data)
d337f35e 19701+{
4bf69007
AM
19702+ struct vcmd_net_addr_ipv4_v1 vc_data;
19703+
19704+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19705+ return -EFAULT;
19706+
19707+ switch (vc_data.type) {
19708+ case NXA_TYPE_ADDR:
19709+ case NXA_TYPE_MASK:
19710+ return do_add_v4_addr(nxi, vc_data.ip.s_addr, 0,
19711+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19712+
19713+ case NXA_TYPE_ADDR | NXA_MOD_BCAST:
19714+ nxi->v4_bcast = vc_data.ip;
19715+ break;
19716+
19717+ case NXA_TYPE_ADDR | NXA_MOD_LBACK:
19718+ nxi->v4_lback = vc_data.ip;
19719+ break;
19720+
19721+ default:
19722+ return -EINVAL;
19723+ }
19724+ return 0;
d337f35e
JR
19725+}
19726+
4bf69007 19727+int vc_net_add_ipv4(struct nx_info *nxi, void __user *data)
d337f35e 19728+{
4bf69007 19729+ struct vcmd_net_addr_ipv4_v2 vc_data;
d337f35e 19730+
4bf69007
AM
19731+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19732+ return -EFAULT;
19733+
19734+ switch (vc_data.type) {
19735+ case NXA_TYPE_ADDR:
19736+ case NXA_TYPE_MASK:
19737+ case NXA_TYPE_RANGE:
19738+ return do_add_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr,
19739+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19740+
19741+ case NXA_TYPE_ADDR | NXA_MOD_BCAST:
19742+ nxi->v4_bcast = vc_data.ip;
19743+ break;
19744+
19745+ case NXA_TYPE_ADDR | NXA_MOD_LBACK:
19746+ nxi->v4_lback = vc_data.ip;
19747+ break;
19748+
19749+ default:
19750+ return -EINVAL;
19751+ }
19752+ return 0;
d337f35e
JR
19753+}
19754+
4bf69007 19755+int vc_net_rem_ipv4_v1(struct nx_info *nxi, void __user *data)
d337f35e 19756+{
4bf69007
AM
19757+ struct vcmd_net_addr_ipv4_v1 vc_data;
19758+
19759+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19760+ return -EFAULT;
19761+
19762+ return do_remove_v4_addr(nxi, vc_data.ip.s_addr, 0,
19763+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
d337f35e
JR
19764+}
19765+
4bf69007 19766+int vc_net_rem_ipv4(struct nx_info *nxi, void __user *data)
d337f35e 19767+{
4bf69007
AM
19768+ struct vcmd_net_addr_ipv4_v2 vc_data;
19769+
19770+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19771+ return -EFAULT;
19772+
19773+ return do_remove_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr,
19774+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
d337f35e 19775+}
d337f35e 19776+
4bf69007 19777+#ifdef CONFIG_IPV6
d337f35e
JR
19778+
19779+static inline
4bf69007
AM
19780+struct nx_addr_v6 *__find_v6_addr(struct nx_info *nxi,
19781+ struct in6_addr *ip, struct in6_addr *mask,
19782+ uint32_t prefix, uint16_t type, uint16_t flags,
19783+ struct nx_addr_v6 **prev)
d337f35e 19784+{
4bf69007 19785+ struct nx_addr_v6 *nxa = &nxi->v6;
d337f35e 19786+
4bf69007
AM
19787+ for (; nxa; nxa = nxa->next) {
19788+ if (ipv6_addr_equal(&nxa->ip, ip) &&
19789+ ipv6_addr_equal(&nxa->mask, mask) &&
19790+ (nxa->prefix == prefix) &&
19791+ (nxa->type == type) &&
19792+ (nxa->flags == flags))
19793+ return nxa;
19794+
19795+ /* save previous entry */
19796+ if (prev)
19797+ *prev = nxa;
19798+ }
19799+ return NULL;
d337f35e
JR
19800+}
19801+
d337f35e 19802+
4bf69007
AM
19803+int do_add_v6_addr(struct nx_info *nxi,
19804+ struct in6_addr *ip, struct in6_addr *mask,
19805+ uint32_t prefix, uint16_t type, uint16_t flags)
19806+{
19807+ struct nx_addr_v6 *nxa = NULL;
19808+ struct nx_addr_v6 *new = __alloc_nx_addr_v6();
5cb1760b 19809+ unsigned long irqflags;
4bf69007 19810+ int ret = -EEXIST;
d337f35e 19811+
4bf69007
AM
19812+ if (IS_ERR(new))
19813+ return PTR_ERR(new);
d337f35e 19814+
5cb1760b 19815+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
4bf69007
AM
19816+ if (__find_v6_addr(nxi, ip, mask, prefix, type, flags, &nxa))
19817+ goto out_unlock;
d337f35e 19818+
4bf69007
AM
19819+ if (NX_IPV6(nxi)) {
19820+ nxa->next = new;
19821+ nxa = new;
19822+ new = NULL;
19823+ }
d337f35e 19824+
4bf69007
AM
19825+ nxa->ip = *ip;
19826+ nxa->mask = *mask;
19827+ nxa->prefix = prefix;
19828+ nxa->type = type;
19829+ nxa->flags = flags;
19830+ ret = 0;
19831+out_unlock:
5cb1760b 19832+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007
AM
19833+ if (new)
19834+ __dealloc_nx_addr_v6(new);
19835+ return ret;
19836+}
d337f35e 19837+
4bf69007
AM
19838+int do_remove_v6_addr(struct nx_info *nxi,
19839+ struct in6_addr *ip, struct in6_addr *mask,
19840+ uint32_t prefix, uint16_t type, uint16_t flags)
d337f35e 19841+{
4bf69007
AM
19842+ struct nx_addr_v6 *nxa = NULL;
19843+ struct nx_addr_v6 *old = NULL;
5cb1760b 19844+ unsigned long irqflags;
4bf69007 19845+ int ret = 0;
d337f35e 19846+
5cb1760b 19847+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
4bf69007
AM
19848+ switch (type) {
19849+ case NXA_TYPE_ADDR:
19850+ old = __find_v6_addr(nxi, ip, mask, prefix, type, flags, &nxa);
19851+ if (old) {
19852+ if (nxa) {
19853+ nxa->next = old->next;
19854+ old->next = NULL;
19855+ } else {
19856+ if (old->next) {
19857+ nxa = old;
19858+ old = old->next;
19859+ *nxa = *old;
19860+ old->next = NULL;
19861+ } else {
19862+ memset(old, 0, sizeof(*old));
19863+ old = NULL;
19864+ }
19865+ }
19866+ } else
19867+ ret = -ESRCH;
19868+ break;
d337f35e 19869+
4bf69007
AM
19870+ case NXA_TYPE_ANY:
19871+ nxa = &nxi->v6;
19872+ old = nxa->next;
19873+ memset(nxa, 0, sizeof(*nxa));
d337f35e
JR
19874+ break;
19875+
d337f35e 19876+ default:
4bf69007 19877+ ret = -EINVAL;
d337f35e 19878+ }
5cb1760b 19879+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007
AM
19880+ __dealloc_nx_addr_v6_all(old);
19881+ return ret;
d337f35e
JR
19882+}
19883+
4bf69007 19884+int vc_net_add_ipv6(struct nx_info *nxi, void __user *data)
d337f35e 19885+{
4bf69007 19886+ struct vcmd_net_addr_ipv6_v1 vc_data;
d337f35e 19887+
4bf69007 19888+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
19889+ return -EFAULT;
19890+
4bf69007
AM
19891+ switch (vc_data.type) {
19892+ case NXA_TYPE_ADDR:
19893+ memset(&vc_data.mask, ~0, sizeof(vc_data.mask));
19894+ /* fallthrough */
19895+ case NXA_TYPE_MASK:
19896+ return do_add_v6_addr(nxi, &vc_data.ip, &vc_data.mask,
19897+ vc_data.prefix, vc_data.type, vc_data.flags);
19898+ default:
19899+ return -EINVAL;
19900+ }
19901+ return 0;
19902+}
d337f35e 19903+
4bf69007
AM
19904+int vc_net_remove_ipv6(struct nx_info *nxi, void __user *data)
19905+{
19906+ struct vcmd_net_addr_ipv6_v1 vc_data;
19907+
19908+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19909+ return -EFAULT;
19910+
19911+ switch (vc_data.type) {
19912+ case NXA_TYPE_ADDR:
19913+ memset(&vc_data.mask, ~0, sizeof(vc_data.mask));
19914+ /* fallthrough */
19915+ case NXA_TYPE_MASK:
19916+ return do_remove_v6_addr(nxi, &vc_data.ip, &vc_data.mask,
19917+ vc_data.prefix, vc_data.type, vc_data.flags);
19918+ case NXA_TYPE_ANY:
19919+ return do_remove_v6_addr(nxi, NULL, NULL, 0, vc_data.type, 0);
19920+ default:
19921+ return -EINVAL;
19922+ }
19923+ return 0;
d337f35e
JR
19924+}
19925+
4bf69007 19926+#endif /* CONFIG_IPV6 */
d337f35e 19927+
4bf69007
AM
19928+
19929+int vc_get_nflags(struct nx_info *nxi, void __user *data)
d337f35e 19930+{
4bf69007 19931+ struct vcmd_net_flags_v0 vc_data;
d337f35e 19932+
4bf69007 19933+ vc_data.flagword = nxi->nx_flags;
d337f35e 19934+
4bf69007
AM
19935+ /* special STATE flag handling */
19936+ vc_data.mask = vs_mask_flags(~0ULL, nxi->nx_flags, NXF_ONE_TIME);
d337f35e 19937+
4bf69007
AM
19938+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19939+ return -EFAULT;
19940+ return 0;
d337f35e
JR
19941+}
19942+
4bf69007
AM
19943+int vc_set_nflags(struct nx_info *nxi, void __user *data)
19944+{
19945+ struct vcmd_net_flags_v0 vc_data;
19946+ uint64_t mask, trigger;
19947+
19948+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19949+ return -EFAULT;
d337f35e 19950+
4bf69007
AM
19951+ /* special STATE flag handling */
19952+ mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
19953+ trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
d337f35e 19954+
4bf69007
AM
19955+ nxi->nx_flags = vs_mask_flags(nxi->nx_flags,
19956+ vc_data.flagword, mask);
19957+ if (trigger & NXF_PERSISTENT)
19958+ nx_update_persistent(nxi);
19959+
19960+ return 0;
19961+}
19962+
19963+int vc_get_ncaps(struct nx_info *nxi, void __user *data)
d337f35e 19964+{
4bf69007 19965+ struct vcmd_net_caps_v0 vc_data;
d337f35e 19966+
4bf69007
AM
19967+ vc_data.ncaps = nxi->nx_ncaps;
19968+ vc_data.cmask = ~0ULL;
d337f35e 19969+
2380c486 19970+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
4bf69007
AM
19971+ return -EFAULT;
19972+ return 0;
d337f35e
JR
19973+}
19974+
4bf69007
AM
19975+int vc_set_ncaps(struct nx_info *nxi, void __user *data)
19976+{
19977+ struct vcmd_net_caps_v0 vc_data;
19978+
19979+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19980+ return -EFAULT;
19981+
19982+ nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps,
19983+ vc_data.ncaps, vc_data.cmask);
19984+ return 0;
19985+}
19986+
19987+
19988+#include <linux/module.h>
19989+
19990+module_init(init_network);
19991+
19992+EXPORT_SYMBOL_GPL(free_nx_info);
19993+EXPORT_SYMBOL_GPL(unhash_nx_info);
19994+
8931d859
AM
19995diff -NurpP --minimal linux-4.4.161/kernel/vserver/proc.c linux-4.4.161-vs2.3.9.8/kernel/vserver/proc.c
19996--- linux-4.4.161/kernel/vserver/proc.c 1970-01-01 00:00:00.000000000 +0000
19997+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/proc.c 2018-10-20 04:57:21.000000000 +0000
8de2f54c 19998@@ -0,0 +1,1040 @@
d337f35e 19999+/*
4bf69007 20000+ * linux/kernel/vserver/proc.c
d337f35e 20001+ *
4bf69007 20002+ * Virtual Context Support
d337f35e 20003+ *
d6221c00 20004+ * Copyright (C) 2003-2011 Herbert Pötzl
d337f35e 20005+ *
4bf69007
AM
20006+ * V0.01 basic structure
20007+ * V0.02 adaptation vs1.3.0
20008+ * V0.03 proc permissions
20009+ * V0.04 locking/generic
20010+ * V0.05 next generation procfs
20011+ * V0.06 inode validation
20012+ * V0.07 generic rewrite vid
20013+ * V0.08 remove inode type
20014+ * V0.09 added u/wmask info
d337f35e
JR
20015+ *
20016+ */
20017+
4bf69007 20018+#include <linux/proc_fs.h>
ec22aa5c 20019+#include <linux/fs_struct.h>
4bf69007
AM
20020+#include <linux/mount.h>
20021+#include <linux/namei.h>
20022+#include <asm/unistd.h>
2380c486 20023+
d337f35e 20024+#include <linux/vs_context.h>
4bf69007
AM
20025+#include <linux/vs_network.h>
20026+#include <linux/vs_cvirt.h>
d337f35e 20027+
4bf69007
AM
20028+#include <linux/in.h>
20029+#include <linux/inetdevice.h>
20030+#include <linux/vs_inet.h>
20031+#include <linux/vs_inet6.h>
d337f35e 20032+
4bf69007 20033+#include <linux/vserver/global.h>
d337f35e 20034+
4bf69007
AM
20035+#include "cvirt_proc.h"
20036+#include "cacct_proc.h"
20037+#include "limit_proc.h"
20038+#include "sched_proc.h"
20039+#include "vci_config.h"
d337f35e 20040+
09be7631
JR
20041+#include <../../fs/proc/internal.h>
20042+
2380c486 20043+
4bf69007
AM
20044+static inline char *print_cap_t(char *buffer, kernel_cap_t *c)
20045+{
20046+ unsigned __capi;
2380c486 20047+
4bf69007
AM
20048+ CAP_FOR_EACH_U32(__capi) {
20049+ buffer += sprintf(buffer, "%08x",
20050+ c->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
20051+ }
20052+ return buffer;
20053+}
2380c486 20054+
d337f35e 20055+
4bf69007 20056+static struct proc_dir_entry *proc_virtual;
d337f35e 20057+
4bf69007 20058+static struct proc_dir_entry *proc_virtnet;
d337f35e 20059+
d337f35e 20060+
4bf69007 20061+/* first the actual feeds */
d337f35e 20062+
d337f35e 20063+
4bf69007
AM
20064+static int proc_vci(char *buffer)
20065+{
20066+ return sprintf(buffer,
20067+ "VCIVersion:\t%04x:%04x\n"
20068+ "VCISyscall:\t%d\n"
20069+ "VCIKernel:\t%08x\n",
20070+ VCI_VERSION >> 16,
20071+ VCI_VERSION & 0xFFFF,
20072+ __NR_vserver,
20073+ vci_kernel_config());
20074+}
d337f35e 20075+
4bf69007
AM
20076+static int proc_virtual_info(char *buffer)
20077+{
20078+ return proc_vci(buffer);
d337f35e
JR
20079+}
20080+
4bf69007
AM
20081+static int proc_virtual_status(char *buffer)
20082+{
20083+ return sprintf(buffer,
20084+ "#CTotal:\t%d\n"
20085+ "#CActive:\t%d\n"
20086+ "#NSProxy:\t%d\t%d %d %d %d %d %d\n"
20087+ "#InitTask:\t%d\t%d %d\n",
20088+ atomic_read(&vx_global_ctotal),
20089+ atomic_read(&vx_global_cactive),
20090+ atomic_read(&vs_global_nsproxy),
20091+ atomic_read(&vs_global_fs),
20092+ atomic_read(&vs_global_mnt_ns),
20093+ atomic_read(&vs_global_uts_ns),
20094+ atomic_read(&nr_ipc_ns),
20095+ atomic_read(&vs_global_user_ns),
20096+ atomic_read(&vs_global_pid_ns),
20097+ atomic_read(&init_task.usage),
20098+ atomic_read(&init_task.nsproxy->count),
20099+ init_task.fs->users);
20100+}
2380c486 20101+
2380c486 20102+
4bf69007 20103+int proc_vxi_info(struct vx_info *vxi, char *buffer)
d337f35e 20104+{
4bf69007 20105+ int length;
d337f35e 20106+
4bf69007
AM
20107+ length = sprintf(buffer,
20108+ "ID:\t%d\n"
20109+ "Info:\t%p\n"
20110+ "Init:\t%d\n"
20111+ "OOM:\t%lld\n",
20112+ vxi->vx_id,
20113+ vxi,
20114+ vxi->vx_initpid,
20115+ vxi->vx_badness_bias);
20116+ return length;
d337f35e
JR
20117+}
20118+
4bf69007 20119+int proc_vxi_status(struct vx_info *vxi, char *buffer)
d337f35e 20120+{
4bf69007 20121+ char *orig = buffer;
d337f35e 20122+
4bf69007
AM
20123+ buffer += sprintf(buffer,
20124+ "UseCnt:\t%d\n"
20125+ "Tasks:\t%d\n"
20126+ "Flags:\t%016llx\n",
20127+ atomic_read(&vxi->vx_usecnt),
20128+ atomic_read(&vxi->vx_tasks),
20129+ (unsigned long long)vxi->vx_flags);
d337f35e 20130+
4bf69007
AM
20131+ buffer += sprintf(buffer, "BCaps:\t");
20132+ buffer = print_cap_t(buffer, &vxi->vx_bcaps);
20133+ buffer += sprintf(buffer, "\n");
ab30d09f 20134+
4bf69007
AM
20135+ buffer += sprintf(buffer,
20136+ "CCaps:\t%016llx\n"
20137+ "Umask:\t%16llx\n"
20138+ "Wmask:\t%16llx\n"
20139+ "Spaces:\t%08lx %08lx\n",
20140+ (unsigned long long)vxi->vx_ccaps,
20141+ (unsigned long long)vxi->vx_umask,
20142+ (unsigned long long)vxi->vx_wmask,
20143+ vxi->space[0].vx_nsmask, vxi->space[1].vx_nsmask);
20144+ return buffer - orig;
20145+}
ab30d09f 20146+
4bf69007
AM
20147+int proc_vxi_limit(struct vx_info *vxi, char *buffer)
20148+{
20149+ return vx_info_proc_limit(&vxi->limit, buffer);
20150+}
d337f35e 20151+
4bf69007
AM
20152+int proc_vxi_sched(struct vx_info *vxi, char *buffer)
20153+{
20154+ int cpu, length;
d337f35e 20155+
4bf69007
AM
20156+ length = vx_info_proc_sched(&vxi->sched, buffer);
20157+ for_each_online_cpu(cpu) {
20158+ length += vx_info_proc_sched_pc(
20159+ &vx_per_cpu(vxi, sched_pc, cpu),
20160+ buffer + length, cpu);
ec22aa5c 20161+ }
4bf69007
AM
20162+ return length;
20163+}
ec22aa5c 20164+
4bf69007
AM
20165+int proc_vxi_nsproxy0(struct vx_info *vxi, char *buffer)
20166+{
20167+ return vx_info_proc_nsproxy(vxi->space[0].vx_nsproxy, buffer);
20168+}
d337f35e 20169+
4bf69007
AM
20170+int proc_vxi_nsproxy1(struct vx_info *vxi, char *buffer)
20171+{
20172+ return vx_info_proc_nsproxy(vxi->space[1].vx_nsproxy, buffer);
20173+}
ec22aa5c 20174+
4bf69007
AM
20175+int proc_vxi_cvirt(struct vx_info *vxi, char *buffer)
20176+{
20177+ int cpu, length;
d33d7b00 20178+
4bf69007
AM
20179+ vx_update_load(vxi);
20180+ length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
20181+ for_each_online_cpu(cpu) {
20182+ length += vx_info_proc_cvirt_pc(
20183+ &vx_per_cpu(vxi, cvirt_pc, cpu),
20184+ buffer + length, cpu);
3bac966d 20185+ }
4bf69007
AM
20186+ return length;
20187+}
3bac966d 20188+
4bf69007
AM
20189+int proc_vxi_cacct(struct vx_info *vxi, char *buffer)
20190+{
20191+ return vx_info_proc_cacct(&vxi->cacct, buffer);
d337f35e
JR
20192+}
20193+
20194+
4bf69007 20195+static int proc_virtnet_info(char *buffer)
d337f35e 20196+{
4bf69007
AM
20197+ return proc_vci(buffer);
20198+}
ab30d09f 20199+
4bf69007
AM
20200+static int proc_virtnet_status(char *buffer)
20201+{
20202+ return sprintf(buffer,
20203+ "#CTotal:\t%d\n"
20204+ "#CActive:\t%d\n",
20205+ atomic_read(&nx_global_ctotal),
20206+ atomic_read(&nx_global_cactive));
20207+}
d337f35e 20208+
4bf69007
AM
20209+int proc_nxi_info(struct nx_info *nxi, char *buffer)
20210+{
20211+ struct nx_addr_v4 *v4a;
20212+#ifdef CONFIG_IPV6
20213+ struct nx_addr_v6 *v6a;
20214+#endif
20215+ int length, i;
ab30d09f 20216+
4bf69007
AM
20217+ length = sprintf(buffer,
20218+ "ID:\t%d\n"
20219+ "Info:\t%p\n"
20220+ "Bcast:\t" NIPQUAD_FMT "\n"
20221+ "Lback:\t" NIPQUAD_FMT "\n",
20222+ nxi->nx_id,
20223+ nxi,
20224+ NIPQUAD(nxi->v4_bcast.s_addr),
20225+ NIPQUAD(nxi->v4_lback.s_addr));
ab30d09f 20226+
4bf69007
AM
20227+ if (!NX_IPV4(nxi))
20228+ goto skip_v4;
20229+ for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
20230+ length += sprintf(buffer + length, "%d:\t" NXAV4_FMT "\n",
20231+ i, NXAV4(v4a));
20232+skip_v4:
20233+#ifdef CONFIG_IPV6
20234+ if (!NX_IPV6(nxi))
20235+ goto skip_v6;
20236+ for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
20237+ length += sprintf(buffer + length, "%d:\t" NXAV6_FMT "\n",
20238+ i, NXAV6(v6a));
20239+skip_v6:
20240+#endif
20241+ return length;
20242+}
2380c486 20243+
4bf69007
AM
20244+int proc_nxi_status(struct nx_info *nxi, char *buffer)
20245+{
20246+ int length;
ec22aa5c 20247+
4bf69007
AM
20248+ length = sprintf(buffer,
20249+ "UseCnt:\t%d\n"
20250+ "Tasks:\t%d\n"
20251+ "Flags:\t%016llx\n"
20252+ "NCaps:\t%016llx\n",
20253+ atomic_read(&nxi->nx_usecnt),
20254+ atomic_read(&nxi->nx_tasks),
20255+ (unsigned long long)nxi->nx_flags,
20256+ (unsigned long long)nxi->nx_ncaps);
20257+ return length;
20258+}
ec22aa5c 20259+
ec22aa5c 20260+
d337f35e 20261+
4bf69007 20262+/* here the inode helpers */
d337f35e 20263+
4bf69007
AM
20264+struct vs_entry {
20265+ int len;
20266+ char *name;
20267+ mode_t mode;
20268+ struct inode_operations *iop;
20269+ struct file_operations *fop;
20270+ union proc_op op;
20271+};
d337f35e 20272+
4bf69007
AM
20273+static struct inode *vs_proc_make_inode(struct super_block *sb, struct vs_entry *p)
20274+{
20275+ struct inode *inode = new_inode(sb);
3bac966d 20276+
4bf69007
AM
20277+ if (!inode)
20278+ goto out;
3bac966d 20279+
4bf69007
AM
20280+ inode->i_mode = p->mode;
20281+ if (p->iop)
20282+ inode->i_op = p->iop;
20283+ if (p->fop)
20284+ inode->i_fop = p->fop;
3bac966d 20285+
4bf69007
AM
20286+ set_nlink(inode, (p->mode & S_IFDIR) ? 2 : 1);
20287+ inode->i_flags |= S_IMMUTABLE;
3bac966d 20288+
4bf69007 20289+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2380c486 20290+
8ce283e1
AM
20291+ i_uid_write(inode, 0);
20292+ i_gid_write(inode, 0);
20293+ i_tag_write(inode, 0);
4bf69007
AM
20294+out:
20295+ return inode;
d337f35e
JR
20296+}
20297+
4bf69007
AM
20298+static struct dentry *vs_proc_instantiate(struct inode *dir,
20299+ struct dentry *dentry, int id, void *ptr)
2380c486 20300+{
4bf69007
AM
20301+ struct vs_entry *p = ptr;
20302+ struct inode *inode = vs_proc_make_inode(dir->i_sb, p);
20303+ struct dentry *error = ERR_PTR(-EINVAL);
2380c486 20304+
4bf69007
AM
20305+ if (!inode)
20306+ goto out;
2380c486 20307+
4bf69007
AM
20308+ PROC_I(inode)->op = p->op;
20309+ PROC_I(inode)->fd = id;
20310+ d_add(dentry, inode);
20311+ error = NULL;
20312+out:
20313+ return error;
2380c486
JR
20314+}
20315+
4bf69007 20316+/* Lookups */
2380c486 20317+
09be7631
JR
20318+typedef struct dentry *vx_instantiate_t(struct inode *, struct dentry *, int, void *);
20319+
2380c486 20320+
4bf69007
AM
20321+/*
20322+ * Fill a directory entry.
20323+ *
20324+ * If possible create the dcache entry and derive our inode number and
20325+ * file type from dcache entry.
20326+ *
20327+ * Since all of the proc inode numbers are dynamically generated, the inode
20328+ * numbers do not exist until the inode is cached. This means creating the
c2e5f7c8
JR
20329+ * dcache entry in iterate is necessary to keep the inode numbers
20330+ * reported by iterate in sync with the inode numbers reported
4bf69007
AM
20331+ * by stat.
20332+ */
c2e5f7c8 20333+static int vx_proc_fill_cache(struct file *filp, struct dir_context *ctx,
09be7631 20334+ char *name, int len, vx_instantiate_t instantiate, int id, void *ptr)
2380c486 20335+{
927ca606 20336+ struct dentry *child, *dir = filp->f_path.dentry;
4bf69007
AM
20337+ struct inode *inode;
20338+ struct qstr qname;
20339+ ino_t ino = 0;
20340+ unsigned type = DT_UNKNOWN;
d337f35e 20341+
4bf69007
AM
20342+ qname.name = name;
20343+ qname.len = len;
20344+ qname.hash = full_name_hash(name, len);
d337f35e 20345+
4bf69007
AM
20346+ child = d_lookup(dir, &qname);
20347+ if (!child) {
20348+ struct dentry *new;
20349+ new = d_alloc(dir, &qname);
20350+ if (new) {
20351+ child = instantiate(dir->d_inode, new, id, ptr);
20352+ if (child)
20353+ dput(new);
20354+ else
20355+ child = new;
20356+ }
20357+ }
20358+ if (!child || IS_ERR(child) || !child->d_inode)
20359+ goto end_instantiate;
20360+ inode = child->d_inode;
20361+ if (inode) {
20362+ ino = inode->i_ino;
20363+ type = inode->i_mode >> 12;
20364+ }
20365+ dput(child);
20366+end_instantiate:
20367+ if (!ino)
4bf69007 20368+ ino = 1;
c2e5f7c8 20369+ return !dir_emit(ctx, name, len, ino, type);
4bf69007 20370+}
d337f35e 20371+
d337f35e 20372+
d337f35e 20373+
4bf69007 20374+/* get and revalidate vx_info/xid */
2380c486 20375+
4bf69007
AM
20376+static inline
20377+struct vx_info *get_proc_vx_info(struct inode *inode)
20378+{
20379+ return lookup_vx_info(PROC_I(inode)->fd);
d337f35e
JR
20380+}
20381+
4bf69007 20382+static int proc_xid_revalidate(struct dentry *dentry, unsigned int flags)
d337f35e 20383+{
4bf69007 20384+ struct inode *inode = dentry->d_inode;
61333608 20385+ vxid_t xid = PROC_I(inode)->fd;
2380c486 20386+
4bf69007
AM
20387+ if (flags & LOOKUP_RCU) /* FIXME: can be dropped? */
20388+ return -ECHILD;
2380c486 20389+
4bf69007
AM
20390+ if (!xid || xid_is_hashed(xid))
20391+ return 1;
20392+ d_drop(dentry);
d337f35e
JR
20393+ return 0;
20394+}
20395+
d337f35e 20396+
4bf69007 20397+/* get and revalidate nx_info/nid */
d337f35e 20398+
4bf69007
AM
20399+static int proc_nid_revalidate(struct dentry *dentry, unsigned int flags)
20400+{
20401+ struct inode *inode = dentry->d_inode;
61333608 20402+ vnid_t nid = PROC_I(inode)->fd;
2380c486 20403+
4bf69007
AM
20404+ if (flags & LOOKUP_RCU) /* FIXME: can be dropped? */
20405+ return -ECHILD;
2380c486 20406+
4bf69007
AM
20407+ if (!nid || nid_is_hashed(nid))
20408+ return 1;
20409+ d_drop(dentry);
20410+ return 0;
d337f35e
JR
20411+}
20412+
4bf69007
AM
20413+
20414+
20415+#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
20416+
20417+static ssize_t proc_vs_info_read(struct file *file, char __user *buf,
20418+ size_t count, loff_t *ppos)
d337f35e 20419+{
927ca606 20420+ struct inode *inode = file->f_path.dentry->d_inode;
4bf69007
AM
20421+ unsigned long page;
20422+ ssize_t length = 0;
20423+
20424+ if (count > PROC_BLOCK_SIZE)
20425+ count = PROC_BLOCK_SIZE;
20426+
20427+ /* fade that out as soon as stable */
20428+ WARN_ON(PROC_I(inode)->fd);
20429+
20430+ if (!(page = __get_free_page(GFP_KERNEL)))
20431+ return -ENOMEM;
20432+
20433+ BUG_ON(!PROC_I(inode)->op.proc_vs_read);
20434+ length = PROC_I(inode)->op.proc_vs_read((char *)page);
20435+
20436+ if (length >= 0)
20437+ length = simple_read_from_buffer(buf, count, ppos,
20438+ (char *)page, length);
20439+
20440+ free_page(page);
20441+ return length;
d337f35e
JR
20442+}
20443+
4bf69007
AM
20444+static ssize_t proc_vx_info_read(struct file *file, char __user *buf,
20445+ size_t count, loff_t *ppos)
20446+{
927ca606 20447+ struct inode *inode = file->f_path.dentry->d_inode;
4bf69007 20448+ struct vx_info *vxi = NULL;
61333608 20449+ vxid_t xid = PROC_I(inode)->fd;
4bf69007
AM
20450+ unsigned long page;
20451+ ssize_t length = 0;
d337f35e 20452+
4bf69007
AM
20453+ if (count > PROC_BLOCK_SIZE)
20454+ count = PROC_BLOCK_SIZE;
20455+
20456+ /* fade that out as soon as stable */
20457+ WARN_ON(!xid);
20458+ vxi = lookup_vx_info(xid);
20459+ if (!vxi)
20460+ goto out;
d337f35e 20461+
4bf69007
AM
20462+ length = -ENOMEM;
20463+ if (!(page = __get_free_page(GFP_KERNEL)))
20464+ goto out_put;
d337f35e 20465+
4bf69007
AM
20466+ BUG_ON(!PROC_I(inode)->op.proc_vxi_read);
20467+ length = PROC_I(inode)->op.proc_vxi_read(vxi, (char *)page);
d337f35e 20468+
4bf69007
AM
20469+ if (length >= 0)
20470+ length = simple_read_from_buffer(buf, count, ppos,
20471+ (char *)page, length);
d337f35e 20472+
4bf69007
AM
20473+ free_page(page);
20474+out_put:
20475+ put_vx_info(vxi);
20476+out:
20477+ return length;
20478+}
20479+
20480+static ssize_t proc_nx_info_read(struct file *file, char __user *buf,
20481+ size_t count, loff_t *ppos)
d337f35e 20482+{
927ca606 20483+ struct inode *inode = file->f_path.dentry->d_inode;
4bf69007 20484+ struct nx_info *nxi = NULL;
61333608 20485+ vnid_t nid = PROC_I(inode)->fd;
4bf69007
AM
20486+ unsigned long page;
20487+ ssize_t length = 0;
d337f35e 20488+
4bf69007
AM
20489+ if (count > PROC_BLOCK_SIZE)
20490+ count = PROC_BLOCK_SIZE;
d337f35e 20491+
4bf69007
AM
20492+ /* fade that out as soon as stable */
20493+ WARN_ON(!nid);
20494+ nxi = lookup_nx_info(nid);
20495+ if (!nxi)
20496+ goto out;
d337f35e 20497+
4bf69007
AM
20498+ length = -ENOMEM;
20499+ if (!(page = __get_free_page(GFP_KERNEL)))
20500+ goto out_put;
d337f35e 20501+
4bf69007
AM
20502+ BUG_ON(!PROC_I(inode)->op.proc_nxi_read);
20503+ length = PROC_I(inode)->op.proc_nxi_read(nxi, (char *)page);
2380c486 20504+
4bf69007
AM
20505+ if (length >= 0)
20506+ length = simple_read_from_buffer(buf, count, ppos,
20507+ (char *)page, length);
d337f35e 20508+
4bf69007
AM
20509+ free_page(page);
20510+out_put:
20511+ put_nx_info(nxi);
20512+out:
20513+ return length;
20514+}
2380c486 20515+
d337f35e 20516+
763640ca 20517+
4bf69007 20518+/* here comes the lower level */
763640ca 20519+
265d6dcc 20520+
4bf69007
AM
20521+#define NOD(NAME, MODE, IOP, FOP, OP) { \
20522+ .len = sizeof(NAME) - 1, \
20523+ .name = (NAME), \
20524+ .mode = MODE, \
20525+ .iop = IOP, \
20526+ .fop = FOP, \
20527+ .op = OP, \
20528+}
d337f35e 20529+
d337f35e 20530+
4bf69007
AM
20531+#define DIR(NAME, MODE, OTYPE) \
20532+ NOD(NAME, (S_IFDIR | (MODE)), \
20533+ &proc_ ## OTYPE ## _inode_operations, \
20534+ &proc_ ## OTYPE ## _file_operations, { } )
d337f35e 20535+
4bf69007
AM
20536+#define INF(NAME, MODE, OTYPE) \
20537+ NOD(NAME, (S_IFREG | (MODE)), NULL, \
20538+ &proc_vs_info_file_operations, \
20539+ { .proc_vs_read = &proc_##OTYPE } )
d337f35e 20540+
4bf69007
AM
20541+#define VINF(NAME, MODE, OTYPE) \
20542+ NOD(NAME, (S_IFREG | (MODE)), NULL, \
20543+ &proc_vx_info_file_operations, \
20544+ { .proc_vxi_read = &proc_##OTYPE } )
2380c486 20545+
4bf69007
AM
20546+#define NINF(NAME, MODE, OTYPE) \
20547+ NOD(NAME, (S_IFREG | (MODE)), NULL, \
20548+ &proc_nx_info_file_operations, \
20549+ { .proc_nxi_read = &proc_##OTYPE } )
d337f35e 20550+
d337f35e 20551+
4bf69007
AM
20552+static struct file_operations proc_vs_info_file_operations = {
20553+ .read = proc_vs_info_read,
20554+};
d337f35e 20555+
4bf69007
AM
20556+static struct file_operations proc_vx_info_file_operations = {
20557+ .read = proc_vx_info_read,
20558+};
d337f35e 20559+
4bf69007
AM
20560+static struct dentry_operations proc_xid_dentry_operations = {
20561+ .d_revalidate = proc_xid_revalidate,
20562+};
d337f35e 20563+
4bf69007
AM
20564+static struct vs_entry vx_base_stuff[] = {
20565+ VINF("info", S_IRUGO, vxi_info),
20566+ VINF("status", S_IRUGO, vxi_status),
20567+ VINF("limit", S_IRUGO, vxi_limit),
20568+ VINF("sched", S_IRUGO, vxi_sched),
20569+ VINF("nsproxy", S_IRUGO, vxi_nsproxy0),
20570+ VINF("nsproxy1",S_IRUGO, vxi_nsproxy1),
20571+ VINF("cvirt", S_IRUGO, vxi_cvirt),
20572+ VINF("cacct", S_IRUGO, vxi_cacct),
20573+ {}
20574+};
2380c486 20575+
d337f35e 20576+
d337f35e 20577+
d337f35e 20578+
4bf69007
AM
20579+static struct dentry *proc_xid_instantiate(struct inode *dir,
20580+ struct dentry *dentry, int id, void *ptr)
20581+{
20582+ dentry->d_op = &proc_xid_dentry_operations;
20583+ return vs_proc_instantiate(dir, dentry, id, ptr);
20584+}
2380c486 20585+
4bf69007
AM
20586+static struct dentry *proc_xid_lookup(struct inode *dir,
20587+ struct dentry *dentry, unsigned int flags)
20588+{
20589+ struct vs_entry *p = vx_base_stuff;
20590+ struct dentry *error = ERR_PTR(-ENOENT);
2380c486 20591+
4bf69007
AM
20592+ for (; p->name; p++) {
20593+ if (p->len != dentry->d_name.len)
20594+ continue;
20595+ if (!memcmp(dentry->d_name.name, p->name, p->len))
20596+ break;
20597+ }
20598+ if (!p->name)
20599+ goto out;
d337f35e 20600+
4bf69007
AM
20601+ error = proc_xid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
20602+out:
20603+ return error;
20604+}
9f7054f1 20605+
c2e5f7c8 20606+static int proc_xid_iterate(struct file *filp, struct dir_context *ctx)
4bf69007 20607+{
927ca606 20608+ struct dentry *dentry = filp->f_path.dentry;
4bf69007
AM
20609+ struct inode *inode = dentry->d_inode;
20610+ struct vs_entry *p = vx_base_stuff;
20611+ int size = sizeof(vx_base_stuff) / sizeof(struct vs_entry);
c2e5f7c8 20612+ int index;
2380c486 20613+
8de2f54c
AM
20614+ if (!dir_emit_dots(filp, ctx))
20615+ return 0;
20616+
20617+ index = ctx->pos - 2;
20618+ if (index < size) {
4bf69007 20619+ for (p += index; p->name; p++) {
c2e5f7c8 20620+ if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
4bf69007 20621+ vs_proc_instantiate, PROC_I(inode)->fd, p))
8de2f54c 20622+ return 0;
c2e5f7c8 20623+ ctx->pos++;
4bf69007 20624+ }
d337f35e 20625+ }
4bf69007 20626+ return 1;
d337f35e
JR
20627+}
20628+
20629+
d337f35e 20630+
4bf69007
AM
20631+static struct file_operations proc_nx_info_file_operations = {
20632+ .read = proc_nx_info_read,
20633+};
d337f35e 20634+
4bf69007
AM
20635+static struct dentry_operations proc_nid_dentry_operations = {
20636+ .d_revalidate = proc_nid_revalidate,
20637+};
d337f35e 20638+
4bf69007
AM
20639+static struct vs_entry nx_base_stuff[] = {
20640+ NINF("info", S_IRUGO, nxi_info),
20641+ NINF("status", S_IRUGO, nxi_status),
20642+ {}
20643+};
2380c486 20644+
d337f35e 20645+
4bf69007
AM
20646+static struct dentry *proc_nid_instantiate(struct inode *dir,
20647+ struct dentry *dentry, int id, void *ptr)
d337f35e 20648+{
4bf69007
AM
20649+ dentry->d_op = &proc_nid_dentry_operations;
20650+ return vs_proc_instantiate(dir, dentry, id, ptr);
20651+}
d337f35e 20652+
4bf69007
AM
20653+static struct dentry *proc_nid_lookup(struct inode *dir,
20654+ struct dentry *dentry, unsigned int flags)
20655+{
20656+ struct vs_entry *p = nx_base_stuff;
20657+ struct dentry *error = ERR_PTR(-ENOENT);
d337f35e 20658+
4bf69007
AM
20659+ for (; p->name; p++) {
20660+ if (p->len != dentry->d_name.len)
20661+ continue;
20662+ if (!memcmp(dentry->d_name.name, p->name, p->len))
20663+ break;
20664+ }
20665+ if (!p->name)
20666+ goto out;
d337f35e 20667+
4bf69007
AM
20668+ error = proc_nid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
20669+out:
20670+ return error;
20671+}
d337f35e 20672+
c2e5f7c8 20673+static int proc_nid_iterate(struct file *filp, struct dir_context *ctx)
4bf69007 20674+{
927ca606 20675+ struct dentry *dentry = filp->f_path.dentry;
4bf69007
AM
20676+ struct inode *inode = dentry->d_inode;
20677+ struct vs_entry *p = nx_base_stuff;
20678+ int size = sizeof(nx_base_stuff) / sizeof(struct vs_entry);
c2e5f7c8 20679+ int index;
d337f35e 20680+
8de2f54c
AM
20681+ if (!dir_emit_dots(filp, ctx))
20682+ return 0;
20683+
20684+ index = ctx->pos - 2;
20685+ if (index < size) {
4bf69007 20686+ for (p += index; p->name; p++) {
c2e5f7c8 20687+ if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
4bf69007 20688+ vs_proc_instantiate, PROC_I(inode)->fd, p))
8de2f54c 20689+ return 0;
c2e5f7c8 20690+ ctx->pos++;
4bf69007
AM
20691+ }
20692+ }
4bf69007
AM
20693+ return 1;
20694+}
2380c486 20695+
d337f35e 20696+
4bf69007 20697+#define MAX_MULBY10 ((~0U - 9) / 10)
d337f35e 20698+
4bf69007
AM
20699+static inline int atovid(const char *str, int len)
20700+{
20701+ int vid, c;
d337f35e 20702+
4bf69007
AM
20703+ vid = 0;
20704+ while (len-- > 0) {
20705+ c = *str - '0';
20706+ str++;
20707+ if (c > 9)
20708+ return -1;
20709+ if (vid >= MAX_MULBY10)
20710+ return -1;
20711+ vid *= 10;
20712+ vid += c;
20713+ if (!vid)
20714+ return -1;
20715+ }
20716+ return vid;
20717+}
2380c486 20718+
4bf69007 20719+/* now the upper level (virtual) */
2380c486 20720+
2380c486 20721+
4bf69007
AM
20722+static struct file_operations proc_xid_file_operations = {
20723+ .read = generic_read_dir,
c2e5f7c8 20724+ .iterate = proc_xid_iterate,
4bf69007 20725+};
2380c486 20726+
4bf69007
AM
20727+static struct inode_operations proc_xid_inode_operations = {
20728+ .lookup = proc_xid_lookup,
20729+};
d337f35e 20730+
4bf69007
AM
20731+static struct vs_entry vx_virtual_stuff[] = {
20732+ INF("info", S_IRUGO, virtual_info),
20733+ INF("status", S_IRUGO, virtual_status),
20734+ DIR(NULL, S_IRUGO | S_IXUGO, xid),
20735+};
2380c486 20736+
d337f35e 20737+
4bf69007
AM
20738+static struct dentry *proc_virtual_lookup(struct inode *dir,
20739+ struct dentry *dentry, unsigned int flags)
20740+{
20741+ struct vs_entry *p = vx_virtual_stuff;
20742+ struct dentry *error = ERR_PTR(-ENOENT);
20743+ int id = 0;
d337f35e 20744+
4bf69007
AM
20745+ for (; p->name; p++) {
20746+ if (p->len != dentry->d_name.len)
20747+ continue;
20748+ if (!memcmp(dentry->d_name.name, p->name, p->len))
20749+ break;
20750+ }
20751+ if (p->name)
20752+ goto instantiate;
d337f35e 20753+
4bf69007
AM
20754+ id = atovid(dentry->d_name.name, dentry->d_name.len);
20755+ if ((id < 0) || !xid_is_hashed(id))
d337f35e
JR
20756+ goto out;
20757+
4bf69007
AM
20758+instantiate:
20759+ error = proc_xid_instantiate(dir, dentry, id, p);
20760+out:
20761+ return error;
20762+}
d337f35e 20763+
4bf69007
AM
20764+static struct file_operations proc_nid_file_operations = {
20765+ .read = generic_read_dir,
c2e5f7c8 20766+ .iterate = proc_nid_iterate,
4bf69007 20767+};
d337f35e 20768+
4bf69007
AM
20769+static struct inode_operations proc_nid_inode_operations = {
20770+ .lookup = proc_nid_lookup,
20771+};
d337f35e 20772+
4bf69007
AM
20773+static struct vs_entry nx_virtnet_stuff[] = {
20774+ INF("info", S_IRUGO, virtnet_info),
20775+ INF("status", S_IRUGO, virtnet_status),
20776+ DIR(NULL, S_IRUGO | S_IXUGO, nid),
20777+};
d337f35e 20778+
d337f35e 20779+
4bf69007
AM
20780+static struct dentry *proc_virtnet_lookup(struct inode *dir,
20781+ struct dentry *dentry, unsigned int flags)
20782+{
20783+ struct vs_entry *p = nx_virtnet_stuff;
20784+ struct dentry *error = ERR_PTR(-ENOENT);
20785+ int id = 0;
d337f35e 20786+
4bf69007
AM
20787+ for (; p->name; p++) {
20788+ if (p->len != dentry->d_name.len)
20789+ continue;
20790+ if (!memcmp(dentry->d_name.name, p->name, p->len))
20791+ break;
20792+ }
20793+ if (p->name)
20794+ goto instantiate;
d337f35e 20795+
4bf69007
AM
20796+ id = atovid(dentry->d_name.name, dentry->d_name.len);
20797+ if ((id < 0) || !nid_is_hashed(id))
d337f35e
JR
20798+ goto out;
20799+
4bf69007
AM
20800+instantiate:
20801+ error = proc_nid_instantiate(dir, dentry, id, p);
20802+out:
20803+ return error;
20804+}
2380c486 20805+
d337f35e 20806+
4bf69007
AM
20807+#define PROC_MAXVIDS 32
20808+
c2e5f7c8 20809+int proc_virtual_iterate(struct file *filp, struct dir_context *ctx)
4bf69007 20810+{
4bf69007
AM
20811+ struct vs_entry *p = vx_virtual_stuff;
20812+ int size = sizeof(vx_virtual_stuff) / sizeof(struct vs_entry);
c2e5f7c8 20813+ int index;
4bf69007
AM
20814+ unsigned int xid_array[PROC_MAXVIDS];
20815+ char buf[PROC_NUMBUF];
20816+ unsigned int nr_xids, i;
4bf69007 20817+
8de2f54c
AM
20818+ if (!dir_emit_dots(filp, ctx))
20819+ return 0;
20820+
20821+ index = ctx->pos - 2;
20822+ if (index < size) {
4bf69007 20823+ for (p += index; p->name; p++) {
c2e5f7c8 20824+ if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
4bf69007 20825+ vs_proc_instantiate, 0, p))
8de2f54c 20826+ return 0;
c2e5f7c8 20827+ ctx->pos++;
d337f35e
JR
20828+ }
20829+ }
8de2f54c
AM
20830+
20831+ index = ctx->pos - size;
20832+ p = &vx_virtual_stuff[size - 1];
20833+ nr_xids = get_xid_list(index, xid_array, PROC_MAXVIDS);
20834+ for (i = 0; i < nr_xids; i++) {
20835+ int n, xid = xid_array[i];
20836+ unsigned int j = PROC_NUMBUF;
20837+
20838+ n = xid;
20839+ do
20840+ buf[--j] = '0' + (n % 10);
20841+ while (n /= 10);
20842+
20843+ if (vx_proc_fill_cache(filp, ctx,
20844+ buf + j, PROC_NUMBUF - j,
20845+ vs_proc_instantiate, xid, p))
20846+ return 0;
20847+ ctx->pos++;
20848+ }
4bf69007 20849+ return 0;
d337f35e
JR
20850+}
20851+
4bf69007
AM
20852+static int proc_virtual_getattr(struct vfsmount *mnt,
20853+ struct dentry *dentry, struct kstat *stat)
d337f35e 20854+{
4bf69007 20855+ struct inode *inode = dentry->d_inode;
d337f35e 20856+
4bf69007
AM
20857+ generic_fillattr(inode, stat);
20858+ stat->nlink = 2 + atomic_read(&vx_global_cactive);
20859+ return 0;
d337f35e
JR
20860+}
20861+
4bf69007
AM
20862+static struct file_operations proc_virtual_dir_operations = {
20863+ .read = generic_read_dir,
c2e5f7c8 20864+ .iterate = proc_virtual_iterate,
d337f35e
JR
20865+};
20866+
4bf69007
AM
20867+static struct inode_operations proc_virtual_dir_inode_operations = {
20868+ .getattr = proc_virtual_getattr,
20869+ .lookup = proc_virtual_lookup,
20870+};
d337f35e 20871+
d337f35e
JR
20872+
20873+
c2e5f7c8 20874+int proc_virtnet_iterate(struct file *filp, struct dir_context *ctx)
d337f35e 20875+{
4bf69007
AM
20876+ struct vs_entry *p = nx_virtnet_stuff;
20877+ int size = sizeof(nx_virtnet_stuff) / sizeof(struct vs_entry);
c2e5f7c8 20878+ int index;
4bf69007
AM
20879+ unsigned int nid_array[PROC_MAXVIDS];
20880+ char buf[PROC_NUMBUF];
20881+ unsigned int nr_nids, i;
d337f35e 20882+
8de2f54c
AM
20883+ if (!dir_emit_dots(filp, ctx))
20884+ return 0;
20885+
20886+ index = ctx->pos - 2;
20887+ if (index < size) {
4bf69007 20888+ for (p += index; p->name; p++) {
c2e5f7c8 20889+ if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
4bf69007 20890+ vs_proc_instantiate, 0, p))
8de2f54c 20891+ return 0;
c2e5f7c8 20892+ ctx->pos++;
d337f35e
JR
20893+ }
20894+ }
8de2f54c
AM
20895+
20896+ index = ctx->pos - size;
20897+ p = &nx_virtnet_stuff[size - 1];
20898+ nr_nids = get_nid_list(index, nid_array, PROC_MAXVIDS);
20899+ for (i = 0; i < nr_nids; i++) {
20900+ int n, nid = nid_array[i];
20901+ unsigned int j = PROC_NUMBUF;
20902+
20903+ n = nid;
20904+ do
20905+ buf[--j] = '0' + (n % 10);
20906+ while (n /= 10);
20907+
20908+ if (vx_proc_fill_cache(filp, ctx,
20909+ buf + j, PROC_NUMBUF - j,
20910+ vs_proc_instantiate, nid, p))
20911+ return 0;
20912+ ctx->pos++;
20913+ }
d337f35e
JR
20914+ return 0;
20915+}
20916+
4bf69007
AM
20917+static int proc_virtnet_getattr(struct vfsmount *mnt,
20918+ struct dentry *dentry, struct kstat *stat)
20919+{
20920+ struct inode *inode = dentry->d_inode;
d337f35e 20921+
4bf69007
AM
20922+ generic_fillattr(inode, stat);
20923+ stat->nlink = 2 + atomic_read(&nx_global_cactive);
20924+ return 0;
20925+}
d337f35e 20926+
4bf69007
AM
20927+static struct file_operations proc_virtnet_dir_operations = {
20928+ .read = generic_read_dir,
c2e5f7c8 20929+ .iterate = proc_virtnet_iterate,
d337f35e
JR
20930+};
20931+
4bf69007
AM
20932+static struct inode_operations proc_virtnet_dir_inode_operations = {
20933+ .getattr = proc_virtnet_getattr,
20934+ .lookup = proc_virtnet_lookup,
d337f35e
JR
20935+};
20936+
d337f35e
JR
20937+
20938+
4bf69007 20939+void proc_vx_init(void)
d337f35e 20940+{
4bf69007 20941+ struct proc_dir_entry *ent;
d337f35e 20942+
4bf69007
AM
20943+ ent = proc_mkdir("virtual", 0);
20944+ if (ent) {
20945+ ent->proc_fops = &proc_virtual_dir_operations;
20946+ ent->proc_iops = &proc_virtual_dir_inode_operations;
20947+ }
20948+ proc_virtual = ent;
d337f35e 20949+
4bf69007
AM
20950+ ent = proc_mkdir("virtnet", 0);
20951+ if (ent) {
20952+ ent->proc_fops = &proc_virtnet_dir_operations;
20953+ ent->proc_iops = &proc_virtnet_dir_inode_operations;
d337f35e 20954+ }
4bf69007 20955+ proc_virtnet = ent;
d337f35e
JR
20956+}
20957+
d337f35e 20958+
2380c486 20959+
2380c486 20960+
4bf69007 20961+/* per pid info */
2380c486 20962+
bb20add7
AM
20963+void render_cap_t(struct seq_file *, const char *,
20964+ struct vx_info *, kernel_cap_t *);
20965+
2380c486 20966+
bb20add7
AM
20967+int proc_pid_vx_info(
20968+ struct seq_file *m,
20969+ struct pid_namespace *ns,
20970+ struct pid *pid,
20971+ struct task_struct *p)
2380c486 20972+{
4bf69007 20973+ struct vx_info *vxi;
2380c486 20974+
bb20add7 20975+ seq_printf(m, "XID:\t%d\n", vx_task_xid(p));
2380c486 20976+
4bf69007
AM
20977+ vxi = task_get_vx_info(p);
20978+ if (!vxi)
bb20add7 20979+ return 0;
2380c486 20980+
bb20add7
AM
20981+ render_cap_t(m, "BCaps:\t", vxi, &vxi->vx_bcaps);
20982+ seq_printf(m, "CCaps:\t%016llx\n",
4bf69007 20983+ (unsigned long long)vxi->vx_ccaps);
bb20add7 20984+ seq_printf(m, "CFlags:\t%016llx\n",
4bf69007 20985+ (unsigned long long)vxi->vx_flags);
bb20add7 20986+ seq_printf(m, "CIPid:\t%d\n", vxi->vx_initpid);
4bf69007
AM
20987+
20988+ put_vx_info(vxi);
bb20add7 20989+ return 0;
2380c486
JR
20990+}
20991+
2380c486 20992+
bb20add7
AM
20993+int proc_pid_nx_info(
20994+ struct seq_file *m,
20995+ struct pid_namespace *ns,
20996+ struct pid *pid,
20997+ struct task_struct *p)
4bf69007
AM
20998+{
20999+ struct nx_info *nxi;
21000+ struct nx_addr_v4 *v4a;
21001+#ifdef CONFIG_IPV6
21002+ struct nx_addr_v6 *v6a;
21003+#endif
4bf69007 21004+ int i;
2380c486 21005+
bb20add7 21006+ seq_printf(m, "NID:\t%d\n", nx_task_nid(p));
2380c486 21007+
4bf69007
AM
21008+ nxi = task_get_nx_info(p);
21009+ if (!nxi)
bb20add7 21010+ return 0;
2380c486 21011+
bb20add7 21012+ seq_printf(m, "NCaps:\t%016llx\n",
4bf69007 21013+ (unsigned long long)nxi->nx_ncaps);
bb20add7 21014+ seq_printf(m, "NFlags:\t%016llx\n",
4bf69007
AM
21015+ (unsigned long long)nxi->nx_flags);
21016+
bb20add7 21017+ seq_printf(m, "V4Root[bcast]:\t" NIPQUAD_FMT "\n",
4bf69007 21018+ NIPQUAD(nxi->v4_bcast.s_addr));
bb20add7 21019+ seq_printf(m, "V4Root[lback]:\t" NIPQUAD_FMT "\n",
4bf69007
AM
21020+ NIPQUAD(nxi->v4_lback.s_addr));
21021+ if (!NX_IPV4(nxi))
21022+ goto skip_v4;
21023+ for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
bb20add7 21024+ seq_printf(m, "V4Root[%d]:\t" NXAV4_FMT "\n",
4bf69007
AM
21025+ i, NXAV4(v4a));
21026+skip_v4:
21027+#ifdef CONFIG_IPV6
21028+ if (!NX_IPV6(nxi))
21029+ goto skip_v6;
21030+ for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
bb20add7 21031+ seq_printf(m, "V6Root[%d]:\t" NXAV6_FMT "\n",
4bf69007
AM
21032+ i, NXAV6(v6a));
21033+skip_v6:
21034+#endif
21035+ put_nx_info(nxi);
bb20add7 21036+ return 0;
2380c486
JR
21037+}
21038+
8931d859
AM
21039diff -NurpP --minimal linux-4.4.161/kernel/vserver/sched.c linux-4.4.161-vs2.3.9.8/kernel/vserver/sched.c
21040--- linux-4.4.161/kernel/vserver/sched.c 1970-01-01 00:00:00.000000000 +0000
21041+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/sched.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
21042@@ -0,0 +1,83 @@
21043+/*
21044+ * linux/kernel/vserver/sched.c
21045+ *
21046+ * Virtual Server: Scheduler Support
21047+ *
d6221c00 21048+ * Copyright (C) 2004-2010 Herbert Pötzl
4bf69007
AM
21049+ *
21050+ * V0.01 adapted Sam Vilain's version to 2.6.3
21051+ * V0.02 removed legacy interface
21052+ * V0.03 changed vcmds to vxi arg
21053+ * V0.04 removed older and legacy interfaces
21054+ * V0.05 removed scheduler code/commands
21055+ *
21056+ */
21057+
21058+#include <linux/vs_context.h>
21059+#include <linux/vs_sched.h>
21060+#include <linux/cpumask.h>
21061+#include <linux/vserver/sched_cmd.h>
2380c486 21062+
4bf69007
AM
21063+#include <asm/uaccess.h>
21064+
21065+
21066+void vx_update_sched_param(struct _vx_sched *sched,
21067+ struct _vx_sched_pc *sched_pc)
2380c486 21068+{
4bf69007 21069+ sched_pc->prio_bias = sched->prio_bias;
2380c486
JR
21070+}
21071+
4bf69007
AM
21072+static int do_set_prio_bias(struct vx_info *vxi, struct vcmd_prio_bias *data)
21073+{
21074+ int cpu;
2380c486 21075+
4bf69007
AM
21076+ if (data->prio_bias > MAX_PRIO_BIAS)
21077+ data->prio_bias = MAX_PRIO_BIAS;
21078+ if (data->prio_bias < MIN_PRIO_BIAS)
21079+ data->prio_bias = MIN_PRIO_BIAS;
2380c486 21080+
4bf69007 21081+ if (data->cpu_id != ~0) {
927ca606 21082+ vxi->sched.update = *get_cpu_mask(data->cpu_id);
4bf69007
AM
21083+ cpumask_and(&vxi->sched.update, &vxi->sched.update,
21084+ cpu_online_mask);
21085+ } else
21086+ cpumask_copy(&vxi->sched.update, cpu_online_mask);
2380c486 21087+
927ca606 21088+ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)vxi->sched.update)
4bf69007
AM
21089+ vx_update_sched_param(&vxi->sched,
21090+ &vx_per_cpu(vxi, sched_pc, cpu));
21091+ return 0;
21092+}
2380c486 21093+
4bf69007
AM
21094+int vc_set_prio_bias(struct vx_info *vxi, void __user *data)
21095+{
21096+ struct vcmd_prio_bias vc_data;
d337f35e 21097+
4bf69007
AM
21098+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21099+ return -EFAULT;
d337f35e 21100+
4bf69007
AM
21101+ return do_set_prio_bias(vxi, &vc_data);
21102+}
d337f35e 21103+
4bf69007
AM
21104+int vc_get_prio_bias(struct vx_info *vxi, void __user *data)
21105+{
21106+ struct vcmd_prio_bias vc_data;
21107+ struct _vx_sched_pc *pcd;
21108+ int cpu;
d337f35e 21109+
4bf69007
AM
21110+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21111+ return -EFAULT;
2380c486 21112+
4bf69007 21113+ cpu = vc_data.cpu_id;
d337f35e 21114+
4bf69007
AM
21115+ if (!cpu_possible(cpu))
21116+ return -EINVAL;
d337f35e 21117+
4bf69007
AM
21118+ pcd = &vx_per_cpu(vxi, sched_pc, cpu);
21119+ vc_data.prio_bias = pcd->prio_bias;
d337f35e 21120+
4bf69007
AM
21121+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21122+ return -EFAULT;
21123+ return 0;
21124+}
d337f35e 21125+
8931d859
AM
21126diff -NurpP --minimal linux-4.4.161/kernel/vserver/sched_init.h linux-4.4.161-vs2.3.9.8/kernel/vserver/sched_init.h
21127--- linux-4.4.161/kernel/vserver/sched_init.h 1970-01-01 00:00:00.000000000 +0000
21128+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/sched_init.h 2018-10-20 04:57:21.000000000 +0000
4bf69007 21129@@ -0,0 +1,27 @@
2380c486 21130+
4bf69007
AM
21131+static inline void vx_info_init_sched(struct _vx_sched *sched)
21132+{
21133+ /* scheduling; hard code starting values as constants */
21134+ sched->prio_bias = 0;
d337f35e
JR
21135+}
21136+
4bf69007
AM
21137+static inline
21138+void vx_info_init_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
e3afe727 21139+{
4bf69007
AM
21140+ sched_pc->prio_bias = 0;
21141+
21142+ sched_pc->user_ticks = 0;
21143+ sched_pc->sys_ticks = 0;
21144+ sched_pc->hold_ticks = 0;
e3afe727
AM
21145+}
21146+
4bf69007 21147+static inline void vx_info_exit_sched(struct _vx_sched *sched)
e3afe727 21148+{
4bf69007 21149+ return;
e3afe727
AM
21150+}
21151+
4bf69007
AM
21152+static inline
21153+void vx_info_exit_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
e3afe727 21154+{
4bf69007 21155+ return;
e3afe727 21156+}
8931d859
AM
21157diff -NurpP --minimal linux-4.4.161/kernel/vserver/sched_proc.h linux-4.4.161-vs2.3.9.8/kernel/vserver/sched_proc.h
21158--- linux-4.4.161/kernel/vserver/sched_proc.h 1970-01-01 00:00:00.000000000 +0000
21159+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/sched_proc.h 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
21160@@ -0,0 +1,32 @@
21161+#ifndef _VX_SCHED_PROC_H
21162+#define _VX_SCHED_PROC_H
e3afe727 21163+
4bf69007
AM
21164+
21165+static inline
21166+int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
e3afe727 21167+{
4bf69007
AM
21168+ int length = 0;
21169+
21170+ length += sprintf(buffer,
21171+ "PrioBias:\t%8d\n",
21172+ sched->prio_bias);
21173+ return length;
e3afe727
AM
21174+}
21175+
4bf69007
AM
21176+static inline
21177+int vx_info_proc_sched_pc(struct _vx_sched_pc *sched_pc,
21178+ char *buffer, int cpu)
e3afe727 21179+{
4bf69007 21180+ int length = 0;
e3afe727 21181+
4bf69007
AM
21182+ length += sprintf(buffer + length,
21183+ "cpu %d: %lld %lld %lld", cpu,
21184+ (unsigned long long)sched_pc->user_ticks,
21185+ (unsigned long long)sched_pc->sys_ticks,
21186+ (unsigned long long)sched_pc->hold_ticks);
21187+ length += sprintf(buffer + length,
21188+ " %d\n", sched_pc->prio_bias);
21189+ return length;
21190+}
93de0823 21191+
4bf69007 21192+#endif /* _VX_SCHED_PROC_H */
8931d859
AM
21193diff -NurpP --minimal linux-4.4.161/kernel/vserver/signal.c linux-4.4.161-vs2.3.9.8/kernel/vserver/signal.c
21194--- linux-4.4.161/kernel/vserver/signal.c 1970-01-01 00:00:00.000000000 +0000
21195+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/signal.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
21196@@ -0,0 +1,134 @@
21197+/*
21198+ * linux/kernel/vserver/signal.c
21199+ *
21200+ * Virtual Server: Signal Support
21201+ *
d6221c00 21202+ * Copyright (C) 2003-2007 Herbert Pötzl
4bf69007
AM
21203+ *
21204+ * V0.01 broken out from vcontext V0.05
21205+ * V0.02 changed vcmds to vxi arg
21206+ * V0.03 adjusted siginfo for kill
21207+ *
21208+ */
99a884b4 21209+
4bf69007 21210+#include <asm/uaccess.h>
93de0823 21211+
4bf69007
AM
21212+#include <linux/vs_context.h>
21213+#include <linux/vs_pid.h>
21214+#include <linux/vserver/signal_cmd.h>
d337f35e 21215+
d337f35e 21216+
4bf69007
AM
21217+int vx_info_kill(struct vx_info *vxi, int pid, int sig)
21218+{
21219+ int retval, count = 0;
21220+ struct task_struct *p;
21221+ struct siginfo *sip = SEND_SIG_PRIV;
d33d7b00 21222+
4bf69007
AM
21223+ retval = -ESRCH;
21224+ vxdprintk(VXD_CBIT(misc, 4),
21225+ "vx_info_kill(%p[#%d],%d,%d)*",
21226+ vxi, vxi->vx_id, pid, sig);
21227+ read_lock(&tasklist_lock);
21228+ switch (pid) {
21229+ case 0:
21230+ case -1:
21231+ for_each_process(p) {
21232+ int err = 0;
d337f35e 21233+
4bf69007
AM
21234+ if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 ||
21235+ (pid && vxi->vx_initpid == p->pid))
21236+ continue;
d337f35e 21237+
4bf69007
AM
21238+ err = group_send_sig_info(sig, sip, p);
21239+ ++count;
21240+ if (err != -EPERM)
21241+ retval = err;
21242+ }
21243+ break;
d337f35e 21244+
4bf69007
AM
21245+ case 1:
21246+ if (vxi->vx_initpid) {
21247+ pid = vxi->vx_initpid;
21248+ /* for now, only SIGINT to private init ... */
21249+ if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
21250+ /* ... as long as there are tasks left */
21251+ (atomic_read(&vxi->vx_tasks) > 1))
21252+ sig = SIGINT;
21253+ }
21254+ /* fallthrough */
21255+ default:
21256+ rcu_read_lock();
21257+ p = find_task_by_real_pid(pid);
21258+ rcu_read_unlock();
21259+ if (p) {
21260+ if (vx_task_xid(p) == vxi->vx_id)
21261+ retval = group_send_sig_info(sig, sip, p);
21262+ }
21263+ break;
21264+ }
21265+ read_unlock(&tasklist_lock);
21266+ vxdprintk(VXD_CBIT(misc, 4),
21267+ "vx_info_kill(%p[#%d],%d,%d,%ld) = %d",
21268+ vxi, vxi->vx_id, pid, sig, (long)sip, retval);
21269+ return retval;
21270+}
d337f35e 21271+
4bf69007 21272+int vc_ctx_kill(struct vx_info *vxi, void __user *data)
d337f35e 21273+{
4bf69007 21274+ struct vcmd_ctx_kill_v0 vc_data;
d337f35e 21275+
4bf69007
AM
21276+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21277+ return -EFAULT;
d337f35e 21278+
4bf69007
AM
21279+ /* special check to allow guest shutdown */
21280+ if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
21281+ /* forbid killall pid=0 when init is present */
21282+ (((vc_data.pid < 1) && vxi->vx_initpid) ||
21283+ (vc_data.pid > 1)))
21284+ return -EACCES;
21285+
21286+ return vx_info_kill(vxi, vc_data.pid, vc_data.sig);
d337f35e
JR
21287+}
21288+
4bf69007
AM
21289+
21290+static int __wait_exit(struct vx_info *vxi)
d337f35e 21291+{
4bf69007
AM
21292+ DECLARE_WAITQUEUE(wait, current);
21293+ int ret = 0;
d337f35e 21294+
4bf69007
AM
21295+ add_wait_queue(&vxi->vx_wait, &wait);
21296+ set_current_state(TASK_INTERRUPTIBLE);
d337f35e 21297+
4bf69007
AM
21298+wait:
21299+ if (vx_info_state(vxi,
21300+ VXS_SHUTDOWN | VXS_HASHED | VXS_HELPER) == VXS_SHUTDOWN)
21301+ goto out;
21302+ if (signal_pending(current)) {
21303+ ret = -ERESTARTSYS;
21304+ goto out;
21305+ }
21306+ schedule();
21307+ goto wait;
21308+
21309+out:
21310+ set_current_state(TASK_RUNNING);
21311+ remove_wait_queue(&vxi->vx_wait, &wait);
21312+ return ret;
d337f35e
JR
21313+}
21314+
4a036bed 21315+
7b17263b 21316+
4bf69007 21317+int vc_wait_exit(struct vx_info *vxi, void __user *data)
7b17263b 21318+{
4bf69007
AM
21319+ struct vcmd_wait_exit_v0 vc_data;
21320+ int ret;
7b17263b 21321+
4bf69007
AM
21322+ ret = __wait_exit(vxi);
21323+ vc_data.reboot_cmd = vxi->reboot_cmd;
21324+ vc_data.exit_code = vxi->exit_code;
21325+
21326+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21327+ ret = -EFAULT;
21328+ return ret;
7b17263b 21329+}
2380c486 21330+
8931d859
AM
21331diff -NurpP --minimal linux-4.4.161/kernel/vserver/space.c linux-4.4.161-vs2.3.9.8/kernel/vserver/space.c
21332--- linux-4.4.161/kernel/vserver/space.c 1970-01-01 00:00:00.000000000 +0000
21333+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/space.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
21334@@ -0,0 +1,436 @@
21335+/*
21336+ * linux/kernel/vserver/space.c
21337+ *
21338+ * Virtual Server: Context Space Support
21339+ *
d6221c00 21340+ * Copyright (C) 2003-2010 Herbert Pötzl
4bf69007
AM
21341+ *
21342+ * V0.01 broken out from context.c 0.07
21343+ * V0.02 added task locking for namespace
21344+ * V0.03 broken out vx_enter_namespace
21345+ * V0.04 added *space support and commands
21346+ * V0.05 added credential support
21347+ *
21348+ */
21349+
21350+#include <linux/utsname.h>
21351+#include <linux/nsproxy.h>
21352+#include <linux/err.h>
21353+#include <linux/fs_struct.h>
21354+#include <linux/cred.h>
21355+#include <asm/uaccess.h>
d337f35e 21356+
d337f35e 21357+#include <linux/vs_context.h>
4bf69007
AM
21358+#include <linux/vserver/space.h>
21359+#include <linux/vserver/space_cmd.h>
2380c486 21360+
4bf69007
AM
21361+atomic_t vs_global_nsproxy = ATOMIC_INIT(0);
21362+atomic_t vs_global_fs = ATOMIC_INIT(0);
21363+atomic_t vs_global_mnt_ns = ATOMIC_INIT(0);
21364+atomic_t vs_global_uts_ns = ATOMIC_INIT(0);
21365+atomic_t vs_global_user_ns = ATOMIC_INIT(0);
21366+atomic_t vs_global_pid_ns = ATOMIC_INIT(0);
d337f35e 21367+
2380c486 21368+
4bf69007 21369+/* namespace functions */
2380c486 21370+
4bf69007
AM
21371+#include <linux/mnt_namespace.h>
21372+#include <linux/user_namespace.h>
21373+#include <linux/pid_namespace.h>
21374+#include <linux/ipc_namespace.h>
21375+#include <net/net_namespace.h>
21376+#include "../fs/mount.h"
2380c486 21377+
2380c486 21378+
4bf69007
AM
21379+static const struct vcmd_space_mask_v1 space_mask_v0 = {
21380+ .mask = CLONE_FS |
21381+ CLONE_NEWNS |
21382+#ifdef CONFIG_UTS_NS
21383+ CLONE_NEWUTS |
21384+#endif
21385+#ifdef CONFIG_IPC_NS
21386+ CLONE_NEWIPC |
21387+#endif
21388+#ifdef CONFIG_USER_NS
21389+ CLONE_NEWUSER |
21390+#endif
21391+ 0
21392+};
2380c486 21393+
4bf69007
AM
21394+static const struct vcmd_space_mask_v1 space_mask = {
21395+ .mask = CLONE_FS |
21396+ CLONE_NEWNS |
21397+#ifdef CONFIG_UTS_NS
21398+ CLONE_NEWUTS |
21399+#endif
21400+#ifdef CONFIG_IPC_NS
21401+ CLONE_NEWIPC |
21402+#endif
21403+#ifdef CONFIG_USER_NS
21404+ CLONE_NEWUSER |
21405+#endif
21406+#ifdef CONFIG_PID_NS
21407+ CLONE_NEWPID |
21408+#endif
21409+#ifdef CONFIG_NET_NS
21410+ CLONE_NEWNET |
21411+#endif
21412+ 0
21413+};
2380c486 21414+
4bf69007
AM
21415+static const struct vcmd_space_mask_v1 default_space_mask = {
21416+ .mask = CLONE_FS |
21417+ CLONE_NEWNS |
21418+#ifdef CONFIG_UTS_NS
21419+ CLONE_NEWUTS |
21420+#endif
21421+#ifdef CONFIG_IPC_NS
21422+ CLONE_NEWIPC |
21423+#endif
21424+#ifdef CONFIG_USER_NS
bb20add7 21425+// CLONE_NEWUSER |
4bf69007
AM
21426+#endif
21427+#ifdef CONFIG_PID_NS
21428+// CLONE_NEWPID |
21429+#endif
21430+ 0
21431+};
2380c486 21432+
4bf69007
AM
21433+/*
21434+ * build a new nsproxy mix
21435+ * assumes that both proxies are 'const'
21436+ * does not touch nsproxy refcounts
21437+ * will hold a reference on the result.
21438+ */
7b17263b 21439+
4bf69007
AM
21440+struct nsproxy *vs_mix_nsproxy(struct nsproxy *old_nsproxy,
21441+ struct nsproxy *new_nsproxy, unsigned long mask)
21442+{
21443+ struct mnt_namespace *old_ns;
21444+ struct uts_namespace *old_uts;
21445+ struct ipc_namespace *old_ipc;
21446+#ifdef CONFIG_PID_NS
21447+ struct pid_namespace *old_pid;
21448+#endif
21449+#ifdef CONFIG_NET_NS
21450+ struct net *old_net;
21451+#endif
21452+ struct nsproxy *nsproxy;
d337f35e 21453+
4bf69007
AM
21454+ nsproxy = copy_nsproxy(old_nsproxy);
21455+ if (!nsproxy)
21456+ goto out;
bd0a9c15 21457+
4bf69007
AM
21458+ if (mask & CLONE_NEWNS) {
21459+ old_ns = nsproxy->mnt_ns;
21460+ nsproxy->mnt_ns = new_nsproxy->mnt_ns;
21461+ if (nsproxy->mnt_ns)
21462+ get_mnt_ns(nsproxy->mnt_ns);
21463+ } else
21464+ old_ns = NULL;
d337f35e 21465+
4bf69007
AM
21466+ if (mask & CLONE_NEWUTS) {
21467+ old_uts = nsproxy->uts_ns;
21468+ nsproxy->uts_ns = new_nsproxy->uts_ns;
21469+ if (nsproxy->uts_ns)
21470+ get_uts_ns(nsproxy->uts_ns);
21471+ } else
21472+ old_uts = NULL;
2380c486 21473+
4bf69007
AM
21474+ if (mask & CLONE_NEWIPC) {
21475+ old_ipc = nsproxy->ipc_ns;
21476+ nsproxy->ipc_ns = new_nsproxy->ipc_ns;
21477+ if (nsproxy->ipc_ns)
21478+ get_ipc_ns(nsproxy->ipc_ns);
21479+ } else
21480+ old_ipc = NULL;
ec22aa5c 21481+
4bf69007
AM
21482+#ifdef CONFIG_PID_NS
21483+ if (mask & CLONE_NEWPID) {
5f23d63e
AM
21484+ old_pid = nsproxy->pid_ns_for_children;
21485+ nsproxy->pid_ns_for_children = new_nsproxy->pid_ns_for_children;
21486+ if (nsproxy->pid_ns_for_children)
21487+ get_pid_ns(nsproxy->pid_ns_for_children);
4bf69007
AM
21488+ } else
21489+ old_pid = NULL;
21490+#endif
21491+#ifdef CONFIG_NET_NS
21492+ if (mask & CLONE_NEWNET) {
21493+ old_net = nsproxy->net_ns;
21494+ nsproxy->net_ns = new_nsproxy->net_ns;
21495+ if (nsproxy->net_ns)
21496+ get_net(nsproxy->net_ns);
21497+ } else
21498+ old_net = NULL;
21499+#endif
21500+ if (old_ns)
21501+ put_mnt_ns(old_ns);
21502+ if (old_uts)
21503+ put_uts_ns(old_uts);
21504+ if (old_ipc)
21505+ put_ipc_ns(old_ipc);
21506+#ifdef CONFIG_PID_NS
21507+ if (old_pid)
21508+ put_pid_ns(old_pid);
21509+#endif
21510+#ifdef CONFIG_NET_NS
21511+ if (old_net)
21512+ put_net(old_net);
21513+#endif
21514+out:
21515+ return nsproxy;
21516+}
2380c486 21517+
bd0a9c15 21518+
4bf69007
AM
21519+/*
21520+ * merge two nsproxy structs into a new one.
21521+ * will hold a reference on the result.
21522+ */
d337f35e 21523+
4bf69007
AM
21524+static inline
21525+struct nsproxy *__vs_merge_nsproxy(struct nsproxy *old,
21526+ struct nsproxy *proxy, unsigned long mask)
21527+{
21528+ struct nsproxy null_proxy = { .mnt_ns = NULL };
2380c486 21529+
4bf69007
AM
21530+ if (!proxy)
21531+ return NULL;
d337f35e 21532+
4bf69007
AM
21533+ if (mask) {
21534+ /* vs_mix_nsproxy returns with reference */
21535+ return vs_mix_nsproxy(old ? old : &null_proxy,
21536+ proxy, mask);
21537+ }
21538+ get_nsproxy(proxy);
21539+ return proxy;
21540+}
2380c486 21541+
ec22aa5c 21542+
4bf69007
AM
21543+int vx_enter_space(struct vx_info *vxi, unsigned long mask, unsigned index)
21544+{
21545+ struct nsproxy *proxy, *proxy_cur, *proxy_new;
21546+ struct fs_struct *fs_cur, *fs = NULL;
21547+ struct _vx_space *space;
21548+ int ret, kill = 0;
2380c486 21549+
4bf69007
AM
21550+ vxdprintk(VXD_CBIT(space, 8), "vx_enter_space(%p[#%u],0x%08lx,%d)",
21551+ vxi, vxi->vx_id, mask, index);
2380c486 21552+
4bf69007
AM
21553+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
21554+ return -EACCES;
2380c486 21555+
4bf69007
AM
21556+ if (index >= VX_SPACES)
21557+ return -EINVAL;
2380c486 21558+
4bf69007
AM
21559+ space = &vxi->space[index];
21560+
21561+ if (!mask)
21562+ mask = space->vx_nsmask;
21563+
21564+ if ((mask & space->vx_nsmask) != mask)
21565+ return -EINVAL;
21566+
21567+ if (mask & CLONE_FS) {
21568+ fs = copy_fs_struct(space->vx_fs);
21569+ if (!fs)
21570+ return -ENOMEM;
2380c486 21571+ }
4bf69007
AM
21572+ proxy = space->vx_nsproxy;
21573+
21574+ vxdprintk(VXD_CBIT(space, 9),
21575+ "vx_enter_space(%p[#%u],0x%08lx,%d) -> (%p,%p)",
21576+ vxi, vxi->vx_id, mask, index, proxy, fs);
21577+
21578+ task_lock(current);
21579+ fs_cur = current->fs;
21580+
21581+ if (mask & CLONE_FS) {
21582+ spin_lock(&fs_cur->lock);
21583+ current->fs = fs;
21584+ kill = !--fs_cur->users;
21585+ spin_unlock(&fs_cur->lock);
ec22aa5c 21586+ }
ec22aa5c 21587+
4bf69007
AM
21588+ proxy_cur = current->nsproxy;
21589+ get_nsproxy(proxy_cur);
21590+ task_unlock(current);
21591+
21592+ if (kill)
21593+ free_fs_struct(fs_cur);
21594+
21595+ proxy_new = __vs_merge_nsproxy(proxy_cur, proxy, mask);
21596+ if (IS_ERR(proxy_new)) {
21597+ ret = PTR_ERR(proxy_new);
21598+ goto out_put;
eab5a9a6 21599+ }
4bf69007
AM
21600+
21601+ proxy_new = xchg(&current->nsproxy, proxy_new);
21602+
21603+ if (mask & CLONE_NEWUSER) {
21604+ struct cred *cred;
21605+
21606+ vxdprintk(VXD_CBIT(space, 10),
21607+ "vx_enter_space(%p[#%u],%p) cred (%p,%p)",
21608+ vxi, vxi->vx_id, space->vx_cred,
21609+ current->real_cred, current->cred);
21610+
21611+ if (space->vx_cred) {
21612+ cred = __prepare_creds(space->vx_cred);
21613+ if (cred)
21614+ commit_creds(cred);
21615+ }
d337f35e 21616+ }
4bf69007
AM
21617+
21618+ ret = 0;
21619+
21620+ if (proxy_new)
21621+ put_nsproxy(proxy_new);
21622+out_put:
21623+ if (proxy_cur)
21624+ put_nsproxy(proxy_cur);
21625+ return ret;
21626+}
21627+
21628+
21629+int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index)
21630+{
21631+ struct nsproxy *proxy_vxi, *proxy_cur, *proxy_new;
21632+ struct fs_struct *fs_vxi, *fs = NULL;
21633+ struct _vx_space *space;
21634+ int ret, kill = 0;
21635+
21636+ vxdprintk(VXD_CBIT(space, 8), "vx_set_space(%p[#%u],0x%08lx,%d)",
21637+ vxi, vxi->vx_id, mask, index);
21638+
21639+ if ((mask & space_mask.mask) != mask)
21640+ return -EINVAL;
21641+
21642+ if (index >= VX_SPACES)
21643+ return -EINVAL;
21644+
21645+ space = &vxi->space[index];
21646+
21647+ proxy_vxi = space->vx_nsproxy;
21648+ fs_vxi = space->vx_fs;
21649+
21650+ if (mask & CLONE_FS) {
21651+ fs = copy_fs_struct(current->fs);
21652+ if (!fs)
21653+ return -ENOMEM;
2380c486 21654+ }
d337f35e 21655+
4bf69007 21656+ task_lock(current);
2ba6f0dd 21657+
4bf69007
AM
21658+ if (mask & CLONE_FS) {
21659+ spin_lock(&fs_vxi->lock);
21660+ space->vx_fs = fs;
21661+ kill = !--fs_vxi->users;
21662+ spin_unlock(&fs_vxi->lock);
21663+ }
2ba6f0dd 21664+
4bf69007
AM
21665+ proxy_cur = current->nsproxy;
21666+ get_nsproxy(proxy_cur);
21667+ task_unlock(current);
2ba6f0dd 21668+
4bf69007
AM
21669+ if (kill)
21670+ free_fs_struct(fs_vxi);
2ba6f0dd 21671+
4bf69007
AM
21672+ proxy_new = __vs_merge_nsproxy(proxy_vxi, proxy_cur, mask);
21673+ if (IS_ERR(proxy_new)) {
21674+ ret = PTR_ERR(proxy_new);
21675+ goto out_put;
21676+ }
2ba6f0dd 21677+
4bf69007
AM
21678+ proxy_new = xchg(&space->vx_nsproxy, proxy_new);
21679+ space->vx_nsmask |= mask;
2ba6f0dd 21680+
4bf69007
AM
21681+ if (mask & CLONE_NEWUSER) {
21682+ struct cred *cred;
2ba6f0dd 21683+
4bf69007
AM
21684+ vxdprintk(VXD_CBIT(space, 10),
21685+ "vx_set_space(%p[#%u],%p) cred (%p,%p)",
21686+ vxi, vxi->vx_id, space->vx_cred,
21687+ current->real_cred, current->cred);
2ba6f0dd 21688+
4bf69007
AM
21689+ cred = prepare_creds();
21690+ cred = (struct cred *)xchg(&space->vx_cred, cred);
21691+ if (cred)
21692+ abort_creds(cred);
21693+ }
2ba6f0dd 21694+
4bf69007 21695+ ret = 0;
2ba6f0dd 21696+
4bf69007
AM
21697+ if (proxy_new)
21698+ put_nsproxy(proxy_new);
21699+out_put:
21700+ if (proxy_cur)
21701+ put_nsproxy(proxy_cur);
21702+ return ret;
21703+}
2ba6f0dd
AM
21704+
21705+
4bf69007
AM
21706+int vc_enter_space_v1(struct vx_info *vxi, void __user *data)
21707+{
21708+ struct vcmd_space_mask_v1 vc_data = { .mask = 0 };
2ba6f0dd 21709+
4bf69007
AM
21710+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21711+ return -EFAULT;
2ba6f0dd 21712+
4bf69007
AM
21713+ return vx_enter_space(vxi, vc_data.mask, 0);
21714+}
2ba6f0dd 21715+
4bf69007
AM
21716+int vc_enter_space(struct vx_info *vxi, void __user *data)
21717+{
21718+ struct vcmd_space_mask_v2 vc_data = { .mask = 0 };
2ba6f0dd 21719+
4bf69007
AM
21720+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21721+ return -EFAULT;
2ba6f0dd 21722+
4bf69007
AM
21723+ if (vc_data.index >= VX_SPACES)
21724+ return -EINVAL;
2ba6f0dd 21725+
4bf69007
AM
21726+ return vx_enter_space(vxi, vc_data.mask, vc_data.index);
21727+}
2ba6f0dd 21728+
4bf69007
AM
21729+int vc_set_space_v1(struct vx_info *vxi, void __user *data)
21730+{
21731+ struct vcmd_space_mask_v1 vc_data = { .mask = 0 };
2ba6f0dd 21732+
4bf69007
AM
21733+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21734+ return -EFAULT;
2ba6f0dd 21735+
4bf69007
AM
21736+ return vx_set_space(vxi, vc_data.mask, 0);
21737+}
2ba6f0dd 21738+
4bf69007
AM
21739+int vc_set_space(struct vx_info *vxi, void __user *data)
21740+{
21741+ struct vcmd_space_mask_v2 vc_data = { .mask = 0 };
2ba6f0dd 21742+
4bf69007
AM
21743+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21744+ return -EFAULT;
2ba6f0dd 21745+
4bf69007
AM
21746+ if (vc_data.index >= VX_SPACES)
21747+ return -EINVAL;
2ba6f0dd 21748+
4bf69007
AM
21749+ return vx_set_space(vxi, vc_data.mask, vc_data.index);
21750+}
2ba6f0dd 21751+
4bf69007
AM
21752+int vc_get_space_mask(void __user *data, int type)
21753+{
21754+ const struct vcmd_space_mask_v1 *mask;
2ba6f0dd 21755+
4bf69007
AM
21756+ if (type == 0)
21757+ mask = &space_mask_v0;
21758+ else if (type == 1)
21759+ mask = &space_mask;
21760+ else
21761+ mask = &default_space_mask;
2ba6f0dd 21762+
4bf69007
AM
21763+ vxdprintk(VXD_CBIT(space, 10),
21764+ "vc_get_space_mask(%d) = %08llx", type, mask->mask);
2ba6f0dd 21765+
4bf69007
AM
21766+ if (copy_to_user(data, mask, sizeof(*mask)))
21767+ return -EFAULT;
21768+ return 0;
21769+}
2ba6f0dd 21770+
8931d859
AM
21771diff -NurpP --minimal linux-4.4.161/kernel/vserver/switch.c linux-4.4.161-vs2.3.9.8/kernel/vserver/switch.c
21772--- linux-4.4.161/kernel/vserver/switch.c 1970-01-01 00:00:00.000000000 +0000
21773+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/switch.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
21774@@ -0,0 +1,556 @@
21775+/*
21776+ * linux/kernel/vserver/switch.c
21777+ *
21778+ * Virtual Server: Syscall Switch
21779+ *
d6221c00 21780+ * Copyright (C) 2003-2011 Herbert Pötzl
4bf69007
AM
21781+ *
21782+ * V0.01 syscall switch
21783+ * V0.02 added signal to context
21784+ * V0.03 added rlimit functions
21785+ * V0.04 added iattr, task/xid functions
21786+ * V0.05 added debug/history stuff
21787+ * V0.06 added compat32 layer
21788+ * V0.07 vcmd args and perms
21789+ * V0.08 added status commands
21790+ * V0.09 added tag commands
21791+ * V0.10 added oom bias
21792+ * V0.11 added device commands
21793+ * V0.12 added warn mask
21794+ *
21795+ */
2ba6f0dd 21796+
4bf69007
AM
21797+#include <linux/vs_context.h>
21798+#include <linux/vs_network.h>
21799+#include <linux/vserver/switch.h>
2ba6f0dd 21800+
4bf69007 21801+#include "vci_config.h"
2ba6f0dd 21802+
2ba6f0dd 21803+
4bf69007
AM
21804+static inline
21805+int vc_get_version(uint32_t id)
21806+{
21807+ return VCI_VERSION;
21808+}
2ba6f0dd 21809+
4bf69007
AM
21810+static inline
21811+int vc_get_vci(uint32_t id)
21812+{
21813+ return vci_kernel_config();
21814+}
2ba6f0dd 21815+
4bf69007
AM
21816+#include <linux/vserver/context_cmd.h>
21817+#include <linux/vserver/cvirt_cmd.h>
21818+#include <linux/vserver/cacct_cmd.h>
21819+#include <linux/vserver/limit_cmd.h>
21820+#include <linux/vserver/network_cmd.h>
21821+#include <linux/vserver/sched_cmd.h>
21822+#include <linux/vserver/debug_cmd.h>
21823+#include <linux/vserver/inode_cmd.h>
21824+#include <linux/vserver/dlimit_cmd.h>
21825+#include <linux/vserver/signal_cmd.h>
21826+#include <linux/vserver/space_cmd.h>
21827+#include <linux/vserver/tag_cmd.h>
21828+#include <linux/vserver/device_cmd.h>
2ba6f0dd 21829+
4bf69007
AM
21830+#include <linux/vserver/inode.h>
21831+#include <linux/vserver/dlimit.h>
2ba6f0dd 21832+
2ba6f0dd 21833+
4bf69007
AM
21834+#ifdef CONFIG_COMPAT
21835+#define __COMPAT(name, id, data, compat) \
21836+ (compat) ? name ## _x32(id, data) : name(id, data)
21837+#define __COMPAT_NO_ID(name, data, compat) \
21838+ (compat) ? name ## _x32(data) : name(data)
21839+#else
21840+#define __COMPAT(name, id, data, compat) \
21841+ name(id, data)
21842+#define __COMPAT_NO_ID(name, data, compat) \
21843+ name(data)
21844+#endif
2ba6f0dd 21845+
2ba6f0dd 21846+
4bf69007
AM
21847+static inline
21848+long do_vcmd(uint32_t cmd, uint32_t id,
21849+ struct vx_info *vxi, struct nx_info *nxi,
21850+ void __user *data, int compat)
21851+{
21852+ switch (cmd) {
2ba6f0dd 21853+
4bf69007
AM
21854+ case VCMD_get_version:
21855+ return vc_get_version(id);
21856+ case VCMD_get_vci:
21857+ return vc_get_vci(id);
2ba6f0dd 21858+
4bf69007
AM
21859+ case VCMD_task_xid:
21860+ return vc_task_xid(id);
21861+ case VCMD_vx_info:
21862+ return vc_vx_info(vxi, data);
2ba6f0dd 21863+
4bf69007
AM
21864+ case VCMD_task_nid:
21865+ return vc_task_nid(id);
21866+ case VCMD_nx_info:
21867+ return vc_nx_info(nxi, data);
2ba6f0dd 21868+
4bf69007
AM
21869+ case VCMD_task_tag:
21870+ return vc_task_tag(id);
2ba6f0dd 21871+
4bf69007
AM
21872+ case VCMD_set_space_v1:
21873+ return vc_set_space_v1(vxi, data);
21874+ /* this is version 2 */
21875+ case VCMD_set_space:
21876+ return vc_set_space(vxi, data);
2ba6f0dd 21877+
4bf69007
AM
21878+ case VCMD_get_space_mask_v0:
21879+ return vc_get_space_mask(data, 0);
21880+ /* this is version 1 */
21881+ case VCMD_get_space_mask:
21882+ return vc_get_space_mask(data, 1);
2ba6f0dd 21883+
4bf69007
AM
21884+ case VCMD_get_space_default:
21885+ return vc_get_space_mask(data, -1);
2ba6f0dd 21886+
4bf69007
AM
21887+ case VCMD_set_umask:
21888+ return vc_set_umask(vxi, data);
2ba6f0dd 21889+
4bf69007
AM
21890+ case VCMD_get_umask:
21891+ return vc_get_umask(vxi, data);
2ba6f0dd 21892+
4bf69007
AM
21893+ case VCMD_set_wmask:
21894+ return vc_set_wmask(vxi, data);
2ba6f0dd 21895+
4bf69007
AM
21896+ case VCMD_get_wmask:
21897+ return vc_get_wmask(vxi, data);
21898+#ifdef CONFIG_IA32_EMULATION
21899+ case VCMD_get_rlimit:
21900+ return __COMPAT(vc_get_rlimit, vxi, data, compat);
21901+ case VCMD_set_rlimit:
21902+ return __COMPAT(vc_set_rlimit, vxi, data, compat);
21903+#else
21904+ case VCMD_get_rlimit:
21905+ return vc_get_rlimit(vxi, data);
21906+ case VCMD_set_rlimit:
21907+ return vc_set_rlimit(vxi, data);
21908+#endif
21909+ case VCMD_get_rlimit_mask:
21910+ return vc_get_rlimit_mask(id, data);
21911+ case VCMD_reset_hits:
21912+ return vc_reset_hits(vxi, data);
21913+ case VCMD_reset_minmax:
21914+ return vc_reset_minmax(vxi, data);
2ba6f0dd 21915+
4bf69007
AM
21916+ case VCMD_get_vhi_name:
21917+ return vc_get_vhi_name(vxi, data);
21918+ case VCMD_set_vhi_name:
21919+ return vc_set_vhi_name(vxi, data);
2ba6f0dd 21920+
4bf69007
AM
21921+ case VCMD_ctx_stat:
21922+ return vc_ctx_stat(vxi, data);
21923+ case VCMD_virt_stat:
21924+ return vc_virt_stat(vxi, data);
21925+ case VCMD_sock_stat:
21926+ return vc_sock_stat(vxi, data);
21927+ case VCMD_rlimit_stat:
21928+ return vc_rlimit_stat(vxi, data);
2ba6f0dd 21929+
4bf69007
AM
21930+ case VCMD_set_cflags:
21931+ return vc_set_cflags(vxi, data);
21932+ case VCMD_get_cflags:
21933+ return vc_get_cflags(vxi, data);
2ba6f0dd 21934+
4bf69007
AM
21935+ /* this is version 1 */
21936+ case VCMD_set_ccaps:
21937+ return vc_set_ccaps(vxi, data);
21938+ /* this is version 1 */
21939+ case VCMD_get_ccaps:
21940+ return vc_get_ccaps(vxi, data);
21941+ case VCMD_set_bcaps:
21942+ return vc_set_bcaps(vxi, data);
21943+ case VCMD_get_bcaps:
21944+ return vc_get_bcaps(vxi, data);
2ba6f0dd 21945+
4bf69007
AM
21946+ case VCMD_set_badness:
21947+ return vc_set_badness(vxi, data);
21948+ case VCMD_get_badness:
21949+ return vc_get_badness(vxi, data);
2ba6f0dd 21950+
4bf69007
AM
21951+ case VCMD_set_nflags:
21952+ return vc_set_nflags(nxi, data);
21953+ case VCMD_get_nflags:
21954+ return vc_get_nflags(nxi, data);
2ba6f0dd 21955+
4bf69007
AM
21956+ case VCMD_set_ncaps:
21957+ return vc_set_ncaps(nxi, data);
21958+ case VCMD_get_ncaps:
21959+ return vc_get_ncaps(nxi, data);
2ba6f0dd 21960+
4bf69007
AM
21961+ case VCMD_set_prio_bias:
21962+ return vc_set_prio_bias(vxi, data);
21963+ case VCMD_get_prio_bias:
21964+ return vc_get_prio_bias(vxi, data);
21965+ case VCMD_add_dlimit:
21966+ return __COMPAT(vc_add_dlimit, id, data, compat);
21967+ case VCMD_rem_dlimit:
21968+ return __COMPAT(vc_rem_dlimit, id, data, compat);
21969+ case VCMD_set_dlimit:
21970+ return __COMPAT(vc_set_dlimit, id, data, compat);
21971+ case VCMD_get_dlimit:
21972+ return __COMPAT(vc_get_dlimit, id, data, compat);
2ba6f0dd 21973+
4bf69007
AM
21974+ case VCMD_ctx_kill:
21975+ return vc_ctx_kill(vxi, data);
2ba6f0dd 21976+
4bf69007
AM
21977+ case VCMD_wait_exit:
21978+ return vc_wait_exit(vxi, data);
2ba6f0dd 21979+
4bf69007
AM
21980+ case VCMD_get_iattr:
21981+ return __COMPAT_NO_ID(vc_get_iattr, data, compat);
21982+ case VCMD_set_iattr:
21983+ return __COMPAT_NO_ID(vc_set_iattr, data, compat);
2ba6f0dd 21984+
4bf69007
AM
21985+ case VCMD_fget_iattr:
21986+ return vc_fget_iattr(id, data);
21987+ case VCMD_fset_iattr:
21988+ return vc_fset_iattr(id, data);
2ba6f0dd 21989+
4bf69007
AM
21990+ case VCMD_enter_space_v0:
21991+ return vc_enter_space_v1(vxi, NULL);
21992+ case VCMD_enter_space_v1:
21993+ return vc_enter_space_v1(vxi, data);
21994+ /* this is version 2 */
21995+ case VCMD_enter_space:
21996+ return vc_enter_space(vxi, data);
2ba6f0dd 21997+
4bf69007
AM
21998+ case VCMD_ctx_create_v0:
21999+ return vc_ctx_create(id, NULL);
22000+ case VCMD_ctx_create:
22001+ return vc_ctx_create(id, data);
22002+ case VCMD_ctx_migrate_v0:
22003+ return vc_ctx_migrate(vxi, NULL);
22004+ case VCMD_ctx_migrate:
22005+ return vc_ctx_migrate(vxi, data);
2ba6f0dd 22006+
4bf69007
AM
22007+ case VCMD_net_create_v0:
22008+ return vc_net_create(id, NULL);
22009+ case VCMD_net_create:
22010+ return vc_net_create(id, data);
22011+ case VCMD_net_migrate:
22012+ return vc_net_migrate(nxi, data);
2ba6f0dd 22013+
4bf69007
AM
22014+ case VCMD_tag_migrate:
22015+ return vc_tag_migrate(id);
2ba6f0dd 22016+
4bf69007
AM
22017+ case VCMD_net_add:
22018+ return vc_net_add(nxi, data);
22019+ case VCMD_net_remove:
22020+ return vc_net_remove(nxi, data);
2ba6f0dd 22021+
4bf69007
AM
22022+ case VCMD_net_add_ipv4_v1:
22023+ return vc_net_add_ipv4_v1(nxi, data);
22024+ /* this is version 2 */
22025+ case VCMD_net_add_ipv4:
22026+ return vc_net_add_ipv4(nxi, data);
2ba6f0dd 22027+
4bf69007
AM
22028+ case VCMD_net_rem_ipv4_v1:
22029+ return vc_net_rem_ipv4_v1(nxi, data);
22030+ /* this is version 2 */
22031+ case VCMD_net_rem_ipv4:
22032+ return vc_net_rem_ipv4(nxi, data);
22033+#ifdef CONFIG_IPV6
22034+ case VCMD_net_add_ipv6:
22035+ return vc_net_add_ipv6(nxi, data);
22036+ case VCMD_net_remove_ipv6:
22037+ return vc_net_remove_ipv6(nxi, data);
22038+#endif
22039+/* case VCMD_add_match_ipv4:
22040+ return vc_add_match_ipv4(nxi, data);
22041+ case VCMD_get_match_ipv4:
22042+ return vc_get_match_ipv4(nxi, data);
22043+#ifdef CONFIG_IPV6
22044+ case VCMD_add_match_ipv6:
22045+ return vc_add_match_ipv6(nxi, data);
22046+ case VCMD_get_match_ipv6:
22047+ return vc_get_match_ipv6(nxi, data);
22048+#endif */
2ba6f0dd 22049+
4bf69007
AM
22050+#ifdef CONFIG_VSERVER_DEVICE
22051+ case VCMD_set_mapping:
22052+ return __COMPAT(vc_set_mapping, vxi, data, compat);
22053+ case VCMD_unset_mapping:
22054+ return __COMPAT(vc_unset_mapping, vxi, data, compat);
22055+#endif
22056+#ifdef CONFIG_VSERVER_HISTORY
22057+ case VCMD_dump_history:
22058+ return vc_dump_history(id);
22059+ case VCMD_read_history:
22060+ return __COMPAT(vc_read_history, id, data, compat);
22061+#endif
22062+ default:
22063+ vxwprintk_task(1, "unimplemented VCMD_%02d_%d[%d]",
22064+ VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd));
22065+ }
22066+ return -ENOSYS;
22067+}
2ba6f0dd 22068+
2ba6f0dd 22069+
4bf69007
AM
22070+#define __VCMD(vcmd, _perm, _args, _flags) \
22071+ case VCMD_ ## vcmd: perm = _perm; \
22072+ args = _args; flags = _flags; break
2ba6f0dd 22073+
2ba6f0dd 22074+
4bf69007
AM
22075+#define VCA_NONE 0x00
22076+#define VCA_VXI 0x01
22077+#define VCA_NXI 0x02
2ba6f0dd 22078+
4bf69007
AM
22079+#define VCF_NONE 0x00
22080+#define VCF_INFO 0x01
22081+#define VCF_ADMIN 0x02
22082+#define VCF_ARES 0x06 /* includes admin */
22083+#define VCF_SETUP 0x08
2ba6f0dd 22084+
4bf69007 22085+#define VCF_ZIDOK 0x10 /* zero id okay */
2ba6f0dd 22086+
2ba6f0dd
AM
22087+
22088+static inline
4bf69007 22089+long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
2ba6f0dd 22090+{
4bf69007
AM
22091+ long ret;
22092+ int permit = -1, state = 0;
22093+ int perm = -1, args = 0, flags = 0;
22094+ struct vx_info *vxi = NULL;
22095+ struct nx_info *nxi = NULL;
2ba6f0dd 22096+
4bf69007
AM
22097+ switch (cmd) {
22098+ /* unpriviledged commands */
22099+ __VCMD(get_version, 0, VCA_NONE, 0);
22100+ __VCMD(get_vci, 0, VCA_NONE, 0);
22101+ __VCMD(get_rlimit_mask, 0, VCA_NONE, 0);
22102+ __VCMD(get_space_mask_v0,0, VCA_NONE, 0);
22103+ __VCMD(get_space_mask, 0, VCA_NONE, 0);
22104+ __VCMD(get_space_default,0, VCA_NONE, 0);
2ba6f0dd 22105+
4bf69007
AM
22106+ /* info commands */
22107+ __VCMD(task_xid, 2, VCA_NONE, 0);
22108+ __VCMD(reset_hits, 2, VCA_VXI, 0);
22109+ __VCMD(reset_minmax, 2, VCA_VXI, 0);
22110+ __VCMD(vx_info, 3, VCA_VXI, VCF_INFO);
22111+ __VCMD(get_bcaps, 3, VCA_VXI, VCF_INFO);
22112+ __VCMD(get_ccaps, 3, VCA_VXI, VCF_INFO);
22113+ __VCMD(get_cflags, 3, VCA_VXI, VCF_INFO);
22114+ __VCMD(get_umask, 3, VCA_VXI, VCF_INFO);
22115+ __VCMD(get_wmask, 3, VCA_VXI, VCF_INFO);
22116+ __VCMD(get_badness, 3, VCA_VXI, VCF_INFO);
22117+ __VCMD(get_vhi_name, 3, VCA_VXI, VCF_INFO);
22118+ __VCMD(get_rlimit, 3, VCA_VXI, VCF_INFO);
2ba6f0dd 22119+
4bf69007
AM
22120+ __VCMD(ctx_stat, 3, VCA_VXI, VCF_INFO);
22121+ __VCMD(virt_stat, 3, VCA_VXI, VCF_INFO);
22122+ __VCMD(sock_stat, 3, VCA_VXI, VCF_INFO);
22123+ __VCMD(rlimit_stat, 3, VCA_VXI, VCF_INFO);
2ba6f0dd 22124+
4bf69007
AM
22125+ __VCMD(task_nid, 2, VCA_NONE, 0);
22126+ __VCMD(nx_info, 3, VCA_NXI, VCF_INFO);
22127+ __VCMD(get_ncaps, 3, VCA_NXI, VCF_INFO);
22128+ __VCMD(get_nflags, 3, VCA_NXI, VCF_INFO);
2ba6f0dd 22129+
4bf69007 22130+ __VCMD(task_tag, 2, VCA_NONE, 0);
2ba6f0dd 22131+
4bf69007
AM
22132+ __VCMD(get_iattr, 2, VCA_NONE, 0);
22133+ __VCMD(fget_iattr, 2, VCA_NONE, 0);
22134+ __VCMD(get_dlimit, 3, VCA_NONE, VCF_INFO);
22135+ __VCMD(get_prio_bias, 3, VCA_VXI, VCF_INFO);
2ba6f0dd 22136+
4bf69007
AM
22137+ /* lower admin commands */
22138+ __VCMD(wait_exit, 4, VCA_VXI, VCF_INFO);
22139+ __VCMD(ctx_create_v0, 5, VCA_NONE, 0);
22140+ __VCMD(ctx_create, 5, VCA_NONE, 0);
22141+ __VCMD(ctx_migrate_v0, 5, VCA_VXI, VCF_ADMIN);
22142+ __VCMD(ctx_migrate, 5, VCA_VXI, VCF_ADMIN);
22143+ __VCMD(enter_space_v0, 5, VCA_VXI, VCF_ADMIN);
22144+ __VCMD(enter_space_v1, 5, VCA_VXI, VCF_ADMIN);
22145+ __VCMD(enter_space, 5, VCA_VXI, VCF_ADMIN);
2ba6f0dd 22146+
4bf69007
AM
22147+ __VCMD(net_create_v0, 5, VCA_NONE, 0);
22148+ __VCMD(net_create, 5, VCA_NONE, 0);
22149+ __VCMD(net_migrate, 5, VCA_NXI, VCF_ADMIN);
2ba6f0dd 22150+
4bf69007 22151+ __VCMD(tag_migrate, 5, VCA_NONE, VCF_ADMIN);
2ba6f0dd 22152+
4bf69007
AM
22153+ /* higher admin commands */
22154+ __VCMD(ctx_kill, 6, VCA_VXI, VCF_ARES);
22155+ __VCMD(set_space_v1, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22156+ __VCMD(set_space, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
2ba6f0dd 22157+
4bf69007
AM
22158+ __VCMD(set_ccaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22159+ __VCMD(set_bcaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22160+ __VCMD(set_cflags, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22161+ __VCMD(set_umask, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22162+ __VCMD(set_wmask, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22163+ __VCMD(set_badness, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
2ba6f0dd 22164+
4bf69007
AM
22165+ __VCMD(set_vhi_name, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22166+ __VCMD(set_rlimit, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22167+ __VCMD(set_prio_bias, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
2ba6f0dd 22168+
4bf69007
AM
22169+ __VCMD(set_ncaps, 7, VCA_NXI, VCF_ARES | VCF_SETUP);
22170+ __VCMD(set_nflags, 7, VCA_NXI, VCF_ARES | VCF_SETUP);
22171+ __VCMD(net_add, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22172+ __VCMD(net_remove, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22173+ __VCMD(net_add_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22174+ __VCMD(net_rem_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22175+ __VCMD(net_add_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22176+ __VCMD(net_rem_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22177+#ifdef CONFIG_IPV6
22178+ __VCMD(net_add_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22179+ __VCMD(net_remove_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22180+#endif
22181+ __VCMD(set_iattr, 7, VCA_NONE, 0);
22182+ __VCMD(fset_iattr, 7, VCA_NONE, 0);
22183+ __VCMD(set_dlimit, 7, VCA_NONE, VCF_ARES);
22184+ __VCMD(add_dlimit, 8, VCA_NONE, VCF_ARES);
22185+ __VCMD(rem_dlimit, 8, VCA_NONE, VCF_ARES);
2ba6f0dd 22186+
4bf69007
AM
22187+#ifdef CONFIG_VSERVER_DEVICE
22188+ __VCMD(set_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK);
22189+ __VCMD(unset_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK);
22190+#endif
22191+ /* debug level admin commands */
22192+#ifdef CONFIG_VSERVER_HISTORY
22193+ __VCMD(dump_history, 9, VCA_NONE, 0);
22194+ __VCMD(read_history, 9, VCA_NONE, 0);
22195+#endif
2ba6f0dd 22196+
4bf69007
AM
22197+ default:
22198+ perm = -1;
22199+ }
2ba6f0dd 22200+
4bf69007
AM
22201+ vxdprintk(VXD_CBIT(switch, 0),
22202+ "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]",
22203+ VC_CATEGORY(cmd), VC_COMMAND(cmd),
22204+ VC_VERSION(cmd), id, data, compat,
22205+ perm, args, flags);
2ba6f0dd 22206+
4bf69007
AM
22207+ ret = -ENOSYS;
22208+ if (perm < 0)
22209+ goto out;
2ba6f0dd 22210+
4bf69007
AM
22211+ state = 1;
22212+ if (!capable(CAP_CONTEXT))
22213+ goto out;
2ba6f0dd 22214+
4bf69007
AM
22215+ state = 2;
22216+ /* moved here from the individual commands */
22217+ ret = -EPERM;
22218+ if ((perm > 1) && !capable(CAP_SYS_ADMIN))
22219+ goto out;
2ba6f0dd 22220+
4bf69007
AM
22221+ state = 3;
22222+ /* vcmd involves resource management */
22223+ ret = -EPERM;
22224+ if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE))
22225+ goto out;
2ba6f0dd 22226+
4bf69007
AM
22227+ state = 4;
22228+ /* various legacy exceptions */
22229+ switch (cmd) {
22230+ /* will go away when spectator is a cap */
22231+ case VCMD_ctx_migrate_v0:
22232+ case VCMD_ctx_migrate:
22233+ if (id == 1) {
22234+ current->xid = 1;
22235+ ret = 1;
22236+ goto out;
22237+ }
22238+ break;
2ba6f0dd 22239+
4bf69007
AM
22240+ /* will go away when spectator is a cap */
22241+ case VCMD_net_migrate:
22242+ if (id == 1) {
22243+ current->nid = 1;
22244+ ret = 1;
22245+ goto out;
22246+ }
22247+ break;
22248+ }
2ba6f0dd 22249+
4bf69007
AM
22250+ /* vcmds are fine by default */
22251+ permit = 1;
2ba6f0dd 22252+
4bf69007
AM
22253+ /* admin type vcmds require admin ... */
22254+ if (flags & VCF_ADMIN)
22255+ permit = vx_check(0, VS_ADMIN) ? 1 : 0;
2ba6f0dd 22256+
4bf69007
AM
22257+ /* ... but setup type vcmds override that */
22258+ if (!permit && (flags & VCF_SETUP))
22259+ permit = vx_flags(VXF_STATE_SETUP, 0) ? 2 : 0;
2ba6f0dd 22260+
4bf69007
AM
22261+ state = 5;
22262+ ret = -EPERM;
22263+ if (!permit)
22264+ goto out;
2ba6f0dd 22265+
4bf69007
AM
22266+ state = 6;
22267+ if (!id && (flags & VCF_ZIDOK))
22268+ goto skip_id;
2ba6f0dd 22269+
4bf69007
AM
22270+ ret = -ESRCH;
22271+ if (args & VCA_VXI) {
22272+ vxi = lookup_vx_info(id);
22273+ if (!vxi)
22274+ goto out;
2ba6f0dd 22275+
4bf69007
AM
22276+ if ((flags & VCF_ADMIN) &&
22277+ /* special case kill for shutdown */
22278+ (cmd != VCMD_ctx_kill) &&
22279+ /* can context be administrated? */
22280+ !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) {
22281+ ret = -EACCES;
22282+ goto out_vxi;
22283+ }
22284+ }
22285+ state = 7;
22286+ if (args & VCA_NXI) {
22287+ nxi = lookup_nx_info(id);
22288+ if (!nxi)
22289+ goto out_vxi;
2ba6f0dd 22290+
4bf69007
AM
22291+ if ((flags & VCF_ADMIN) &&
22292+ /* can context be administrated? */
22293+ !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) {
22294+ ret = -EACCES;
22295+ goto out_nxi;
22296+ }
22297+ }
22298+skip_id:
22299+ state = 8;
22300+ ret = do_vcmd(cmd, id, vxi, nxi, data, compat);
2ba6f0dd 22301+
4bf69007
AM
22302+out_nxi:
22303+ if ((args & VCA_NXI) && nxi)
22304+ put_nx_info(nxi);
22305+out_vxi:
22306+ if ((args & VCA_VXI) && vxi)
22307+ put_vx_info(vxi);
22308+out:
22309+ vxdprintk(VXD_CBIT(switch, 1),
22310+ "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]",
22311+ VC_CATEGORY(cmd), VC_COMMAND(cmd),
22312+ VC_VERSION(cmd), ret, ret, state, permit);
22313+ return ret;
22314+}
2ba6f0dd 22315+
4bf69007
AM
22316+asmlinkage long
22317+sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
22318+{
22319+ return do_vserver(cmd, id, data, 0);
22320+}
2ba6f0dd 22321+
4bf69007 22322+#ifdef CONFIG_COMPAT
2ba6f0dd 22323+
4bf69007
AM
22324+asmlinkage long
22325+sys32_vserver(uint32_t cmd, uint32_t id, void __user *data)
22326+{
22327+ return do_vserver(cmd, id, data, 1);
22328+}
2ba6f0dd 22329+
4bf69007 22330+#endif /* CONFIG_COMPAT */
8931d859
AM
22331diff -NurpP --minimal linux-4.4.161/kernel/vserver/sysctl.c linux-4.4.161-vs2.3.9.8/kernel/vserver/sysctl.c
22332--- linux-4.4.161/kernel/vserver/sysctl.c 1970-01-01 00:00:00.000000000 +0000
22333+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/sysctl.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
22334@@ -0,0 +1,247 @@
22335+/*
22336+ * kernel/vserver/sysctl.c
22337+ *
22338+ * Virtual Context Support
22339+ *
d6221c00 22340+ * Copyright (C) 2004-2007 Herbert P?tzl
4bf69007
AM
22341+ *
22342+ * V0.01 basic structure
22343+ *
22344+ */
2ba6f0dd 22345+
4bf69007
AM
22346+#include <linux/module.h>
22347+#include <linux/ctype.h>
22348+#include <linux/sysctl.h>
22349+#include <linux/parser.h>
22350+#include <asm/uaccess.h>
2ba6f0dd 22351+
4bf69007
AM
22352+enum {
22353+ CTL_DEBUG_ERROR = 0,
22354+ CTL_DEBUG_SWITCH = 1,
22355+ CTL_DEBUG_XID,
22356+ CTL_DEBUG_NID,
22357+ CTL_DEBUG_TAG,
22358+ CTL_DEBUG_NET,
22359+ CTL_DEBUG_LIMIT,
22360+ CTL_DEBUG_CRES,
22361+ CTL_DEBUG_DLIM,
22362+ CTL_DEBUG_QUOTA,
22363+ CTL_DEBUG_CVIRT,
22364+ CTL_DEBUG_SPACE,
22365+ CTL_DEBUG_PERM,
22366+ CTL_DEBUG_MISC,
2ba6f0dd
AM
22367+};
22368+
2ba6f0dd 22369+
4bf69007
AM
22370+unsigned int vs_debug_switch = 0;
22371+unsigned int vs_debug_xid = 0;
22372+unsigned int vs_debug_nid = 0;
22373+unsigned int vs_debug_tag = 0;
22374+unsigned int vs_debug_net = 0;
22375+unsigned int vs_debug_limit = 0;
22376+unsigned int vs_debug_cres = 0;
22377+unsigned int vs_debug_dlim = 0;
22378+unsigned int vs_debug_quota = 0;
22379+unsigned int vs_debug_cvirt = 0;
22380+unsigned int vs_debug_space = 0;
22381+unsigned int vs_debug_perm = 0;
22382+unsigned int vs_debug_misc = 0;
2ba6f0dd 22383+
2ba6f0dd 22384+
4bf69007 22385+static struct ctl_table_header *vserver_table_header;
bb20add7 22386+static struct ctl_table vserver_root_table[];
4bf69007 22387+
2ba6f0dd 22388+
4bf69007
AM
22389+void vserver_register_sysctl(void)
22390+{
22391+ if (!vserver_table_header) {
22392+ vserver_table_header = register_sysctl_table(vserver_root_table);
22393+ }
2ba6f0dd 22394+
4bf69007 22395+}
2ba6f0dd 22396+
4bf69007
AM
22397+void vserver_unregister_sysctl(void)
22398+{
22399+ if (vserver_table_header) {
22400+ unregister_sysctl_table(vserver_table_header);
22401+ vserver_table_header = NULL;
22402+ }
22403+}
2ba6f0dd 22404+
2ba6f0dd 22405+
bb20add7 22406+static int proc_dodebug(struct ctl_table *table, int write,
4bf69007
AM
22407+ void __user *buffer, size_t *lenp, loff_t *ppos)
22408+{
22409+ char tmpbuf[20], *p, c;
22410+ unsigned int value;
22411+ size_t left, len;
2ba6f0dd 22412+
4bf69007
AM
22413+ if ((*ppos && !write) || !*lenp) {
22414+ *lenp = 0;
22415+ return 0;
22416+ }
2ba6f0dd 22417+
4bf69007 22418+ left = *lenp;
2ba6f0dd 22419+
4bf69007
AM
22420+ if (write) {
22421+ if (!access_ok(VERIFY_READ, buffer, left))
22422+ return -EFAULT;
22423+ p = (char *)buffer;
22424+ while (left && __get_user(c, p) >= 0 && isspace(c))
22425+ left--, p++;
22426+ if (!left)
22427+ goto done;
2ba6f0dd 22428+
4bf69007
AM
22429+ if (left > sizeof(tmpbuf) - 1)
22430+ return -EINVAL;
22431+ if (copy_from_user(tmpbuf, p, left))
22432+ return -EFAULT;
22433+ tmpbuf[left] = '\0';
2ba6f0dd 22434+
4bf69007
AM
22435+ for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--)
22436+ value = 10 * value + (*p - '0');
22437+ if (*p && !isspace(*p))
22438+ return -EINVAL;
22439+ while (left && isspace(*p))
22440+ left--, p++;
22441+ *(unsigned int *)table->data = value;
22442+ } else {
22443+ if (!access_ok(VERIFY_WRITE, buffer, left))
22444+ return -EFAULT;
22445+ len = sprintf(tmpbuf, "%d", *(unsigned int *)table->data);
22446+ if (len > left)
22447+ len = left;
22448+ if (__copy_to_user(buffer, tmpbuf, len))
22449+ return -EFAULT;
22450+ if ((left -= len) > 0) {
22451+ if (put_user('\n', (char *)buffer + len))
22452+ return -EFAULT;
22453+ left--;
22454+ }
22455+ }
2ba6f0dd 22456+
4bf69007
AM
22457+done:
22458+ *lenp -= left;
22459+ *ppos += *lenp;
22460+ return 0;
22461+}
2ba6f0dd 22462+
4bf69007 22463+static int zero;
2ba6f0dd 22464+
4bf69007
AM
22465+#define CTL_ENTRY(ctl, name) \
22466+ { \
22467+ .procname = #name, \
22468+ .data = &vs_ ## name, \
22469+ .maxlen = sizeof(int), \
22470+ .mode = 0644, \
22471+ .proc_handler = &proc_dodebug, \
22472+ .extra1 = &zero, \
22473+ .extra2 = &zero, \
22474+ }
2ba6f0dd 22475+
bb20add7 22476+static struct ctl_table vserver_debug_table[] = {
4bf69007
AM
22477+ CTL_ENTRY(CTL_DEBUG_SWITCH, debug_switch),
22478+ CTL_ENTRY(CTL_DEBUG_XID, debug_xid),
22479+ CTL_ENTRY(CTL_DEBUG_NID, debug_nid),
22480+ CTL_ENTRY(CTL_DEBUG_TAG, debug_tag),
22481+ CTL_ENTRY(CTL_DEBUG_NET, debug_net),
22482+ CTL_ENTRY(CTL_DEBUG_LIMIT, debug_limit),
22483+ CTL_ENTRY(CTL_DEBUG_CRES, debug_cres),
22484+ CTL_ENTRY(CTL_DEBUG_DLIM, debug_dlim),
22485+ CTL_ENTRY(CTL_DEBUG_QUOTA, debug_quota),
22486+ CTL_ENTRY(CTL_DEBUG_CVIRT, debug_cvirt),
22487+ CTL_ENTRY(CTL_DEBUG_SPACE, debug_space),
22488+ CTL_ENTRY(CTL_DEBUG_PERM, debug_perm),
22489+ CTL_ENTRY(CTL_DEBUG_MISC, debug_misc),
22490+ { 0 }
22491+};
2ba6f0dd 22492+
bb20add7 22493+static struct ctl_table vserver_root_table[] = {
4bf69007
AM
22494+ {
22495+ .procname = "vserver",
22496+ .mode = 0555,
22497+ .child = vserver_debug_table
22498+ },
22499+ { 0 }
22500+};
2ba6f0dd 22501+
2ba6f0dd 22502+
4bf69007
AM
22503+static match_table_t tokens = {
22504+ { CTL_DEBUG_SWITCH, "switch=%x" },
22505+ { CTL_DEBUG_XID, "xid=%x" },
22506+ { CTL_DEBUG_NID, "nid=%x" },
22507+ { CTL_DEBUG_TAG, "tag=%x" },
22508+ { CTL_DEBUG_NET, "net=%x" },
22509+ { CTL_DEBUG_LIMIT, "limit=%x" },
22510+ { CTL_DEBUG_CRES, "cres=%x" },
22511+ { CTL_DEBUG_DLIM, "dlim=%x" },
22512+ { CTL_DEBUG_QUOTA, "quota=%x" },
22513+ { CTL_DEBUG_CVIRT, "cvirt=%x" },
22514+ { CTL_DEBUG_SPACE, "space=%x" },
22515+ { CTL_DEBUG_PERM, "perm=%x" },
22516+ { CTL_DEBUG_MISC, "misc=%x" },
22517+ { CTL_DEBUG_ERROR, NULL }
22518+};
2ba6f0dd 22519+
4bf69007
AM
22520+#define HANDLE_CASE(id, name, val) \
22521+ case CTL_DEBUG_ ## id: \
22522+ vs_debug_ ## name = val; \
22523+ printk("vs_debug_" #name "=0x%x\n", val); \
22524+ break
2ba6f0dd 22525+
2ba6f0dd 22526+
4bf69007
AM
22527+static int __init vs_debug_setup(char *str)
22528+{
22529+ char *p;
22530+ int token;
2ba6f0dd 22531+
4bf69007
AM
22532+ printk("vs_debug_setup(%s)\n", str);
22533+ while ((p = strsep(&str, ",")) != NULL) {
22534+ substring_t args[MAX_OPT_ARGS];
22535+ unsigned int value;
2ba6f0dd 22536+
4bf69007
AM
22537+ if (!*p)
22538+ continue;
2ba6f0dd 22539+
4bf69007
AM
22540+ token = match_token(p, tokens, args);
22541+ value = (token > 0) ? simple_strtoul(args[0].from, NULL, 0) : 0;
2ba6f0dd 22542+
4bf69007
AM
22543+ switch (token) {
22544+ HANDLE_CASE(SWITCH, switch, value);
22545+ HANDLE_CASE(XID, xid, value);
22546+ HANDLE_CASE(NID, nid, value);
22547+ HANDLE_CASE(TAG, tag, value);
22548+ HANDLE_CASE(NET, net, value);
22549+ HANDLE_CASE(LIMIT, limit, value);
22550+ HANDLE_CASE(CRES, cres, value);
22551+ HANDLE_CASE(DLIM, dlim, value);
22552+ HANDLE_CASE(QUOTA, quota, value);
22553+ HANDLE_CASE(CVIRT, cvirt, value);
22554+ HANDLE_CASE(SPACE, space, value);
22555+ HANDLE_CASE(PERM, perm, value);
22556+ HANDLE_CASE(MISC, misc, value);
22557+ default:
22558+ return -EINVAL;
22559+ break;
22560+ }
22561+ }
22562+ return 1;
22563+}
2ba6f0dd 22564+
4bf69007 22565+__setup("vsdebug=", vs_debug_setup);
2ba6f0dd 22566+
2ba6f0dd 22567+
2ba6f0dd 22568+
4bf69007
AM
22569+EXPORT_SYMBOL_GPL(vs_debug_switch);
22570+EXPORT_SYMBOL_GPL(vs_debug_xid);
22571+EXPORT_SYMBOL_GPL(vs_debug_nid);
22572+EXPORT_SYMBOL_GPL(vs_debug_net);
22573+EXPORT_SYMBOL_GPL(vs_debug_limit);
22574+EXPORT_SYMBOL_GPL(vs_debug_cres);
22575+EXPORT_SYMBOL_GPL(vs_debug_dlim);
22576+EXPORT_SYMBOL_GPL(vs_debug_quota);
22577+EXPORT_SYMBOL_GPL(vs_debug_cvirt);
22578+EXPORT_SYMBOL_GPL(vs_debug_space);
22579+EXPORT_SYMBOL_GPL(vs_debug_perm);
22580+EXPORT_SYMBOL_GPL(vs_debug_misc);
2ba6f0dd 22581+
8931d859
AM
22582diff -NurpP --minimal linux-4.4.161/kernel/vserver/tag.c linux-4.4.161-vs2.3.9.8/kernel/vserver/tag.c
22583--- linux-4.4.161/kernel/vserver/tag.c 1970-01-01 00:00:00.000000000 +0000
22584+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/tag.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
22585@@ -0,0 +1,63 @@
22586+/*
22587+ * linux/kernel/vserver/tag.c
22588+ *
22589+ * Virtual Server: Shallow Tag Space
22590+ *
d6221c00 22591+ * Copyright (C) 2007 Herbert P?tzl
4bf69007
AM
22592+ *
22593+ * V0.01 basic implementation
22594+ *
22595+ */
2ba6f0dd 22596+
4bf69007
AM
22597+#include <linux/sched.h>
22598+#include <linux/vserver/debug.h>
22599+#include <linux/vs_pid.h>
22600+#include <linux/vs_tag.h>
2ba6f0dd 22601+
4bf69007 22602+#include <linux/vserver/tag_cmd.h>
2ba6f0dd 22603+
2ba6f0dd 22604+
61333608 22605+int dx_migrate_task(struct task_struct *p, vtag_t tag)
4bf69007
AM
22606+{
22607+ if (!p)
22608+ BUG();
2ba6f0dd 22609+
4bf69007
AM
22610+ vxdprintk(VXD_CBIT(tag, 5),
22611+ "dx_migrate_task(%p[#%d],#%d)", p, p->tag, tag);
2ba6f0dd 22612+
4bf69007
AM
22613+ task_lock(p);
22614+ p->tag = tag;
22615+ task_unlock(p);
2ba6f0dd 22616+
4bf69007
AM
22617+ vxdprintk(VXD_CBIT(tag, 5),
22618+ "moved task %p into [#%d]", p, tag);
22619+ return 0;
22620+}
2ba6f0dd 22621+
4bf69007 22622+/* vserver syscall commands below here */
2ba6f0dd 22623+
4bf69007 22624+/* taks xid and vx_info functions */
2ba6f0dd 22625+
2ba6f0dd 22626+
4bf69007
AM
22627+int vc_task_tag(uint32_t id)
22628+{
61333608 22629+ vtag_t tag;
2ba6f0dd 22630+
4bf69007
AM
22631+ if (id) {
22632+ struct task_struct *tsk;
22633+ rcu_read_lock();
22634+ tsk = find_task_by_real_pid(id);
22635+ tag = (tsk) ? tsk->tag : -ESRCH;
22636+ rcu_read_unlock();
22637+ } else
22638+ tag = dx_current_tag();
22639+ return tag;
22640+}
2ba6f0dd 22641+
2ba6f0dd 22642+
4bf69007
AM
22643+int vc_tag_migrate(uint32_t tag)
22644+{
22645+ return dx_migrate_task(current, tag & 0xFFFF);
22646+}
2ba6f0dd 22647+
2ba6f0dd 22648+
8931d859
AM
22649diff -NurpP --minimal linux-4.4.161/kernel/vserver/vci_config.h linux-4.4.161-vs2.3.9.8/kernel/vserver/vci_config.h
22650--- linux-4.4.161/kernel/vserver/vci_config.h 1970-01-01 00:00:00.000000000 +0000
22651+++ linux-4.4.161-vs2.3.9.8/kernel/vserver/vci_config.h 2018-10-20 04:57:21.000000000 +0000
4bf69007 22652@@ -0,0 +1,80 @@
2ba6f0dd 22653+
4bf69007 22654+/* interface version */
2ba6f0dd 22655+
4bf69007 22656+#define VCI_VERSION 0x00020308
2ba6f0dd 22657+
2ba6f0dd 22658+
4bf69007
AM
22659+enum {
22660+ VCI_KCBIT_NO_DYNAMIC = 0,
2ba6f0dd 22661+
4bf69007
AM
22662+ VCI_KCBIT_PROC_SECURE = 4,
22663+ /* VCI_KCBIT_HARDCPU = 5, */
22664+ /* VCI_KCBIT_IDLELIMIT = 6, */
22665+ /* VCI_KCBIT_IDLETIME = 7, */
2ba6f0dd 22666+
4bf69007
AM
22667+ VCI_KCBIT_COWBL = 8,
22668+ VCI_KCBIT_FULLCOWBL = 9,
22669+ VCI_KCBIT_SPACES = 10,
22670+ VCI_KCBIT_NETV2 = 11,
22671+ VCI_KCBIT_MEMCG = 12,
22672+ VCI_KCBIT_MEMCG_SWAP = 13,
2ba6f0dd 22673+
4bf69007
AM
22674+ VCI_KCBIT_DEBUG = 16,
22675+ VCI_KCBIT_HISTORY = 20,
22676+ VCI_KCBIT_TAGGED = 24,
22677+ VCI_KCBIT_PPTAG = 28,
2ba6f0dd 22678+
4bf69007 22679+ VCI_KCBIT_MORE = 31,
2ba6f0dd
AM
22680+};
22681+
2ba6f0dd 22682+
4bf69007
AM
22683+static inline uint32_t vci_kernel_config(void)
22684+{
22685+ return
22686+ (1 << VCI_KCBIT_NO_DYNAMIC) |
2ba6f0dd 22687+
4bf69007
AM
22688+ /* configured features */
22689+#ifdef CONFIG_VSERVER_PROC_SECURE
22690+ (1 << VCI_KCBIT_PROC_SECURE) |
22691+#endif
22692+#ifdef CONFIG_VSERVER_COWBL
22693+ (1 << VCI_KCBIT_COWBL) |
22694+ (1 << VCI_KCBIT_FULLCOWBL) |
22695+#endif
22696+ (1 << VCI_KCBIT_SPACES) |
22697+ (1 << VCI_KCBIT_NETV2) |
22698+#ifdef CONFIG_MEMCG
22699+ (1 << VCI_KCBIT_MEMCG) |
22700+#endif
22701+#ifdef CONFIG_MEMCG_SWAP
22702+ (1 << VCI_KCBIT_MEMCG_SWAP) |
22703+#endif
2ba6f0dd 22704+
4bf69007
AM
22705+ /* debug options */
22706+#ifdef CONFIG_VSERVER_DEBUG
22707+ (1 << VCI_KCBIT_DEBUG) |
22708+#endif
22709+#ifdef CONFIG_VSERVER_HISTORY
22710+ (1 << VCI_KCBIT_HISTORY) |
22711+#endif
2ba6f0dd 22712+
4bf69007
AM
22713+ /* inode context tagging */
22714+#if defined(CONFIG_TAGGING_NONE)
22715+ (0 << VCI_KCBIT_TAGGED) |
22716+#elif defined(CONFIG_TAGGING_UID16)
22717+ (1 << VCI_KCBIT_TAGGED) |
22718+#elif defined(CONFIG_TAGGING_GID16)
22719+ (2 << VCI_KCBIT_TAGGED) |
22720+#elif defined(CONFIG_TAGGING_ID24)
22721+ (3 << VCI_KCBIT_TAGGED) |
22722+#elif defined(CONFIG_TAGGING_INTERN)
22723+ (4 << VCI_KCBIT_TAGGED) |
22724+#elif defined(CONFIG_TAGGING_RUNTIME)
22725+ (5 << VCI_KCBIT_TAGGED) |
22726+#else
22727+ (7 << VCI_KCBIT_TAGGED) |
22728+#endif
22729+ (1 << VCI_KCBIT_PPTAG) |
22730+ 0;
22731+}
2ba6f0dd 22732+
8931d859
AM
22733diff -NurpP --minimal linux-4.4.161/mm/memcontrol.c linux-4.4.161-vs2.3.9.8/mm/memcontrol.c
22734--- linux-4.4.161/mm/memcontrol.c 2018-10-20 02:34:31.000000000 +0000
22735+++ linux-4.4.161-vs2.3.9.8/mm/memcontrol.c 2018-10-20 05:50:20.000000000 +0000
1d9ad342 22736@@ -2888,6 +2888,42 @@ static u64 mem_cgroup_read_u64(struct cg
927ca606 22737 }
4bf69007
AM
22738 }
22739
1d9ad342 22740+unsigned long mem_cgroup_mem_usage_pages(struct mem_cgroup *memcg)
4bf69007 22741+{
1d9ad342 22742+ return mem_cgroup_usage(memcg, false);
4bf69007 22743+}
2ba6f0dd 22744+
1d9ad342 22745+unsigned long mem_cgroup_mem_limit_pages(struct mem_cgroup *memcg)
4bf69007 22746+{
927ca606 22747+ return (u64)memcg->memory.limit;
4bf69007 22748+}
2ba6f0dd 22749+
1d9ad342 22750+unsigned long mem_cgroup_memsw_usage_pages(struct mem_cgroup *memcg)
4bf69007 22751+{
1d9ad342 22752+ return mem_cgroup_usage(memcg, true);
4bf69007 22753+}
2ba6f0dd 22754+
1d9ad342 22755+unsigned long mem_cgroup_memsw_limit_pages(struct mem_cgroup *memcg)
4bf69007 22756+{
927ca606 22757+ return (u64)memcg->memsw.limit;
4bf69007 22758+}
2ba6f0dd 22759+
1d9ad342
AM
22760+void dump_mem_cgroup(struct mem_cgroup *memcg)
22761+{
22762+ printk(KERN_INFO "memcg: %p/%d:\n"
22763+ "\tmemory:\t%lu/%lu %lu/%lu\n"
22764+ "\tmemsw:\t%lu/%lu %lu/%lu\n"
22765+ "\tkmem:\t%lu/%lu %lu/%lu\n",
22766+ memcg, memcg->id.id,
22767+ page_counter_read(&memcg->memory), memcg->memory.limit,
22768+ memcg->memory.watermark, memcg->memory.failcnt,
22769+ page_counter_read(&memcg->memsw), memcg->memsw.limit,
22770+ memcg->memsw.watermark, memcg->memsw.failcnt,
22771+ page_counter_read(&memcg->kmem), memcg->kmem.limit,
22772+ memcg->kmem.watermark, memcg->kmem.failcnt);
22773+}
2ba6f0dd 22774+
927ca606
AM
22775+
22776 #ifdef CONFIG_MEMCG_KMEM
22777 static int memcg_activate_kmem(struct mem_cgroup *memcg,
22778 unsigned long nr_pages)
8931d859
AM
22779diff -NurpP --minimal linux-4.4.161/mm/oom_kill.c linux-4.4.161-vs2.3.9.8/mm/oom_kill.c
22780--- linux-4.4.161/mm/oom_kill.c 2016-01-10 23:01:32.000000000 +0000
22781+++ linux-4.4.161-vs2.3.9.8/mm/oom_kill.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
22782@@ -35,6 +35,8 @@
22783 #include <linux/freezer.h>
22784 #include <linux/ftrace.h>
22785 #include <linux/ratelimit.h>
22786+#include <linux/reboot.h>
22787+#include <linux/vs_context.h>
22788
22789 #define CREATE_TRACE_POINTS
22790 #include <trace/events/oom.h>
927ca606 22791@@ -131,11 +133,18 @@ static inline bool is_sysrq_oom(struct o
4bf69007 22792 static bool oom_unkillable_task(struct task_struct *p,
927ca606 22793 struct mem_cgroup *memcg, const nodemask_t *nodemask)
4bf69007
AM
22794 {
22795- if (is_global_init(p))
22796+ unsigned xid = vx_current_xid();
2ba6f0dd 22797+
4bf69007
AM
22798+ /* skip the init task, global and per guest */
22799+ if (task_is_init(p))
22800 return true;
22801 if (p->flags & PF_KTHREAD)
22802 return true;
22803
22804+ /* skip other guest and host processes if oom in guest */
22805+ if (xid && vx_task_xid(p) != xid)
22806+ return true;
2ba6f0dd 22807+
4bf69007
AM
22808 /* When mem_cgroup_out_of_memory() and p is not member of the group */
22809 if (memcg && !task_in_mem_cgroup(p, memcg))
22810 return true;
927ca606
AM
22811@@ -534,8 +543,8 @@ void oom_kill_process(struct oom_control
22812 if (__ratelimit(&oom_rs))
22813 dump_header(oc, p, memcg);
4bf69007 22814
927ca606 22815- pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
4bf69007
AM
22816- message, task_pid_nr(p), p->comm, points);
22817+ pr_err("%s: Kill process %d:#%u (%s) score %d or sacrifice child\n",
22818+ message, task_pid_nr(p), p->xid, p->comm, points);
4bf69007
AM
22819
22820 /*
927ca606
AM
22821 * If any of p's children has a different mm and is eligible for kill,
22822@@ -585,8 +594,8 @@ void oom_kill_process(struct oom_control
22823 */
22824 do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
22825 mark_oom_victim(victim);
4bf69007
AM
22826- pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
22827- task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
927ca606 22828+ pr_err("Killed process %d:%u (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
4bf69007
AM
22829+ task_pid_nr(victim), victim->xid, victim->comm, K(victim->mm->total_vm),
22830 K(get_mm_counter(victim->mm, MM_ANONPAGES)),
22831 K(get_mm_counter(victim->mm, MM_FILEPAGES)));
22832 task_unlock(victim);
927ca606 22833@@ -622,6 +631,8 @@ void oom_kill_process(struct oom_control
4bf69007 22834 }
927ca606 22835 #undef K
4bf69007
AM
22836
22837+long vs_oom_action(unsigned int);
2ba6f0dd 22838+
4bf69007 22839 /*
927ca606
AM
22840 * Determines whether the kernel must panic because of the panic_on_oom sysctl.
22841 */
22842@@ -722,7 +733,12 @@ bool out_of_memory(struct oom_control *o
4bf69007 22843 /* Found nothing?!?! Either we hang forever, or we panic. */
927ca606
AM
22844 if (!p && !is_sysrq_oom(oc)) {
22845 dump_header(oc, NULL, NULL);
4bf69007 22846- panic("Out of memory and no killable processes...\n");
2ba6f0dd 22847+
4bf69007
AM
22848+ /* avoid panic for guest OOM */
22849+ if (vx_current_xid())
22850+ vs_oom_action(LINUX_REBOOT_CMD_OOM);
22851+ else
22852+ panic("Out of memory and no killable processes...\n");
22853 }
927ca606
AM
22854 if (p && p != (void *)-1UL) {
22855 oom_kill_process(oc, p, points, totalpages, NULL,
8931d859
AM
22856diff -NurpP --minimal linux-4.4.161/mm/page_alloc.c linux-4.4.161-vs2.3.9.8/mm/page_alloc.c
22857--- linux-4.4.161/mm/page_alloc.c 2018-10-20 02:34:31.000000000 +0000
22858+++ linux-4.4.161-vs2.3.9.8/mm/page_alloc.c 2018-10-20 04:57:21.000000000 +0000
927ca606 22859@@ -62,6 +62,8 @@
b00e13aa 22860 #include <linux/sched/rt.h>
927ca606
AM
22861 #include <linux/page_owner.h>
22862 #include <linux/kthread.h>
4bf69007
AM
22863+#include <linux/vs_base.h>
22864+#include <linux/vs_limit.h>
22865
c2e5f7c8 22866 #include <asm/sections.h>
4bf69007 22867 #include <asm/tlbflush.h>
8931d859 22868@@ -3659,6 +3661,9 @@ void si_meminfo(struct sysinfo *val)
4bf69007
AM
22869 val->totalhigh = totalhigh_pages;
22870 val->freehigh = nr_free_highpages();
22871 val->mem_unit = PAGE_SIZE;
2ba6f0dd 22872+
4bf69007
AM
22873+ if (vx_flags(VXF_VIRT_MEM, 0))
22874+ vx_vsi_meminfo(val);
22875 }
22876
22877 EXPORT_SYMBOL(si_meminfo);
8931d859 22878@@ -3684,6 +3689,9 @@ void si_meminfo_node(struct sysinfo *val
4bf69007
AM
22879 val->freehigh = 0;
22880 #endif
22881 val->mem_unit = PAGE_SIZE;
2ba6f0dd 22882+
4bf69007
AM
22883+ if (vx_flags(VXF_VIRT_MEM, 0))
22884+ vx_vsi_meminfo(val);
22885 }
22886 #endif
22887
8931d859
AM
22888diff -NurpP --minimal linux-4.4.161/mm/pgtable-generic.c linux-4.4.161-vs2.3.9.8/mm/pgtable-generic.c
22889--- linux-4.4.161/mm/pgtable-generic.c 2018-10-20 02:34:31.000000000 +0000
22890+++ linux-4.4.161-vs2.3.9.8/mm/pgtable-generic.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
22891@@ -6,6 +6,8 @@
22892 * Copyright (C) 2010 Linus Torvalds
22893 */
22894
22895+#include <linux/mm.h>
2ba6f0dd 22896+
4bf69007
AM
22897 #include <linux/pagemap.h>
22898 #include <asm/tlb.h>
22899 #include <asm-generic/pgtable.h>
8931d859
AM
22900diff -NurpP --minimal linux-4.4.161/mm/shmem.c linux-4.4.161-vs2.3.9.8/mm/shmem.c
22901--- linux-4.4.161/mm/shmem.c 2018-10-20 02:34:31.000000000 +0000
22902+++ linux-4.4.161-vs2.3.9.8/mm/shmem.c 2018-10-20 05:50:20.000000000 +0000
22903@@ -2200,7 +2200,7 @@ static int shmem_statfs(struct dentry *d
4bf69007
AM
22904 {
22905 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
22906
22907- buf->f_type = TMPFS_MAGIC;
22908+ buf->f_type = TMPFS_SUPER_MAGIC;
22909 buf->f_bsize = PAGE_CACHE_SIZE;
22910 buf->f_namelen = NAME_MAX;
22911 if (sbinfo->max_blocks) {
8931d859 22912@@ -3046,7 +3046,7 @@ int shmem_fill_super(struct super_block
4bf69007
AM
22913 sb->s_maxbytes = MAX_LFS_FILESIZE;
22914 sb->s_blocksize = PAGE_CACHE_SIZE;
22915 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
22916- sb->s_magic = TMPFS_MAGIC;
22917+ sb->s_magic = TMPFS_SUPER_MAGIC;
22918 sb->s_op = &shmem_ops;
22919 sb->s_time_gran = 1;
22920 #ifdef CONFIG_TMPFS_XATTR
8931d859
AM
22921diff -NurpP --minimal linux-4.4.161/mm/slab.c linux-4.4.161-vs2.3.9.8/mm/slab.c
22922--- linux-4.4.161/mm/slab.c 2018-10-20 02:34:31.000000000 +0000
22923+++ linux-4.4.161-vs2.3.9.8/mm/slab.c 2018-10-20 04:57:21.000000000 +0000
927ca606 22924@@ -337,6 +337,8 @@ static void kmem_cache_node_init(struct
4bf69007
AM
22925 #define STATS_INC_FREEMISS(x) do { } while (0)
22926 #endif
22927
22928+#include "slab_vs.h"
2ba6f0dd 22929+
4bf69007
AM
22930 #if DEBUG
22931
22932 /*
927ca606 22933@@ -3183,6 +3185,7 @@ slab_alloc_node(struct kmem_cache *cache
4bf69007
AM
22934 /* ___cache_alloc_node can fall back to other nodes */
22935 ptr = ____cache_alloc_node(cachep, flags, nodeid);
22936 out:
22937+ vx_slab_alloc(cachep, flags);
22938 local_irq_restore(save_flags);
22939 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
22940 kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
927ca606 22941@@ -3371,6 +3374,7 @@ static inline void __cache_free(struct k
4bf69007
AM
22942 check_irq_off();
22943 kmemleak_free_recursive(objp, cachep->flags);
22944 objp = cache_free_debugcheck(cachep, objp, caller);
22945+ vx_slab_free(cachep);
22946
22947 kmemcheck_slab_free(cachep, objp, cachep->object_size);
22948
8931d859
AM
22949diff -NurpP --minimal linux-4.4.161/mm/slab_vs.h linux-4.4.161-vs2.3.9.8/mm/slab_vs.h
22950--- linux-4.4.161/mm/slab_vs.h 1970-01-01 00:00:00.000000000 +0000
22951+++ linux-4.4.161-vs2.3.9.8/mm/slab_vs.h 2018-10-20 04:57:21.000000000 +0000
4bf69007 22952@@ -0,0 +1,29 @@
2ba6f0dd 22953+
4bf69007 22954+#include <linux/vserver/context.h>
2ba6f0dd 22955+
4bf69007 22956+#include <linux/vs_context.h>
2ba6f0dd 22957+
4bf69007
AM
22958+static inline
22959+void vx_slab_alloc(struct kmem_cache *cachep, gfp_t flags)
22960+{
22961+ int what = gfp_zone(cachep->allocflags);
22962+ struct vx_info *vxi = current_vx_info();
2ba6f0dd 22963+
4bf69007
AM
22964+ if (!vxi)
22965+ return;
2ba6f0dd 22966+
4bf69007
AM
22967+ atomic_add(cachep->size, &vxi->cacct.slab[what]);
22968+}
2ba6f0dd 22969+
4bf69007
AM
22970+static inline
22971+void vx_slab_free(struct kmem_cache *cachep)
22972+{
22973+ int what = gfp_zone(cachep->allocflags);
22974+ struct vx_info *vxi = current_vx_info();
2ba6f0dd 22975+
4bf69007
AM
22976+ if (!vxi)
22977+ return;
2ba6f0dd 22978+
4bf69007
AM
22979+ atomic_sub(cachep->size, &vxi->cacct.slab[what]);
22980+}
2ba6f0dd 22981+
8931d859
AM
22982diff -NurpP --minimal linux-4.4.161/mm/swapfile.c linux-4.4.161-vs2.3.9.8/mm/swapfile.c
22983--- linux-4.4.161/mm/swapfile.c 2018-10-20 02:34:31.000000000 +0000
22984+++ linux-4.4.161-vs2.3.9.8/mm/swapfile.c 2018-10-20 05:50:20.000000000 +0000
4bf69007
AM
22985@@ -39,6 +39,7 @@
22986 #include <asm/tlbflush.h>
22987 #include <linux/swapops.h>
927ca606 22988 #include <linux/swap_cgroup.h>
4bf69007
AM
22989+#include <linux/vs_base.h>
22990
22991 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
22992 unsigned char);
927ca606 22993@@ -2070,6 +2071,16 @@ static int swap_show(struct seq_file *sw
4bf69007
AM
22994
22995 if (si == SEQ_START_TOKEN) {
22996 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
22997+ if (vx_flags(VXF_VIRT_MEM, 0)) {
927ca606 22998+ struct sysinfo si = { 0 };
2ba6f0dd 22999+
4bf69007
AM
23000+ vx_vsi_swapinfo(&si);
23001+ if (si.totalswap < (1 << 10))
23002+ return 0;
23003+ seq_printf(swap, "%s\t\t\t\t\t%s\t%lu\t%lu\t%d\n",
23004+ "hdv0", "partition", si.totalswap >> 10,
23005+ (si.totalswap - si.freeswap) >> 10, -1);
23006+ }
23007 return 0;
23008 }
23009
8931d859 23010@@ -2627,6 +2638,8 @@ void si_swapinfo(struct sysinfo *val)
b00e13aa 23011 val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused;
4bf69007
AM
23012 val->totalswap = total_swap_pages + nr_to_be_unused;
23013 spin_unlock(&swap_lock);
23014+ if (vx_flags(VXF_VIRT_MEM, 0))
23015+ vx_vsi_swapinfo(val);
23016 }
23017
23018 /*
8931d859
AM
23019diff -NurpP --minimal linux-4.4.161/net/bridge/br_multicast.c linux-4.4.161-vs2.3.9.8/net/bridge/br_multicast.c
23020--- linux-4.4.161/net/bridge/br_multicast.c 2018-10-20 02:34:31.000000000 +0000
23021+++ linux-4.4.161-vs2.3.9.8/net/bridge/br_multicast.c 2018-10-20 04:57:21.000000000 +0000
927ca606 23022@@ -462,7 +462,7 @@ static struct sk_buff *br_ip6_multicast_
4bf69007
AM
23023 ip6h->hop_limit = 1;
23024 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
23025 if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
23026- &ip6h->saddr)) {
23027+ &ip6h->saddr, NULL)) {
23028 kfree_skb(skb);
927ca606 23029 br->has_ipv6_addr = 0;
4bf69007 23030 return NULL;
8931d859
AM
23031diff -NurpP --minimal linux-4.4.161/net/core/dev.c linux-4.4.161-vs2.3.9.8/net/core/dev.c
23032--- linux-4.4.161/net/core/dev.c 2018-10-20 02:34:31.000000000 +0000
23033+++ linux-4.4.161-vs2.3.9.8/net/core/dev.c 2018-10-20 05:50:20.000000000 +0000
927ca606 23034@@ -124,6 +124,7 @@
4bf69007
AM
23035 #include <linux/in.h>
23036 #include <linux/jhash.h>
23037 #include <linux/random.h>
23038+#include <linux/vs_inet.h>
23039 #include <trace/events/napi.h>
23040 #include <trace/events/net.h>
23041 #include <trace/events/skb.h>
927ca606 23042@@ -726,7 +727,8 @@ struct net_device *__dev_get_by_name(str
4bf69007
AM
23043 struct hlist_head *head = dev_name_hash(net, name);
23044
b00e13aa 23045 hlist_for_each_entry(dev, head, name_hlist)
4bf69007
AM
23046- if (!strncmp(dev->name, name, IFNAMSIZ))
23047+ if (!strncmp(dev->name, name, IFNAMSIZ) &&
23048+ nx_dev_visible(current_nx_info(), dev))
23049 return dev;
23050
23051 return NULL;
927ca606 23052@@ -751,7 +753,8 @@ struct net_device *dev_get_by_name_rcu(s
4bf69007
AM
23053 struct hlist_head *head = dev_name_hash(net, name);
23054
b00e13aa 23055 hlist_for_each_entry_rcu(dev, head, name_hlist)
4bf69007
AM
23056- if (!strncmp(dev->name, name, IFNAMSIZ))
23057+ if (!strncmp(dev->name, name, IFNAMSIZ) &&
23058+ nx_dev_visible(current_nx_info(), dev))
23059 return dev;
23060
23061 return NULL;
927ca606 23062@@ -801,7 +804,8 @@ struct net_device *__dev_get_by_index(st
4bf69007
AM
23063 struct hlist_head *head = dev_index_hash(net, ifindex);
23064
b00e13aa 23065 hlist_for_each_entry(dev, head, index_hlist)
4bf69007
AM
23066- if (dev->ifindex == ifindex)
23067+ if ((dev->ifindex == ifindex) &&
23068+ nx_dev_visible(current_nx_info(), dev))
23069 return dev;
23070
23071 return NULL;
927ca606 23072@@ -819,7 +823,7 @@ EXPORT_SYMBOL(__dev_get_by_index);
4bf69007
AM
23073 * about locking. The caller must hold RCU lock.
23074 */
23075
23076-struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
23077+struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex)
23078 {
4bf69007 23079 struct net_device *dev;
b00e13aa 23080 struct hlist_head *head = dev_index_hash(net, ifindex);
927ca606 23081@@ -830,6 +834,16 @@ struct net_device *dev_get_by_index_rcu(
4bf69007
AM
23082
23083 return NULL;
23084 }
23085+EXPORT_SYMBOL(dev_get_by_index_real_rcu);
2ba6f0dd 23086+
4bf69007
AM
23087+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
23088+{
23089+ struct net_device *dev = dev_get_by_index_real_rcu(net, ifindex);
2ba6f0dd 23090+
4bf69007
AM
23091+ if (nx_dev_visible(current_nx_info(), dev))
23092+ return dev;
23093+ return NULL;
23094+}
23095 EXPORT_SYMBOL(dev_get_by_index_rcu);
23096
23097
927ca606 23098@@ -912,7 +926,8 @@ struct net_device *dev_getbyhwaddr_rcu(s
4bf69007
AM
23099
23100 for_each_netdev_rcu(net, dev)
23101 if (dev->type == type &&
23102- !memcmp(dev->dev_addr, ha, dev->addr_len))
23103+ !memcmp(dev->dev_addr, ha, dev->addr_len) &&
23104+ nx_dev_visible(current_nx_info(), dev))
23105 return dev;
23106
23107 return NULL;
927ca606 23108@@ -924,9 +939,11 @@ struct net_device *__dev_getfirstbyhwtyp
4bf69007
AM
23109 struct net_device *dev;
23110
23111 ASSERT_RTNL();
23112- for_each_netdev(net, dev)
23113- if (dev->type == type)
23114+ for_each_netdev(net, dev) {
23115+ if ((dev->type == type) &&
23116+ nx_dev_visible(current_nx_info(), dev))
23117 return dev;
23118+ }
23119
23120 return NULL;
23121 }
927ca606 23122@@ -938,7 +955,8 @@ struct net_device *dev_getfirstbyhwtype(
b00e13aa
AM
23123
23124 rcu_read_lock();
23125 for_each_netdev_rcu(net, dev)
23126- if (dev->type == type) {
23127+ if ((dev->type == type) &&
23128+ nx_dev_visible(current_nx_info(), dev)) {
23129 dev_hold(dev);
23130 ret = dev;
23131 break;
927ca606 23132@@ -968,7 +986,8 @@ struct net_device *__dev_get_by_flags(st
b00e13aa
AM
23133
23134 ret = NULL;
bb20add7 23135 for_each_netdev(net, dev) {
b00e13aa
AM
23136- if (((dev->flags ^ if_flags) & mask) == 0) {
23137+ if ((((dev->flags ^ if_flags) & mask) == 0) &&
23138+ nx_dev_visible(current_nx_info(), dev)) {
23139 ret = dev;
23140 break;
23141 }
927ca606 23142@@ -1046,6 +1065,8 @@ static int __dev_alloc_name(struct net *
4bf69007
AM
23143 continue;
23144 if (i < 0 || i >= max_netdevices)
23145 continue;
23146+ if (!nx_dev_visible(current_nx_info(), d))
23147+ continue;
23148
23149 /* avoid cases where sscanf is not exact inverse of printf */
23150 snprintf(buf, IFNAMSIZ, name, i);
8931d859
AM
23151diff -NurpP --minimal linux-4.4.161/net/core/net-procfs.c linux-4.4.161-vs2.3.9.8/net/core/net-procfs.c
23152--- linux-4.4.161/net/core/net-procfs.c 2016-01-10 23:01:32.000000000 +0000
23153+++ linux-4.4.161-vs2.3.9.8/net/core/net-procfs.c 2018-10-20 04:57:21.000000000 +0000
8ce283e1
AM
23154@@ -1,6 +1,7 @@
23155 #include <linux/netdevice.h>
23156 #include <linux/proc_fs.h>
23157 #include <linux/seq_file.h>
23158+#include <linux/vs_inet.h>
23159 #include <net/wext.h>
23160
23161 #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
23162@@ -77,8 +78,13 @@ static void dev_seq_stop(struct seq_file
23163 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
23164 {
23165 struct rtnl_link_stats64 temp;
23166- const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
23167+ const struct rtnl_link_stats64 *stats;
23168+
23169+ /* device visible inside network context? */
23170+ if (!nx_dev_visible(current_nx_info(), dev))
23171+ return;
23172
23173+ stats = dev_get_stats(dev, &temp);
23174 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
23175 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
23176 dev->name, stats->rx_bytes, stats->rx_packets,
8931d859
AM
23177diff -NurpP --minimal linux-4.4.161/net/core/rtnetlink.c linux-4.4.161-vs2.3.9.8/net/core/rtnetlink.c
23178--- linux-4.4.161/net/core/rtnetlink.c 2018-10-20 02:34:31.000000000 +0000
23179+++ linux-4.4.161-vs2.3.9.8/net/core/rtnetlink.c 2018-10-20 05:50:20.000000000 +0000
927ca606
AM
23180@@ -1456,6 +1456,8 @@ static int rtnl_dump_ifinfo(struct sk_bu
23181 hlist_for_each_entry(dev, head, index_hlist) {
4bf69007
AM
23182 if (idx < s_idx)
23183 goto cont;
23184+ if (!nx_dev_visible(skb->sk->sk_nx_info, dev))
23185+ continue;
7ed51edd
JR
23186 err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
23187 NETLINK_CB(cb->skb).portid,
23188 cb->nlh->nlmsg_seq, 0,
8931d859 23189@@ -2562,6 +2564,9 @@ void rtmsg_ifinfo(int type, struct net_d
927ca606
AM
23190 {
23191 struct sk_buff *skb;
4bf69007
AM
23192
23193+ if (!nx_dev_visible(current_nx_info(), dev))
23194+ return;
2ba6f0dd 23195+
927ca606
AM
23196 if (dev->reg_state != NETREG_REGISTERED)
23197 return;
23198
8931d859
AM
23199diff -NurpP --minimal linux-4.4.161/net/core/sock.c linux-4.4.161-vs2.3.9.8/net/core/sock.c
23200--- linux-4.4.161/net/core/sock.c 2018-10-20 02:34:31.000000000 +0000
23201+++ linux-4.4.161-vs2.3.9.8/net/core/sock.c 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
23202@@ -134,6 +134,10 @@
23203 #include <linux/sock_diag.h>
4bf69007
AM
23204
23205 #include <linux/filter.h>
23206+#include <linux/vs_socket.h>
23207+#include <linux/vs_limit.h>
23208+#include <linux/vs_context.h>
23209+#include <linux/vs_network.h>
23210
23211 #include <trace/events/sock.h>
23212
927ca606 23213@@ -1363,6 +1367,8 @@ static struct sock *sk_prot_alloc(struct
4bf69007
AM
23214 goto out_free_sec;
23215 sk_tx_queue_clear(sk);
23216 }
23217+ sock_vx_init(sk);
23218+ sock_nx_init(sk);
23219
23220 return sk;
23221
927ca606 23222@@ -1469,6 +1475,11 @@ void sk_destruct(struct sock *sk)
4bf69007 23223 put_pid(sk->sk_peer_pid);
927ca606
AM
23224 if (likely(sk->sk_net_refcnt))
23225 put_net(sock_net(sk));
4bf69007
AM
23226+ vx_sock_dec(sk);
23227+ clr_vx_info(&sk->sk_vx_info);
23228+ sk->sk_xid = -1;
23229+ clr_nx_info(&sk->sk_nx_info);
23230+ sk->sk_nid = -1;
23231 sk_prot_free(sk->sk_prot_creator, sk);
23232 }
23233
927ca606 23234@@ -1521,6 +1532,8 @@ struct sock *sk_clone_lock(const struct
4bf69007 23235 /* SANITY */
927ca606
AM
23236 if (likely(newsk->sk_net_refcnt))
23237 get_net(sock_net(newsk));
4bf69007
AM
23238+ sock_vx_init(newsk);
23239+ sock_nx_init(newsk);
23240 sk_node_init(&newsk->sk_node);
23241 sock_lock_init(newsk);
23242 bh_lock_sock(newsk);
927ca606 23243@@ -1586,6 +1599,12 @@ struct sock *sk_clone_lock(const struct
4bf69007
AM
23244 smp_wmb();
23245 atomic_set(&newsk->sk_refcnt, 2);
23246
23247+ set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info);
23248+ newsk->sk_xid = sk->sk_xid;
23249+ vx_sock_inc(newsk);
23250+ set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info);
23251+ newsk->sk_nid = sk->sk_nid;
2ba6f0dd 23252+
4bf69007
AM
23253 /*
23254 * Increment the counter in the same struct proto as the master
23255 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
927ca606 23256@@ -2424,6 +2443,12 @@ void sock_init_data(struct socket *sock,
4bf69007
AM
23257
23258 sk->sk_stamp = ktime_set(-1L, 0);
23259
23260+ set_vx_info(&sk->sk_vx_info, current_vx_info());
23261+ sk->sk_xid = vx_current_xid();
23262+ vx_sock_inc(sk);
23263+ set_nx_info(&sk->sk_nx_info, current_nx_info());
23264+ sk->sk_nid = nx_current_nid();
2ba6f0dd 23265+
c2e5f7c8
JR
23266 #ifdef CONFIG_NET_RX_BUSY_POLL
23267 sk->sk_napi_id = 0;
23268 sk->sk_ll_usec = sysctl_net_busy_read;
8931d859
AM
23269diff -NurpP --minimal linux-4.4.161/net/ipv4/af_inet.c linux-4.4.161-vs2.3.9.8/net/ipv4/af_inet.c
23270--- linux-4.4.161/net/ipv4/af_inet.c 2018-10-20 02:34:31.000000000 +0000
23271+++ linux-4.4.161-vs2.3.9.8/net/ipv4/af_inet.c 2018-10-20 05:50:20.000000000 +0000
927ca606 23272@@ -308,10 +308,15 @@ lookup_protocol:
4bf69007
AM
23273 }
23274
23275 err = -EPERM;
23276+ if ((protocol == IPPROTO_ICMP) &&
23277+ nx_capable(CAP_NET_RAW, NXC_RAW_ICMP))
23278+ goto override;
927ca606 23279+
b00e13aa
AM
23280 if (sock->type == SOCK_RAW && !kern &&
23281 !ns_capable(net->user_ns, CAP_NET_RAW))
4bf69007 23282 goto out_rcu_unlock;
927ca606 23283
a4a22af8
AM
23284+override:
23285 sock->ops = answer->ops;
23286 answer_prot = answer->prot;
bb20add7 23287 answer_flags = answer->flags;
927ca606 23288@@ -425,6 +430,7 @@ int inet_bind(struct socket *sock, struc
4bf69007
AM
23289 struct sock *sk = sock->sk;
23290 struct inet_sock *inet = inet_sk(sk);
b00e13aa 23291 struct net *net = sock_net(sk);
927ca606 23292+ struct nx_v4_sock_addr nsa;
4bf69007
AM
23293 unsigned short snum;
23294 int chk_addr_ret;
927ca606
AM
23295 u32 tb_id = RT_TABLE_LOCAL;
23296@@ -450,7 +456,11 @@ int inet_bind(struct socket *sock, struc
4bf69007
AM
23297 }
23298
927ca606
AM
23299 tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
23300- chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
4bf69007
AM
23301+ err = v4_map_sock_addr(inet, addr, &nsa);
23302+ if (err)
23303+ goto out;
2ba6f0dd 23304+
927ca606 23305+ chk_addr_ret = inet_addr_type_table(net, nsa.saddr, tb_id);
4bf69007
AM
23306
23307 /* Not specified by any standard per-se, however it breaks too
23308 * many applications when removed. It is unfortunate since
927ca606 23309@@ -462,7 +472,7 @@ int inet_bind(struct socket *sock, struc
4bf69007 23310 err = -EADDRNOTAVAIL;
bb20add7 23311 if (!net->ipv4.sysctl_ip_nonlocal_bind &&
4bf69007
AM
23312 !(inet->freebind || inet->transparent) &&
23313- addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
23314+ nsa.saddr != htonl(INADDR_ANY) &&
23315 chk_addr_ret != RTN_LOCAL &&
23316 chk_addr_ret != RTN_MULTICAST &&
23317 chk_addr_ret != RTN_BROADCAST)
927ca606 23318@@ -488,7 +498,7 @@ int inet_bind(struct socket *sock, struc
4bf69007
AM
23319 if (sk->sk_state != TCP_CLOSE || inet->inet_num)
23320 goto out_release_sock;
23321
23322- inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
23323+ v4_set_sock_addr(inet, &nsa);
23324 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
23325 inet->inet_saddr = 0; /* Use device */
23326
927ca606 23327@@ -708,11 +718,13 @@ int inet_getname(struct socket *sock, st
4bf69007
AM
23328 peer == 1))
23329 return -ENOTCONN;
23330 sin->sin_port = inet->inet_dport;
23331- sin->sin_addr.s_addr = inet->inet_daddr;
23332+ sin->sin_addr.s_addr =
23333+ nx_map_sock_lback(sk->sk_nx_info, inet->inet_daddr);
23334 } else {
23335 __be32 addr = inet->inet_rcv_saddr;
23336 if (!addr)
23337 addr = inet->inet_saddr;
23338+ addr = nx_map_sock_lback(sk->sk_nx_info, addr);
23339 sin->sin_port = inet->inet_sport;
23340 sin->sin_addr.s_addr = addr;
23341 }
927ca606
AM
23342@@ -896,6 +908,7 @@ static int inet_compat_ioctl(struct sock
23343 return err;
23344 }
23345 #endif
23346+#include <linux/vs_limit.h>
23347
23348 const struct proto_ops inet_stream_ops = {
23349 .family = PF_INET,
8931d859
AM
23350diff -NurpP --minimal linux-4.4.161/net/ipv4/arp.c linux-4.4.161-vs2.3.9.8/net/ipv4/arp.c
23351--- linux-4.4.161/net/ipv4/arp.c 2018-10-20 02:34:31.000000000 +0000
23352+++ linux-4.4.161-vs2.3.9.8/net/ipv4/arp.c 2018-10-20 04:57:21.000000000 +0000
23353@@ -1307,6 +1307,7 @@ static void arp_format_neigh_entry(struc
4bf69007
AM
23354 struct net_device *dev = n->dev;
23355 int hatype = dev->type;
23356
23357+ /* FIXME: check for network context */
23358 read_lock(&n->lock);
23359 /* Convert hardware address to XX:XX:XX:XX ... form. */
23360 #if IS_ENABLED(CONFIG_AX25)
8931d859 23361@@ -1338,6 +1339,7 @@ static void arp_format_pneigh_entry(stru
4bf69007
AM
23362 int hatype = dev ? dev->type : 0;
23363 char tbuf[16];
23364
23365+ /* FIXME: check for network context */
23366 sprintf(tbuf, "%pI4", n->key);
23367 seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
23368 tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00",
8931d859
AM
23369diff -NurpP --minimal linux-4.4.161/net/ipv4/devinet.c linux-4.4.161-vs2.3.9.8/net/ipv4/devinet.c
23370--- linux-4.4.161/net/ipv4/devinet.c 2018-10-20 02:34:31.000000000 +0000
23371+++ linux-4.4.161-vs2.3.9.8/net/ipv4/devinet.c 2018-10-20 04:57:21.000000000 +0000
927ca606 23372@@ -538,6 +538,7 @@ struct in_device *inetdev_by_index(struc
4bf69007
AM
23373 }
23374 EXPORT_SYMBOL(inetdev_by_index);
23375
2ba6f0dd 23376+
4bf69007
AM
23377 /* Called only from RTNL semaphored context. No locks. */
23378
23379 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
927ca606 23380@@ -992,6 +993,8 @@ int devinet_ioctl(struct net *net, unsig
4bf69007
AM
23381
23382 in_dev = __in_dev_get_rtnl(dev);
23383 if (in_dev) {
23384+ struct nx_info *nxi = current_nx_info();
2ba6f0dd 23385+
4bf69007
AM
23386 if (tryaddrmatch) {
23387 /* Matthias Andree */
23388 /* compare label and address (4.4BSD style) */
927ca606 23389@@ -1000,6 +1003,8 @@ int devinet_ioctl(struct net *net, unsig
4bf69007
AM
23390 This is checked above. */
23391 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
23392 ifap = &ifa->ifa_next) {
23393+ if (!nx_v4_ifa_visible(nxi, ifa))
23394+ continue;
23395 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
23396 sin_orig.sin_addr.s_addr ==
23397 ifa->ifa_local) {
927ca606 23398@@ -1012,9 +1017,12 @@ int devinet_ioctl(struct net *net, unsig
4bf69007
AM
23399 comparing just the label */
23400 if (!ifa) {
23401 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
23402- ifap = &ifa->ifa_next)
23403+ ifap = &ifa->ifa_next) {
23404+ if (!nx_v4_ifa_visible(nxi, ifa))
23405+ continue;
23406 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
23407 break;
23408+ }
23409 }
23410 }
23411
927ca606 23412@@ -1168,6 +1176,8 @@ static int inet_gifconf(struct net_devic
4bf69007
AM
23413 goto out;
23414
23415 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
23416+ if (!nx_v4_ifa_visible(current_nx_info(), ifa))
23417+ continue;
23418 if (!buf) {
23419 done += sizeof(ifr);
23420 continue;
927ca606 23421@@ -1573,6 +1583,7 @@ static int inet_dump_ifaddr(struct sk_bu
4bf69007
AM
23422 struct net_device *dev;
23423 struct in_device *in_dev;
23424 struct in_ifaddr *ifa;
23425+ struct sock *sk = skb->sk;
23426 struct hlist_head *head;
4bf69007 23427
b00e13aa 23428 s_h = cb->args[0];
927ca606 23429@@ -1596,6 +1607,8 @@ static int inet_dump_ifaddr(struct sk_bu
4bf69007
AM
23430
23431 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
23432 ifa = ifa->ifa_next, ip_idx++) {
23433+ if (sk && !nx_v4_ifa_visible(sk->sk_nx_info, ifa))
23434+ continue;
23435 if (ip_idx < s_ip_idx)
23436 continue;
23437 if (inet_fill_ifaddr(skb, ifa,
8931d859
AM
23438diff -NurpP --minimal linux-4.4.161/net/ipv4/fib_trie.c linux-4.4.161-vs2.3.9.8/net/ipv4/fib_trie.c
23439--- linux-4.4.161/net/ipv4/fib_trie.c 2018-10-20 02:34:31.000000000 +0000
23440+++ linux-4.4.161-vs2.3.9.8/net/ipv4/fib_trie.c 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
23441@@ -2591,6 +2591,7 @@ static int fib_route_seq_show(struct seq
23442
23443 seq_setwidth(seq, 127);
23444
23445+ /* FIXME: check for network context? */
23446 if (fi)
23447 seq_printf(seq,
23448 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
8931d859
AM
23449diff -NurpP --minimal linux-4.4.161/net/ipv4/inet_connection_sock.c linux-4.4.161-vs2.3.9.8/net/ipv4/inet_connection_sock.c
23450--- linux-4.4.161/net/ipv4/inet_connection_sock.c 2018-10-20 02:34:31.000000000 +0000
23451+++ linux-4.4.161-vs2.3.9.8/net/ipv4/inet_connection_sock.c 2018-10-20 04:57:21.000000000 +0000
927ca606 23452@@ -43,6 +43,37 @@ void inet_get_local_port_range(struct ne
4bf69007
AM
23453 }
23454 EXPORT_SYMBOL(inet_get_local_port_range);
23455
23456+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
23457+{
c2e5f7c8
JR
23458+ __be32 sk1_rcv_saddr = sk1->sk_rcv_saddr,
23459+ sk2_rcv_saddr = sk2->sk_rcv_saddr;
2ba6f0dd 23460+
4bf69007
AM
23461+ if (inet_v6_ipv6only(sk2))
23462+ return 0;
2ba6f0dd 23463+
4bf69007
AM
23464+ if (sk1_rcv_saddr &&
23465+ sk2_rcv_saddr &&
23466+ sk1_rcv_saddr == sk2_rcv_saddr)
23467+ return 1;
2ba6f0dd 23468+
4bf69007
AM
23469+ if (sk1_rcv_saddr &&
23470+ !sk2_rcv_saddr &&
23471+ v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, NXA_MASK_BIND))
23472+ return 1;
2ba6f0dd 23473+
4bf69007
AM
23474+ if (sk2_rcv_saddr &&
23475+ !sk1_rcv_saddr &&
23476+ v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, NXA_MASK_BIND))
23477+ return 1;
2ba6f0dd 23478+
4bf69007
AM
23479+ if (!sk1_rcv_saddr &&
23480+ !sk2_rcv_saddr &&
23481+ nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info))
23482+ return 1;
2ba6f0dd 23483+
4bf69007
AM
23484+ return 0;
23485+}
2ba6f0dd 23486+
4bf69007
AM
23487 int inet_csk_bind_conflict(const struct sock *sk,
23488 const struct inet_bind_bucket *tb, bool relax)
23489 {
927ca606 23490@@ -70,15 +101,13 @@ int inet_csk_bind_conflict(const struct
b00e13aa
AM
23491 (sk2->sk_state != TCP_TIME_WAIT &&
23492 !uid_eq(uid, sock_i_uid(sk2))))) {
c2e5f7c8
JR
23493
23494- if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
23495- sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
4bf69007
AM
23496+ if (ipv4_rcv_saddr_equal(sk, sk2))
23497 break;
23498 }
23499 if (!relax && reuse && sk2->sk_reuse &&
b00e13aa 23500 sk2->sk_state != TCP_LISTEN) {
c2e5f7c8
JR
23501
23502- if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
23503- sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
b00e13aa
AM
23504+ if (ipv4_rcv_saddr_equal(sk, sk2))
23505 break;
23506 }
23507 }
8931d859
AM
23508diff -NurpP --minimal linux-4.4.161/net/ipv4/inet_diag.c linux-4.4.161-vs2.3.9.8/net/ipv4/inet_diag.c
23509--- linux-4.4.161/net/ipv4/inet_diag.c 2016-01-10 23:01:32.000000000 +0000
23510+++ linux-4.4.161-vs2.3.9.8/net/ipv4/inet_diag.c 2018-10-20 06:34:05.000000000 +0000
4bf69007
AM
23511@@ -31,6 +31,8 @@
23512
23513 #include <linux/inet.h>
23514 #include <linux/stddef.h>
23515+#include <linux/vs_network.h>
23516+#include <linux/vs_inet.h>
23517
23518 #include <linux/inet_diag.h>
23519 #include <linux/sock_diag.h>
8931d859
AM
23520@@ -85,8 +87,8 @@ static void inet_diag_msg_common_fill(st
23521 memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
23522 memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
23523
23524- r->id.idiag_src[0] = sk->sk_rcv_saddr;
23525- r->id.idiag_dst[0] = sk->sk_daddr;
23526+ r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, sk->sk_rcv_saddr);
23527+ r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, sk->sk_daddr);
23528 }
23529 }
23530
23531@@ -761,6 +763,9 @@ void inet_diag_dump_icsk(struct inet_has
4bf69007
AM
23532 if (!net_eq(sock_net(sk), net))
23533 continue;
23534
23535+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23536+ continue;
8931d859 23537+
4bf69007
AM
23538 if (num < s_num) {
23539 num++;
23540 continue;
8931d859 23541@@ -822,6 +827,8 @@ skip_listen_ht:
4bf69007
AM
23542
23543 if (!net_eq(sock_net(sk), net))
23544 continue;
23545+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23546+ continue;
23547 if (num < s_num)
23548 goto next_normal;
c2e5f7c8 23549 state = (sk->sk_state == TCP_TIME_WAIT) ?
8931d859
AM
23550diff -NurpP --minimal linux-4.4.161/net/ipv4/inet_hashtables.c linux-4.4.161-vs2.3.9.8/net/ipv4/inet_hashtables.c
23551--- linux-4.4.161/net/ipv4/inet_hashtables.c 2016-01-10 23:01:32.000000000 +0000
23552+++ linux-4.4.161-vs2.3.9.8/net/ipv4/inet_hashtables.c 2018-10-20 04:57:21.000000000 +0000
927ca606 23553@@ -23,6 +23,7 @@
4bf69007
AM
23554 #include <net/inet_connection_sock.h>
23555 #include <net/inet_hashtables.h>
23556 #include <net/secure_seq.h>
23557+#include <net/route.h>
23558 #include <net/ip.h>
23559
927ca606
AM
23560 static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
23561@@ -183,6 +184,11 @@ static inline int compute_score(struct s
4bf69007
AM
23562 if (rcv_saddr != daddr)
23563 return -1;
b00e13aa 23564 score += 4;
4bf69007
AM
23565+ } else {
23566+ /* block non nx_info ips */
23567+ if (!v4_addr_in_nx_info(sk->sk_nx_info,
23568+ daddr, NXA_MASK_BIND))
23569+ return -1;
23570 }
23571 if (sk->sk_bound_dev_if) {
23572 if (sk->sk_bound_dev_if != dif)
927ca606 23573@@ -202,7 +208,6 @@ static inline int compute_score(struct s
4bf69007
AM
23574 * wildcarded during the search since they can never be otherwise.
23575 */
23576
23577-
23578 struct sock *__inet_lookup_listener(struct net *net,
23579 struct inet_hashinfo *hashinfo,
b00e13aa 23580 const __be32 saddr, __be16 sport,
927ca606 23581@@ -238,6 +243,7 @@ begin:
b00e13aa 23582 phash = next_pseudo_random32(phash);
4bf69007
AM
23583 }
23584 }
2ba6f0dd 23585+
4bf69007
AM
23586 /*
23587 * if the nulls value we got at the end of this lookup is
23588 * not the expected one, we must restart lookup.
8931d859
AM
23589diff -NurpP --minimal linux-4.4.161/net/ipv4/netfilter.c linux-4.4.161-vs2.3.9.8/net/ipv4/netfilter.c
23590--- linux-4.4.161/net/ipv4/netfilter.c 2018-10-20 02:34:31.000000000 +0000
23591+++ linux-4.4.161-vs2.3.9.8/net/ipv4/netfilter.c 2018-10-20 04:57:21.000000000 +0000
09be7631 23592@@ -11,7 +11,7 @@
4bf69007
AM
23593 #include <linux/skbuff.h>
23594 #include <linux/gfp.h>
23595 #include <linux/export.h>
23596-#include <net/route.h>
23597+// #include <net/route.h>
23598 #include <net/xfrm.h>
23599 #include <net/ip.h>
23600 #include <net/netfilter/nf_queue.h>
8931d859
AM
23601diff -NurpP --minimal linux-4.4.161/net/ipv4/raw.c linux-4.4.161-vs2.3.9.8/net/ipv4/raw.c
23602--- linux-4.4.161/net/ipv4/raw.c 2018-10-20 02:34:31.000000000 +0000
23603+++ linux-4.4.161-vs2.3.9.8/net/ipv4/raw.c 2018-10-20 04:57:21.000000000 +0000
927ca606 23604@@ -126,7 +126,7 @@ static struct sock *__raw_v4_lookup(stru
4bf69007
AM
23605
23606 if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
23607 !(inet->inet_daddr && inet->inet_daddr != raddr) &&
23608- !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
23609+ v4_sock_addr_match(sk->sk_nx_info, inet, laddr) &&
23610 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
23611 goto found; /* gotcha */
23612 }
927ca606
AM
23613@@ -416,6 +416,12 @@ static int raw_send_hdrinc(struct sock *
23614 skb_transport_header(skb))->type);
23615 }
4bf69007
AM
23616
23617+ err = -EPERM;
23618+ if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) &&
23619+ sk->sk_nx_info &&
23620+ !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND))
23621+ goto error_free;
2ba6f0dd 23622+
927ca606
AM
23623 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
23624 net, sk, skb, NULL, rt->dst.dev,
23625 dst_output);
23626@@ -626,6 +632,16 @@ static int raw_sendmsg(struct sock *sk,
4bf69007
AM
23627 goto done;
23628 }
23629
23630+ if (sk->sk_nx_info) {
23631+ rt = ip_v4_find_src(sock_net(sk), sk->sk_nx_info, &fl4);
23632+ if (IS_ERR(rt)) {
23633+ err = PTR_ERR(rt);
23634+ rt = NULL;
23635+ goto done;
23636+ }
23637+ ip_rt_put(rt);
23638+ }
2ba6f0dd 23639+
4bf69007 23640 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
927ca606 23641 rt = ip_route_output_flow(net, &fl4, sk);
4bf69007 23642 if (IS_ERR(rt)) {
927ca606 23643@@ -704,17 +720,19 @@ static int raw_bind(struct sock *sk, str
4bf69007
AM
23644 {
23645 struct inet_sock *inet = inet_sk(sk);
23646 struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
23647+ struct nx_v4_sock_addr nsa = { 0 };
23648 int ret = -EINVAL;
23649 int chk_addr_ret;
23650
23651 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
23652 goto out;
23653- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
23654+ v4_map_sock_addr(inet, addr, &nsa);
23655+ chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr);
23656 ret = -EADDRNOTAVAIL;
23657- if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
23658+ if (nsa.saddr && chk_addr_ret != RTN_LOCAL &&
23659 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
23660 goto out;
23661- inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
23662+ v4_set_sock_addr(inet, &nsa);
23663 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
23664 inet->inet_saddr = 0; /* Use device */
23665 sk_dst_reset(sk);
927ca606 23666@@ -763,7 +781,8 @@ static int raw_recvmsg(struct sock *sk,
4bf69007
AM
23667 /* Copy the address. */
23668 if (sin) {
23669 sin->sin_family = AF_INET;
23670- sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
23671+ sin->sin_addr.s_addr =
23672+ nx_map_sock_lback(sk->sk_nx_info, ip_hdr(skb)->saddr);
23673 sin->sin_port = 0;
23674 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
c2e5f7c8 23675 *addr_len = sizeof(*sin);
927ca606 23676@@ -959,7 +978,8 @@ static struct sock *raw_get_first(struct
b00e13aa
AM
23677 for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
23678 ++state->bucket) {
23679 sk_for_each(sk, &state->h->ht[state->bucket])
4bf69007
AM
23680- if (sock_net(sk) == seq_file_net(seq))
23681+ if ((sock_net(sk) == seq_file_net(seq)) &&
b00e13aa 23682+ nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
4bf69007
AM
23683 goto found;
23684 }
23685 sk = NULL;
927ca606 23686@@ -975,7 +995,8 @@ static struct sock *raw_get_next(struct
4bf69007
AM
23687 sk = sk_next(sk);
23688 try_again:
23689 ;
23690- } while (sk && sock_net(sk) != seq_file_net(seq));
23691+ } while (sk && ((sock_net(sk) != seq_file_net(seq)) ||
23692+ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
23693
23694 if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
23695 sk = sk_head(&state->h->ht[state->bucket]);
8931d859
AM
23696diff -NurpP --minimal linux-4.4.161/net/ipv4/route.c linux-4.4.161-vs2.3.9.8/net/ipv4/route.c
23697--- linux-4.4.161/net/ipv4/route.c 2018-10-20 02:34:31.000000000 +0000
23698+++ linux-4.4.161-vs2.3.9.8/net/ipv4/route.c 2018-10-20 04:57:21.000000000 +0000
23699@@ -2238,7 +2238,7 @@ struct rtable *__ip_route_output_key_has
4bf69007
AM
23700
23701
23702 if (fl4->flowi4_oif) {
23703- dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
23704+ dev_out = dev_get_by_index_real_rcu(net, fl4->flowi4_oif);
23705 rth = ERR_PTR(-ENODEV);
927ca606 23706 if (!dev_out)
4bf69007 23707 goto out;
8931d859
AM
23708diff -NurpP --minimal linux-4.4.161/net/ipv4/tcp.c linux-4.4.161-vs2.3.9.8/net/ipv4/tcp.c
23709--- linux-4.4.161/net/ipv4/tcp.c 2018-10-20 02:34:31.000000000 +0000
23710+++ linux-4.4.161-vs2.3.9.8/net/ipv4/tcp.c 2018-10-20 05:50:20.000000000 +0000
927ca606 23711@@ -269,6 +269,7 @@
4bf69007
AM
23712 #include <linux/crypto.h>
23713 #include <linux/time.h>
23714 #include <linux/slab.h>
23715+#include <linux/in.h>
23716
23717 #include <net/icmp.h>
23718 #include <net/inet_common.h>
8931d859
AM
23719diff -NurpP --minimal linux-4.4.161/net/ipv4/tcp_ipv4.c linux-4.4.161-vs2.3.9.8/net/ipv4/tcp_ipv4.c
23720--- linux-4.4.161/net/ipv4/tcp_ipv4.c 2018-10-20 02:34:31.000000000 +0000
23721+++ linux-4.4.161-vs2.3.9.8/net/ipv4/tcp_ipv4.c 2018-10-20 05:50:20.000000000 +0000
23722@@ -1890,6 +1890,10 @@ static void *listening_get_next(struct s
927ca606 23723 sk = sk_nulls_next(sk);
4bf69007
AM
23724 get_sk:
23725 sk_nulls_for_each_from(sk, node) {
23726+ vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
23727+ sk, sk->sk_nid, nx_current_nid());
23728+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23729+ continue;
23730 if (!net_eq(sock_net(sk), net))
23731 continue;
23732 if (sk->sk_family == st->family) {
8931d859 23733@@ -1954,6 +1958,11 @@ static void *established_get_first(struc
4bf69007
AM
23734
23735 spin_lock_bh(lock);
23736 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
23737+ vxdprintk(VXD_CBIT(net, 6),
23738+ "sk,egf: %p [#%d] (from %d)",
23739+ sk, sk->sk_nid, nx_current_nid());
23740+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23741+ continue;
23742 if (sk->sk_family != st->family ||
23743 !net_eq(sock_net(sk), net)) {
23744 continue;
8931d859 23745@@ -1980,6 +1989,11 @@ static void *established_get_next(struct
c2e5f7c8 23746 sk = sk_nulls_next(sk);
4bf69007
AM
23747
23748 sk_nulls_for_each_from(sk, node) {
23749+ vxdprintk(VXD_CBIT(net, 6),
23750+ "sk,egn: %p [#%d] (from %d)",
23751+ sk, sk->sk_nid, nx_current_nid());
23752+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23753+ continue;
23754 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
c2e5f7c8 23755 return sk;
4bf69007 23756 }
8931d859 23757@@ -2171,9 +2185,9 @@ static void get_openreq4(const struct re
4bf69007 23758 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
c2e5f7c8 23759 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
4bf69007 23760 i,
c2e5f7c8
JR
23761- ireq->ir_loc_addr,
23762+ nx_map_sock_lback(current_nx_info(), ireq->ir_loc_addr),
927ca606 23763 ireq->ir_num,
c2e5f7c8
JR
23764- ireq->ir_rmt_addr,
23765+ nx_map_sock_lback(current_nx_info(), ireq->ir_rmt_addr),
23766 ntohs(ireq->ir_rmt_port),
4bf69007
AM
23767 TCP_SYN_RECV,
23768 0, 0, /* could print option size, but that is af dependent. */
8931d859 23769@@ -2196,8 +2210,8 @@ static void get_tcp4_sock(struct sock *s
4bf69007
AM
23770 const struct inet_connection_sock *icsk = inet_csk(sk);
23771 const struct inet_sock *inet = inet_sk(sk);
927ca606 23772 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
4bf69007
AM
23773- __be32 dest = inet->inet_daddr;
23774- __be32 src = inet->inet_rcv_saddr;
23775+ __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr);
23776+ __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr);
23777 __u16 destp = ntohs(inet->inet_dport);
23778 __u16 srcp = ntohs(inet->inet_sport);
23779 int rx_queue;
8931d859 23780@@ -2256,8 +2270,8 @@ static void get_timewait4_sock(const str
927ca606 23781 __be32 dest, src;
4bf69007 23782 __u16 destp, srcp;
4bf69007
AM
23783
23784- dest = tw->tw_daddr;
23785- src = tw->tw_rcv_saddr;
23786+ dest = nx_map_sock_lback(current_nx_info(), tw->tw_daddr);
23787+ src = nx_map_sock_lback(current_nx_info(), tw->tw_rcv_saddr);
23788 destp = ntohs(tw->tw_dport);
23789 srcp = ntohs(tw->tw_sport);
23790
8931d859
AM
23791diff -NurpP --minimal linux-4.4.161/net/ipv4/tcp_minisocks.c linux-4.4.161-vs2.3.9.8/net/ipv4/tcp_minisocks.c
23792--- linux-4.4.161/net/ipv4/tcp_minisocks.c 2018-10-20 02:34:31.000000000 +0000
23793+++ linux-4.4.161-vs2.3.9.8/net/ipv4/tcp_minisocks.c 2018-10-20 05:50:20.000000000 +0000
4bf69007
AM
23794@@ -23,6 +23,9 @@
23795 #include <linux/slab.h>
23796 #include <linux/sysctl.h>
23797 #include <linux/workqueue.h>
23798+#include <linux/vs_limit.h>
23799+#include <linux/vs_socket.h>
23800+#include <linux/vs_context.h>
23801 #include <net/tcp.h>
23802 #include <net/inet_common.h>
23803 #include <net/xfrm.h>
8931d859 23804@@ -292,6 +295,11 @@ void tcp_time_wait(struct sock *sk, int
b00e13aa 23805 tcptw->tw_ts_offset = tp->tsoffset;
927ca606 23806 tcptw->tw_last_oow_ack_time = 0;
4bf69007
AM
23807
23808+ tw->tw_xid = sk->sk_xid;
23809+ tw->tw_vx_info = NULL;
23810+ tw->tw_nid = sk->sk_nid;
23811+ tw->tw_nx_info = NULL;
2ba6f0dd 23812+
4bf69007
AM
23813 #if IS_ENABLED(CONFIG_IPV6)
23814 if (tw->tw_family == PF_INET6) {
23815 struct ipv6_pinfo *np = inet6_sk(sk);
8931d859
AM
23816diff -NurpP --minimal linux-4.4.161/net/ipv4/udp.c linux-4.4.161-vs2.3.9.8/net/ipv4/udp.c
23817--- linux-4.4.161/net/ipv4/udp.c 2018-10-20 02:34:31.000000000 +0000
23818+++ linux-4.4.161-vs2.3.9.8/net/ipv4/udp.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 23819@@ -309,14 +309,7 @@ fail:
4bf69007
AM
23820 }
23821 EXPORT_SYMBOL(udp_lib_get_port);
23822
23823-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
23824-{
23825- struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
23826-
23827- return (!ipv6_only_sock(sk2) &&
23828- (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
23829- inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
23830-}
23831+extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
23832
927ca606
AM
23833 static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
23834 unsigned int port)
23835@@ -355,6 +348,11 @@ static inline int compute_score(struct s
23836 if (inet->inet_rcv_saddr != daddr)
23837 return -1;
23838 score += 4;
4bf69007
AM
23839+ } else {
23840+ /* block non nx_info ips */
23841+ if (!v4_addr_in_nx_info(sk->sk_nx_info,
23842+ daddr, NXA_MASK_BIND))
23843+ return -1;
927ca606
AM
23844 }
23845
23846 if (inet->inet_daddr) {
23847@@ -489,6 +487,7 @@ begin:
4bf69007
AM
23848 return result;
23849 }
23850
2ba6f0dd 23851+
4bf69007
AM
23852 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
23853 * harder than this. -DaveM
23854 */
927ca606 23855@@ -535,6 +534,11 @@ begin:
4bf69007
AM
23856 sk_nulls_for_each_rcu(sk, node, &hslot->head) {
23857 score = compute_score(sk, net, saddr, hnum, sport,
23858 daddr, dport, dif);
23859+ /* FIXME: disabled?
23860+ if (score == 9) {
23861+ result = sk;
23862+ break;
23863+ } else */
23864 if (score > badness) {
23865 result = sk;
23866 badness = score;
927ca606 23867@@ -559,6 +563,7 @@ begin:
4bf69007
AM
23868 if (get_nulls_value(node) != slot)
23869 goto begin;
23870
2ba6f0dd 23871+
4bf69007
AM
23872 if (result) {
23873 if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
23874 result = NULL;
927ca606 23875@@ -568,6 +573,7 @@ begin:
4bf69007
AM
23876 goto begin;
23877 }
23878 }
2ba6f0dd 23879+
4bf69007
AM
23880 rcu_read_unlock();
23881 return result;
23882 }
927ca606 23883@@ -602,7 +608,7 @@ static inline bool __udp_is_mcast_sock(s
c2e5f7c8
JR
23884 udp_sk(sk)->udp_port_hash != hnum ||
23885 (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
23886 (inet->inet_dport != rmt_port && inet->inet_dport) ||
23887- (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
23888+ !v4_sock_addr_match(sk->sk_nx_info, inet, loc_addr) ||
23889 ipv6_only_sock(sk) ||
23890 (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
23891 return false;
8931d859 23892@@ -1035,6 +1041,16 @@ int udp_sendmsg(struct sock *sk, struct
927ca606
AM
23893 goto out;
23894 }
4bf69007
AM
23895
23896+ if (sk->sk_nx_info) {
23897+ rt = ip_v4_find_src(net, sk->sk_nx_info, fl4);
23898+ if (IS_ERR(rt)) {
23899+ err = PTR_ERR(rt);
23900+ rt = NULL;
23901+ goto out;
23902+ }
23903+ ip_rt_put(rt);
23904+ }
2ba6f0dd 23905+
4bf69007
AM
23906 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
23907 rt = ip_route_output_flow(net, fl4, sk);
23908 if (IS_ERR(rt)) {
8931d859 23909@@ -1340,7 +1356,8 @@ try_again:
4bf69007
AM
23910 if (sin) {
23911 sin->sin_family = AF_INET;
23912 sin->sin_port = udp_hdr(skb)->source;
23913- sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
23914+ sin->sin_addr.s_addr = nx_map_sock_lback(
23915+ skb->sk->sk_nx_info, ip_hdr(skb)->saddr);
23916 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
c2e5f7c8 23917 *addr_len = sizeof(*sin);
4bf69007 23918 }
8931d859 23919@@ -2327,6 +2344,8 @@ static struct sock *udp_get_first(struct
4bf69007
AM
23920 sk_nulls_for_each(sk, node, &hslot->head) {
23921 if (!net_eq(sock_net(sk), net))
23922 continue;
23923+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23924+ continue;
23925 if (sk->sk_family == state->family)
23926 goto found;
23927 }
8931d859 23928@@ -2344,7 +2363,9 @@ static struct sock *udp_get_next(struct
4bf69007
AM
23929
23930 do {
23931 sk = sk_nulls_next(sk);
23932- } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
23933+ } while (sk && (!net_eq(sock_net(sk), net) ||
23934+ sk->sk_family != state->family ||
23935+ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
23936
23937 if (!sk) {
23938 if (state->bucket <= state->udp_table->mask)
8931d859 23939@@ -2440,8 +2461,8 @@ static void udp4_format_sock(struct sock
c2e5f7c8 23940 int bucket)
4bf69007
AM
23941 {
23942 struct inet_sock *inet = inet_sk(sp);
23943- __be32 dest = inet->inet_daddr;
23944- __be32 src = inet->inet_rcv_saddr;
23945+ __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr);
23946+ __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr);
23947 __u16 destp = ntohs(inet->inet_dport);
23948 __u16 srcp = ntohs(inet->inet_sport);
23949
8931d859
AM
23950diff -NurpP --minimal linux-4.4.161/net/ipv4/udp_diag.c linux-4.4.161-vs2.3.9.8/net/ipv4/udp_diag.c
23951--- linux-4.4.161/net/ipv4/udp_diag.c 2016-01-10 23:01:32.000000000 +0000
23952+++ linux-4.4.161-vs2.3.9.8/net/ipv4/udp_diag.c 2018-10-20 04:57:21.000000000 +0000
23953@@ -118,6 +118,8 @@ static void udp_dump(struct udp_table *t
23954
23955 if (!net_eq(sock_net(sk), net))
23956 continue;
23957+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23958+ continue;
23959 if (num < s_num)
23960 goto next;
23961 if (!(r->idiag_states & (1 << sk->sk_state)))
23962diff -NurpP --minimal linux-4.4.161/net/ipv6/addrconf.c linux-4.4.161-vs2.3.9.8/net/ipv6/addrconf.c
23963--- linux-4.4.161/net/ipv6/addrconf.c 2018-10-20 02:34:31.000000000 +0000
23964+++ linux-4.4.161-vs2.3.9.8/net/ipv6/addrconf.c 2018-10-20 04:57:21.000000000 +0000
927ca606 23965@@ -92,6 +92,8 @@
4bf69007
AM
23966 #include <linux/proc_fs.h>
23967 #include <linux/seq_file.h>
23968 #include <linux/export.h>
23969+#include <linux/vs_network.h>
23970+#include <linux/vs_inet6.h>
23971
23972 /* Set to 3 to get tracing... */
23973 #define ACONF_DEBUG 2
8931d859 23974@@ -1445,7 +1447,8 @@ static int __ipv6_dev_get_saddr(struct n
927ca606
AM
23975 struct ipv6_saddr_dst *dst,
23976 struct inet6_dev *idev,
23977 struct ipv6_saddr_score *scores,
23978- int hiscore_idx)
23979+ int hiscore_idx,
23980+ struct nx_info *nxi)
23981 {
23982 struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
23983
8931d859 23984@@ -1475,6 +1478,8 @@ static int __ipv6_dev_get_saddr(struct n
927ca606
AM
23985 idev->dev->name);
23986 continue;
23987 }
23988+ if (!v6_addr_in_nx_info(nxi, &score->ifa->addr, -1))
23989+ continue;
23990
23991 score->rule = -1;
23992 bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
8931d859 23993@@ -1522,7 +1527,7 @@ out:
4bf69007
AM
23994
23995 int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
23996 const struct in6_addr *daddr, unsigned int prefs,
23997- struct in6_addr *saddr)
23998+ struct in6_addr *saddr, struct nx_info *nxi)
23999 {
927ca606
AM
24000 struct ipv6_saddr_score scores[2], *hiscore;
24001 struct ipv6_saddr_dst dst;
8931d859 24002@@ -1571,13 +1576,15 @@ int ipv6_dev_get_saddr(struct net *net,
927ca606
AM
24003
24004 if (use_oif_addr) {
24005 if (idev)
24006- hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
24007+ hiscore_idx = __ipv6_dev_get_saddr(net, &dst,
24008+ idev, scores, hiscore_idx, nxi);
24009 } else {
24010 for_each_netdev_rcu(net, dev) {
24011 idev = __in6_dev_get(dev);
24012 if (!idev)
4bf69007 24013 continue;
927ca606
AM
24014- hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
24015+ hiscore_idx = __ipv6_dev_get_saddr(net, &dst,
24016+ idev, scores, hiscore_idx, nxi);
24017 }
24018 }
24019 rcu_read_unlock();
8931d859 24020@@ -3849,7 +3856,10 @@ static void if6_seq_stop(struct seq_file
4bf69007
AM
24021 static int if6_seq_show(struct seq_file *seq, void *v)
24022 {
24023 struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
24024- seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
2ba6f0dd 24025+
4bf69007
AM
24026+ if (nx_check(0, VS_ADMIN|VS_WATCH) ||
24027+ v6_addr_in_nx_info(current_nx_info(), &ifp->addr, -1))
24028+ seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
24029 &ifp->addr,
24030 ifp->idev->dev->ifindex,
24031 ifp->prefix_len,
8931d859 24032@@ -4433,6 +4443,11 @@ static int in6_dump_addrs(struct inet6_d
4bf69007
AM
24033 struct ifacaddr6 *ifaca;
24034 int err = 1;
24035 int ip_idx = *p_ip_idx;
24036+ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
2ba6f0dd 24037+
4bf69007
AM
24038+ /* disable ipv6 on non v6 guests */
24039+ if (nxi && !nx_info_has_v6(nxi))
24040+ return skb->len;
24041
24042 read_lock_bh(&idev->lock);
24043 switch (type) {
8931d859 24044@@ -4443,6 +4458,8 @@ static int in6_dump_addrs(struct inet6_d
4bf69007 24045 list_for_each_entry(ifa, &idev->addr_list, if_list) {
1a30d8a3
JR
24046 if (ip_idx < s_ip_idx)
24047 goto next;
927ca606 24048+ if (!v6_addr_in_nx_info(nxi, &ifa->addr, -1))
1a30d8a3 24049+ goto next;
4bf69007
AM
24050 err = inet6_fill_ifaddr(skb, ifa,
24051 NETLINK_CB(cb->skb).portid,
24052 cb->nlh->nlmsg_seq,
8931d859 24053@@ -4460,6 +4477,8 @@ static int in6_dump_addrs(struct inet6_d
4bf69007
AM
24054 ifmca = ifmca->next, ip_idx++) {
24055 if (ip_idx < s_ip_idx)
24056 continue;
927ca606
AM
24057+ if (!v6_addr_in_nx_info(nxi, &ifmca->mca_addr, -1))
24058+ continue;
4bf69007
AM
24059 err = inet6_fill_ifmcaddr(skb, ifmca,
24060 NETLINK_CB(cb->skb).portid,
24061 cb->nlh->nlmsg_seq,
8931d859 24062@@ -4475,6 +4494,8 @@ static int in6_dump_addrs(struct inet6_d
4bf69007
AM
24063 ifaca = ifaca->aca_next, ip_idx++) {
24064 if (ip_idx < s_ip_idx)
24065 continue;
927ca606
AM
24066+ if (!v6_addr_in_nx_info(nxi, &ifaca->aca_addr, -1))
24067+ continue;
4bf69007
AM
24068 err = inet6_fill_ifacaddr(skb, ifaca,
24069 NETLINK_CB(cb->skb).portid,
24070 cb->nlh->nlmsg_seq,
8931d859 24071@@ -4503,6 +4524,10 @@ static int inet6_dump_addr(struct sk_buf
4bf69007
AM
24072 struct inet6_dev *idev;
24073 struct hlist_head *head;
b00e13aa 24074
4bf69007
AM
24075+ /* FIXME: maybe disable ipv6 on non v6 guests?
24076+ if (skb->sk && skb->sk->sk_vx_info)
24077+ return skb->len; */
b00e13aa
AM
24078+
24079 s_h = cb->args[0];
24080 s_idx = idx = cb->args[1];
24081 s_ip_idx = ip_idx = cb->args[2];
8931d859 24082@@ -5011,6 +5036,7 @@ static int inet6_dump_ifinfo(struct sk_b
b00e13aa
AM
24083 struct net_device *dev;
24084 struct inet6_dev *idev;
24085 struct hlist_head *head;
24086+ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
4bf69007
AM
24087
24088 s_h = cb->args[0];
24089 s_idx = cb->args[1];
8931d859 24090@@ -5022,6 +5048,8 @@ static int inet6_dump_ifinfo(struct sk_b
b00e13aa 24091 hlist_for_each_entry_rcu(dev, head, index_hlist) {
4bf69007
AM
24092 if (idx < s_idx)
24093 goto cont;
24094+ if (!v6_dev_in_nx_info(dev, nxi))
24095+ goto cont;
24096 idev = __in6_dev_get(dev);
24097 if (!idev)
24098 goto cont;
8931d859
AM
24099diff -NurpP --minimal linux-4.4.161/net/ipv6/af_inet6.c linux-4.4.161-vs2.3.9.8/net/ipv6/af_inet6.c
24100--- linux-4.4.161/net/ipv6/af_inet6.c 2018-10-20 02:34:31.000000000 +0000
24101+++ linux-4.4.161-vs2.3.9.8/net/ipv6/af_inet6.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
24102@@ -43,6 +43,8 @@
24103 #include <linux/netdevice.h>
24104 #include <linux/icmpv6.h>
24105 #include <linux/netfilter_ipv6.h>
24106+#include <linux/vs_inet.h>
24107+#include <linux/vs_inet6.h>
24108
24109 #include <net/ip.h>
24110 #include <net/ipv6.h>
927ca606 24111@@ -158,10 +160,13 @@ lookup_protocol:
4bf69007
AM
24112 }
24113
24114 err = -EPERM;
24115+ if ((protocol == IPPROTO_ICMPV6) &&
24116+ nx_capable(CAP_NET_RAW, NXC_RAW_ICMP))
24117+ goto override;
b00e13aa
AM
24118 if (sock->type == SOCK_RAW && !kern &&
24119 !ns_capable(net->user_ns, CAP_NET_RAW))
4bf69007
AM
24120 goto out_rcu_unlock;
24121-
24122+override:
24123 sock->ops = answer->ops;
24124 answer_prot = answer->prot;
bb20add7 24125 answer_flags = answer->flags;
927ca606 24126@@ -259,6 +264,7 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24127 struct inet_sock *inet = inet_sk(sk);
24128 struct ipv6_pinfo *np = inet6_sk(sk);
24129 struct net *net = sock_net(sk);
24130+ struct nx_v6_sock_addr nsa;
24131 __be32 v4addr = 0;
24132 unsigned short snum;
24133 int addr_type = 0;
927ca606 24134@@ -274,6 +280,10 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24135 if (addr->sin6_family != AF_INET6)
24136 return -EAFNOSUPPORT;
24137
24138+ err = v6_map_sock_addr(inet, addr, &nsa);
24139+ if (err)
24140+ return err;
2ba6f0dd 24141+
4bf69007
AM
24142 addr_type = ipv6_addr_type(&addr->sin6_addr);
24143 if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
24144 return -EINVAL;
927ca606 24145@@ -314,6 +324,10 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24146 err = -EADDRNOTAVAIL;
24147 goto out;
24148 }
24149+ if (!v4_addr_in_nx_info(sk->sk_nx_info, v4addr, NXA_MASK_BIND)) {
24150+ err = -EADDRNOTAVAIL;
24151+ goto out;
24152+ }
24153 } else {
24154 if (addr_type != IPV6_ADDR_ANY) {
24155 struct net_device *dev = NULL;
927ca606 24156@@ -340,6 +354,11 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24157 }
24158 }
24159
24160+ if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
24161+ err = -EADDRNOTAVAIL;
24162+ goto out_unlock;
24163+ }
2ba6f0dd 24164+
4bf69007
AM
24165 /* ipv4 addr of the socket is invalid. Only the
24166 * unspecified and mapped address have a v4 equivalent.
24167 */
927ca606 24168@@ -357,6 +376,9 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24169 }
24170 }
24171
24172+ /* what's that for? */
24173+ v6_set_sock_addr(inet, &nsa);
2ba6f0dd 24174+
4bf69007
AM
24175 inet->inet_rcv_saddr = v4addr;
24176 inet->inet_saddr = v4addr;
24177
927ca606 24178@@ -461,9 +483,11 @@ int inet6_getname(struct socket *sock, s
4bf69007
AM
24179 return -ENOTCONN;
24180 sin->sin6_port = inet->inet_dport;
c2e5f7c8 24181 sin->sin6_addr = sk->sk_v6_daddr;
4bf69007
AM
24182+ /* FIXME: remap lback? */
24183 if (np->sndflow)
24184 sin->sin6_flowinfo = np->flow_label;
24185 } else {
24186+ /* FIXME: remap lback? */
c2e5f7c8 24187 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
4bf69007
AM
24188 sin->sin6_addr = np->saddr;
24189 else
8931d859
AM
24190diff -NurpP --minimal linux-4.4.161/net/ipv6/datagram.c linux-4.4.161-vs2.3.9.8/net/ipv6/datagram.c
24191--- linux-4.4.161/net/ipv6/datagram.c 2018-10-20 02:34:31.000000000 +0000
24192+++ linux-4.4.161-vs2.3.9.8/net/ipv6/datagram.c 2018-10-20 05:50:20.000000000 +0000
24193@@ -736,7 +736,7 @@ int ip6_datagram_send_ctl(struct net *ne
4bf69007
AM
24194
24195 rcu_read_lock();
24196 if (fl6->flowi6_oif) {
24197- dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
24198+ dev = dev_get_by_index_real_rcu(net, fl6->flowi6_oif);
24199 if (!dev) {
24200 rcu_read_unlock();
24201 return -ENODEV;
8931d859
AM
24202diff -NurpP --minimal linux-4.4.161/net/ipv6/fib6_rules.c linux-4.4.161-vs2.3.9.8/net/ipv6/fib6_rules.c
24203--- linux-4.4.161/net/ipv6/fib6_rules.c 2018-10-20 02:34:31.000000000 +0000
24204+++ linux-4.4.161-vs2.3.9.8/net/ipv6/fib6_rules.c 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8 24205@@ -97,7 +97,7 @@ static int fib6_rule_action(struct fib_r
4bf69007
AM
24206 ip6_dst_idev(&rt->dst)->dev,
24207 &flp6->daddr,
24208 rt6_flags2srcprefs(flags),
24209- &saddr))
24210+ &saddr, NULL))
24211 goto again;
24212 if (!ipv6_prefix_equal(&saddr, &r->src.addr,
24213 r->src.plen))
8931d859
AM
24214diff -NurpP --minimal linux-4.4.161/net/ipv6/inet6_hashtables.c linux-4.4.161-vs2.3.9.8/net/ipv6/inet6_hashtables.c
24215--- linux-4.4.161/net/ipv6/inet6_hashtables.c 2016-01-10 23:01:32.000000000 +0000
24216+++ linux-4.4.161-vs2.3.9.8/net/ipv6/inet6_hashtables.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
24217@@ -16,6 +16,7 @@
24218
24219 #include <linux/module.h>
24220 #include <linux/random.h>
24221+#include <linux/vs_inet6.h>
24222
24223 #include <net/inet_connection_sock.h>
24224 #include <net/inet_hashtables.h>
927ca606 24225@@ -66,7 +67,6 @@ struct sock *__inet6_lookup_established(
4bf69007
AM
24226 unsigned int slot = hash & hashinfo->ehash_mask;
24227 struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
24228
24229-
24230 rcu_read_lock();
24231 begin:
24232 sk_nulls_for_each_rcu(sk, node, &head->chain) {
927ca606 24233@@ -108,6 +108,9 @@ static inline int compute_score(struct s
c2e5f7c8 24234 if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
4bf69007
AM
24235 return -1;
24236 score++;
24237+ } else {
24238+ if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
24239+ return -1;
24240 }
24241 if (sk->sk_bound_dev_if) {
24242 if (sk->sk_bound_dev_if != dif)
8931d859
AM
24243diff -NurpP --minimal linux-4.4.161/net/ipv6/ip6_fib.c linux-4.4.161-vs2.3.9.8/net/ipv6/ip6_fib.c
24244--- linux-4.4.161/net/ipv6/ip6_fib.c 2018-10-20 02:34:31.000000000 +0000
24245+++ linux-4.4.161-vs2.3.9.8/net/ipv6/ip6_fib.c 2018-10-20 04:57:21.000000000 +0000
927ca606 24246@@ -1968,6 +1968,7 @@ static int ipv6_route_seq_show(struct se
c2e5f7c8
JR
24247 struct rt6_info *rt = v;
24248 struct ipv6_route_iter *iter = seq->private;
24249
24250+ /* FIXME: check for network context? */
24251 seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
24252
24253 #ifdef CONFIG_IPV6_SUBTREES
8931d859
AM
24254diff -NurpP --minimal linux-4.4.161/net/ipv6/ip6_output.c linux-4.4.161-vs2.3.9.8/net/ipv6/ip6_output.c
24255--- linux-4.4.161/net/ipv6/ip6_output.c 2018-10-20 02:34:31.000000000 +0000
24256+++ linux-4.4.161-vs2.3.9.8/net/ipv6/ip6_output.c 2018-10-20 05:50:20.000000000 +0000
24257@@ -943,7 +943,8 @@ static int ip6_dst_lookup_tail(struct ne
927ca606 24258 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
4bf69007
AM
24259 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
24260 sk ? inet6_sk(sk)->srcprefs : 0,
24261- &fl6->saddr);
24262+ &fl6->saddr,
24263+ sk ? sk->sk_nx_info : NULL);
24264 if (err)
24265 goto out_err_release;
927ca606 24266
8931d859
AM
24267diff -NurpP --minimal linux-4.4.161/net/ipv6/ndisc.c linux-4.4.161-vs2.3.9.8/net/ipv6/ndisc.c
24268--- linux-4.4.161/net/ipv6/ndisc.c 2018-10-20 02:34:31.000000000 +0000
24269+++ linux-4.4.161-vs2.3.9.8/net/ipv6/ndisc.c 2018-10-20 04:57:21.000000000 +0000
927ca606 24270@@ -501,7 +501,7 @@ void ndisc_send_na(struct net_device *de
4bf69007
AM
24271 } else {
24272 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
24273 inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
24274- &tmpaddr))
24275+ &tmpaddr, NULL))
24276 return;
24277 src_addr = &tmpaddr;
24278 }
8931d859
AM
24279diff -NurpP --minimal linux-4.4.161/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c linux-4.4.161-vs2.3.9.8/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
24280--- linux-4.4.161/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 2016-01-10 23:01:32.000000000 +0000
24281+++ linux-4.4.161-vs2.3.9.8/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 24282@@ -35,7 +35,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *s
4bf69007
AM
24283 ctinfo == IP_CT_RELATED_REPLY));
24284
927ca606 24285 if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
4bf69007
AM
24286- &ipv6_hdr(skb)->daddr, 0, &src) < 0)
24287+ &ipv6_hdr(skb)->daddr, 0, &src, NULL) < 0)
24288 return NF_DROP;
24289
bb20add7 24290 nfct_nat(ct)->masq_index = out->ifindex;
8931d859
AM
24291diff -NurpP --minimal linux-4.4.161/net/ipv6/raw.c linux-4.4.161-vs2.3.9.8/net/ipv6/raw.c
24292--- linux-4.4.161/net/ipv6/raw.c 2018-10-20 02:34:31.000000000 +0000
24293+++ linux-4.4.161-vs2.3.9.8/net/ipv6/raw.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
24294@@ -30,6 +30,7 @@
24295 #include <linux/icmpv6.h>
24296 #include <linux/netfilter.h>
24297 #include <linux/netfilter_ipv6.h>
24298+#include <linux/vs_inet6.h>
24299 #include <linux/skbuff.h>
24300 #include <linux/compat.h>
927ca606 24301 #include <linux/uaccess.h>
bb20add7 24302@@ -291,6 +292,13 @@ static int rawv6_bind(struct sock *sk, s
4bf69007
AM
24303 goto out_unlock;
24304 }
24305
24306+ if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
24307+ err = -EADDRNOTAVAIL;
24308+ if (dev)
24309+ dev_put(dev);
24310+ goto out;
24311+ }
2ba6f0dd 24312+
4bf69007
AM
24313 /* ipv4 addr of the socket is invalid. Only the
24314 * unspecified and mapped address have a v4 equivalent.
24315 */
8931d859
AM
24316diff -NurpP --minimal linux-4.4.161/net/ipv6/route.c linux-4.4.161-vs2.3.9.8/net/ipv6/route.c
24317--- linux-4.4.161/net/ipv6/route.c 2018-10-20 02:34:31.000000000 +0000
24318+++ linux-4.4.161-vs2.3.9.8/net/ipv6/route.c 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
24319@@ -62,6 +62,7 @@
24320 #include <net/lwtunnel.h>
24321 #include <net/ip_tunnels.h>
24322 #include <net/l3mdev.h>
4bf69007
AM
24323+#include <linux/vs_inet6.h>
24324
24325 #include <asm/uaccess.h>
24326
8931d859 24327@@ -2549,16 +2550,18 @@ int ip6_route_get_saddr(struct net *net,
4bf69007
AM
24328 struct rt6_info *rt,
24329 const struct in6_addr *daddr,
24330 unsigned int prefs,
24331- struct in6_addr *saddr)
24332+ struct in6_addr *saddr,
24333+ struct nx_info *nxi)
24334 {
927ca606
AM
24335 struct inet6_dev *idev =
24336 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
4bf69007 24337 int err = 0;
927ca606
AM
24338- if (rt && rt->rt6i_prefsrc.plen)
24339+ if (rt && rt->rt6i_prefsrc.plen && (!nxi ||
4bf69007
AM
24340+ v6_addr_in_nx_info(nxi, &rt->rt6i_prefsrc.addr, NXA_TYPE_ADDR)))
24341 *saddr = rt->rt6i_prefsrc.addr;
24342 else
24343 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
24344- daddr, prefs, saddr);
24345+ daddr, prefs, saddr, nxi);
24346 return err;
24347 }
24348
8931d859 24349@@ -3174,7 +3177,8 @@ static int rt6_fill_node(struct net *net
4bf69007
AM
24350 goto nla_put_failure;
24351 } else if (dst) {
24352 struct in6_addr saddr_buf;
24353- if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
24354+ if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf,
24355+ (skb->sk ? skb->sk->sk_nx_info : NULL)) == 0 &&
927ca606 24356 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4bf69007
AM
24357 goto nla_put_failure;
24358 }
8931d859
AM
24359diff -NurpP --minimal linux-4.4.161/net/ipv6/tcp_ipv6.c linux-4.4.161-vs2.3.9.8/net/ipv6/tcp_ipv6.c
24360--- linux-4.4.161/net/ipv6/tcp_ipv6.c 2018-10-20 02:34:31.000000000 +0000
24361+++ linux-4.4.161-vs2.3.9.8/net/ipv6/tcp_ipv6.c 2018-10-20 05:50:20.000000000 +0000
bb20add7 24362@@ -69,6 +69,7 @@
4bf69007
AM
24363
24364 #include <linux/crypto.h>
24365 #include <linux/scatterlist.h>
24366+#include <linux/vs_inet6.h>
24367
927ca606
AM
24368 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
24369 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
24370@@ -150,11 +151,18 @@ static int tcp_v6_connect(struct sock *s
4bf69007
AM
24371 */
24372
927ca606
AM
24373 if (ipv6_addr_any(&usin->sin6_addr)) {
24374- if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
24375- ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
24376- &usin->sin6_addr);
24377- else
24378- usin->sin6_addr = in6addr_loopback;
4bf69007 24379+ struct nx_info *nxi = sk->sk_nx_info;
2ba6f0dd 24380+
4bf69007
AM
24381+ if (nxi && nx_info_has_v6(nxi))
24382+ /* FIXME: remap lback? */
24383+ usin->sin6_addr = nxi->v6.ip;
927ca606
AM
24384+ else {
24385+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
24386+ ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
24387+ &usin->sin6_addr);
24388+ else
24389+ usin->sin6_addr = in6addr_loopback;
24390+ }
24391 }
4bf69007
AM
24392
24393 addr_type = ipv6_addr_type(&usin->sin6_addr);
8931d859
AM
24394diff -NurpP --minimal linux-4.4.161/net/ipv6/udp.c linux-4.4.161-vs2.3.9.8/net/ipv6/udp.c
24395--- linux-4.4.161/net/ipv6/udp.c 2018-10-20 02:34:31.000000000 +0000
24396+++ linux-4.4.161-vs2.3.9.8/net/ipv6/udp.c 2018-10-20 04:57:21.000000000 +0000
c2e5f7c8 24397@@ -47,6 +47,7 @@
4bf69007 24398 #include <net/xfrm.h>
b00e13aa 24399 #include <net/inet6_hashtables.h>
c2e5f7c8 24400 #include <net/busy_poll.h>
4bf69007
AM
24401+#include <linux/vs_inet6.h>
24402
24403 #include <linux/proc_fs.h>
24404 #include <linux/seq_file.h>
927ca606
AM
24405@@ -76,32 +77,60 @@ static u32 udp6_ehashfn(const struct net
24406 udp_ipv6_hash_secret + net_hash_mix(net));
24407 }
24408
24409-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
24410+int ipv6_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
24411 {
24412+ const struct in6_addr *sk1_rcv_saddr6 = inet6_rcv_saddr(sk1);
24413 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
24414+ __be32 sk1_rcv_saddr = sk1->sk_rcv_saddr;
24415+ __be32 sk2_rcv_saddr = sk2->sk_rcv_saddr;
24416 int sk2_ipv6only = inet_v6_ipv6only(sk2);
24417- int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
24418+ int addr_type1 = ipv6_addr_type(sk1_rcv_saddr6);
24419 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
24420
24421 /* if both are mapped, treat as IPv4 */
24422- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
24423- return (!sk2_ipv6only &&
24424- (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
24425- sk->sk_rcv_saddr == sk2->sk_rcv_saddr));
24426+ if (addr_type1 == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
24427+ if (!sk2_ipv6only &&
24428+ (!sk1->sk_rcv_saddr || !sk2->sk_rcv_saddr ||
24429+ sk1->sk_rcv_saddr == sk2->sk_rcv_saddr))
24430+ goto vs_v4;
24431+ else
24432+ return 0;
24433+ }
24434
24435 if (addr_type2 == IPV6_ADDR_ANY &&
24436- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
24437- return 1;
24438+ !(sk2_ipv6only && addr_type1 == IPV6_ADDR_MAPPED))
24439+ goto vs;
24440
24441- if (addr_type == IPV6_ADDR_ANY &&
24442- !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
24443- return 1;
24444+ if (addr_type1 == IPV6_ADDR_ANY &&
24445+ !(ipv6_only_sock(sk1) && addr_type2 == IPV6_ADDR_MAPPED))
24446+ goto vs;
24447
24448 if (sk2_rcv_saddr6 &&
24449- ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
24450- return 1;
24451+ ipv6_addr_equal(&sk1->sk_v6_rcv_saddr, sk2_rcv_saddr6))
24452+ goto vs;
24453
24454 return 0;
24455+
24456+vs_v4:
24457+ if (!sk1_rcv_saddr && !sk2_rcv_saddr)
24458+ return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
24459+ if (!sk2_rcv_saddr)
24460+ return v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, -1);
24461+ if (!sk1_rcv_saddr)
24462+ return v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, -1);
24463+ return 1;
24464+vs:
24465+ if (addr_type2 == IPV6_ADDR_ANY && addr_type1 == IPV6_ADDR_ANY)
24466+ return nx_v6_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
24467+ else if (addr_type2 == IPV6_ADDR_ANY)
24468+ return v6_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr6, -1);
24469+ else if (addr_type1 == IPV6_ADDR_ANY) {
24470+ if (addr_type2 == IPV6_ADDR_MAPPED)
24471+ return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
24472+ else
24473+ return v6_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr6, -1);
24474+ }
24475+ return 1;
24476 }
24477
24478 static u32 udp6_portaddr_hash(const struct net *net,
24479@@ -162,6 +191,10 @@ static inline int compute_score(struct s
24480 if (inet->inet_dport != sport)
24481 return -1;
24482 score++;
4bf69007
AM
24483+ } else {
24484+ /* block non nx_info ips */
24485+ if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
24486+ return -1;
927ca606
AM
24487 }
24488
24489 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
8931d859
AM
24490diff -NurpP --minimal linux-4.4.161/net/ipv6/xfrm6_policy.c linux-4.4.161-vs2.3.9.8/net/ipv6/xfrm6_policy.c
24491--- linux-4.4.161/net/ipv6/xfrm6_policy.c 2018-10-20 02:34:31.000000000 +0000
24492+++ linux-4.4.161-vs2.3.9.8/net/ipv6/xfrm6_policy.c 2018-10-20 04:57:21.000000000 +0000
927ca606
AM
24493@@ -64,7 +64,8 @@ static int xfrm6_get_saddr(struct net *n
24494 return -EHOSTUNREACH;
24495
4bf69007 24496 dev = ip6_dst_idev(dst)->dev;
927ca606
AM
24497- ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6);
24498+ ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6,
24499+ 0, &saddr->in6, NULL);
4bf69007
AM
24500 dst_release(dst);
24501 return 0;
24502 }
8931d859
AM
24503diff -NurpP --minimal linux-4.4.161/net/netfilter/ipvs/ip_vs_xmit.c linux-4.4.161-vs2.3.9.8/net/netfilter/ipvs/ip_vs_xmit.c
24504--- linux-4.4.161/net/netfilter/ipvs/ip_vs_xmit.c 2016-01-10 23:01:32.000000000 +0000
24505+++ linux-4.4.161-vs2.3.9.8/net/netfilter/ipvs/ip_vs_xmit.c 2018-10-20 04:57:21.000000000 +0000
927ca606 24506@@ -381,7 +381,7 @@ __ip_vs_route_output_v6(struct net *net,
4bf69007
AM
24507 return dst;
24508 if (ipv6_addr_any(&fl6.saddr) &&
24509 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
24510- &fl6.daddr, 0, &fl6.saddr) < 0)
24511+ &fl6.daddr, 0, &fl6.saddr, NULL) < 0)
24512 goto out_err;
24513 if (do_xfrm) {
24514 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
8931d859
AM
24515diff -NurpP --minimal linux-4.4.161/net/netlink/af_netlink.c linux-4.4.161-vs2.3.9.8/net/netlink/af_netlink.c
24516--- linux-4.4.161/net/netlink/af_netlink.c 2018-10-20 02:34:31.000000000 +0000
24517+++ linux-4.4.161-vs2.3.9.8/net/netlink/af_netlink.c 2018-10-20 05:53:02.000000000 +0000
24518@@ -63,6 +63,8 @@
bb20add7 24519 #include <linux/hash.h>
927ca606 24520 #include <linux/genetlink.h>
7b1099ae 24521 #include <linux/nospec.h>
4bf69007
AM
24522+#include <linux/vs_context.h>
24523+#include <linux/vs_network.h>
4bf69007
AM
24524
24525 #include <net/net_namespace.h>
bb20add7 24526 #include <net/sock.h>
8931d859 24527@@ -2472,7 +2474,8 @@ static void *__netlink_seq_next(struct s
927ca606
AM
24528 if (err)
24529 return ERR_PTR(err);
24530 }
24531- } while (sock_net(&nlk->sk) != seq_file_net(seq));
24532+ } while ((sock_net(&nlk->sk) != seq_file_net(seq)) ||
24533+ !nx_check(nlk->sk.sk_nid, VS_WATCH_P | VS_IDENT));
bb20add7 24534
927ca606
AM
24535 return nlk;
24536 }
8931d859
AM
24537diff -NurpP --minimal linux-4.4.161/net/packet/diag.c linux-4.4.161-vs2.3.9.8/net/packet/diag.c
24538--- linux-4.4.161/net/packet/diag.c 2016-01-10 23:01:32.000000000 +0000
24539+++ linux-4.4.161-vs2.3.9.8/net/packet/diag.c 2018-10-20 04:57:21.000000000 +0000
24540@@ -4,6 +4,7 @@
24541 #include <linux/netdevice.h>
24542 #include <linux/packet_diag.h>
24543 #include <linux/percpu.h>
24544+#include <linux/vs_network.h>
24545 #include <net/net_namespace.h>
24546 #include <net/sock.h>
24547
24548@@ -201,6 +202,8 @@ static int packet_diag_dump(struct sk_bu
24549 sk_for_each(sk, &net->packet.sklist) {
24550 if (!net_eq(sock_net(sk), net))
24551 continue;
24552+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24553+ continue;
24554 if (num < s_num)
24555 goto next;
24556
24557diff -NurpP --minimal linux-4.4.161/net/socket.c linux-4.4.161-vs2.3.9.8/net/socket.c
24558--- linux-4.4.161/net/socket.c 2018-10-20 02:34:31.000000000 +0000
24559+++ linux-4.4.161-vs2.3.9.8/net/socket.c 2018-10-20 05:50:20.000000000 +0000
24560@@ -100,10 +100,12 @@
4bf69007
AM
24561
24562 #include <net/sock.h>
24563 #include <linux/netfilter.h>
4bf69007
AM
24564+#include <linux/vs_socket.h>
24565+#include <linux/vs_inet.h>
24566+#include <linux/vs_inet6.h>
24567
24568 #include <linux/if_tun.h>
24569 #include <linux/ipv6_route.h>
927ca606
AM
24570-#include <linux/route.h>
24571 #include <linux/sockios.h>
24572 #include <linux/atalk.h>
24573 #include <net/busy_poll.h>
8931d859 24574@@ -609,8 +611,24 @@ EXPORT_SYMBOL(__sock_tx_timestamp);
4bf69007 24575
927ca606
AM
24576 static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
24577 {
24578- int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
24579- BUG_ON(ret == -EIOCBQUEUED);
24580+ size_t size = msg_data_left(msg);
24581+ int ret = sock->ops->sendmsg(sock, msg, size);
24582+#if 0
4bf69007 24583+ if (sock->sk) {
927ca606 24584+ if (!ret)
4bf69007 24585+ vx_sock_fail(sock->sk, size);
927ca606
AM
24586+ else
24587+ vx_sock_send(sock->sk, size);
4bf69007 24588+ }
927ca606 24589+#endif
4bf69007 24590+ vxdprintk(VXD_CBIT(net, 7),
927ca606 24591+ "sock_sendmsg_nosec: %p[%p,%p,%p;%d/%d]:%zu/%zu",
4bf69007
AM
24592+ sock, sock->sk,
24593+ (sock->sk)?sock->sk->sk_nx_info:0,
24594+ (sock->sk)?sock->sk->sk_vx_info:0,
24595+ (sock->sk)?sock->sk->sk_xid:0,
24596+ (sock->sk)?sock->sk->sk_nid:0,
927ca606
AM
24597+ size, msg_data_left(msg));
24598 return ret;
4bf69007
AM
24599 }
24600
8931d859 24601@@ -1101,6 +1119,13 @@ int __sock_create(struct net *net, int f
4bf69007
AM
24602 if (type < 0 || type >= SOCK_MAX)
24603 return -EINVAL;
24604
24605+ if (!nx_check(0, VS_ADMIN)) {
24606+ if (family == PF_INET && !current_nx_info_has_v4())
24607+ return -EAFNOSUPPORT;
24608+ if (family == PF_INET6 && !current_nx_info_has_v6())
24609+ return -EAFNOSUPPORT;
24610+ }
2ba6f0dd 24611+
4bf69007
AM
24612 /* Compatibility.
24613
24614 This uglymoron is moved from INET layer to here to avoid
8931d859 24615@@ -1235,6 +1260,7 @@ SYSCALL_DEFINE3(socket, int, family, int
4bf69007
AM
24616 if (retval < 0)
24617 goto out;
24618
24619+ set_bit(SOCK_USER_SOCKET, &sock->flags);
24620 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
24621 if (retval < 0)
24622 goto out_release;
8931d859 24623@@ -1276,10 +1302,12 @@ SYSCALL_DEFINE4(socketpair, int, family,
4bf69007
AM
24624 err = sock_create(family, type, protocol, &sock1);
24625 if (err < 0)
24626 goto out;
24627+ set_bit(SOCK_USER_SOCKET, &sock1->flags);
24628
24629 err = sock_create(family, type, protocol, &sock2);
24630 if (err < 0)
24631 goto out_release_1;
24632+ set_bit(SOCK_USER_SOCKET, &sock2->flags);
24633
24634 err = sock1->ops->socketpair(sock1, sock2);
24635 if (err < 0)
8931d859
AM
24636diff -NurpP --minimal linux-4.4.161/net/sunrpc/auth.c linux-4.4.161-vs2.3.9.8/net/sunrpc/auth.c
24637--- linux-4.4.161/net/sunrpc/auth.c 2016-01-10 23:01:32.000000000 +0000
24638+++ linux-4.4.161-vs2.3.9.8/net/sunrpc/auth.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
24639@@ -15,6 +15,7 @@
24640 #include <linux/sunrpc/clnt.h>
24641 #include <linux/sunrpc/gss_api.h>
24642 #include <linux/spinlock.h>
24643+#include <linux/vs_tag.h>
24644
927ca606 24645 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
4bf69007 24646 # define RPCDBG_FACILITY RPCDBG_AUTH
bb20add7 24647@@ -630,6 +631,7 @@ rpcauth_lookupcred(struct rpc_auth *auth
4bf69007
AM
24648 memset(&acred, 0, sizeof(acred));
24649 acred.uid = cred->fsuid;
24650 acred.gid = cred->fsgid;
a4a22af8 24651+ acred.tag = make_ktag(&init_user_ns, dx_current_tag());
bb20add7 24652 acred.group_info = cred->group_info;
4bf69007 24653 ret = auth->au_ops->lookup_cred(auth, &acred, flags);
bb20add7
AM
24654 return ret;
24655@@ -669,6 +671,7 @@ rpcauth_bind_root_cred(struct rpc_task *
4bf69007 24656 struct auth_cred acred = {
b00e13aa
AM
24657 .uid = GLOBAL_ROOT_UID,
24658 .gid = GLOBAL_ROOT_GID,
a4a22af8 24659+ .tag = KTAGT_INIT(dx_current_tag()),
4bf69007
AM
24660 };
24661
24662 dprintk("RPC: %5u looking up %s cred\n",
8931d859
AM
24663diff -NurpP --minimal linux-4.4.161/net/sunrpc/auth_unix.c linux-4.4.161-vs2.3.9.8/net/sunrpc/auth_unix.c
24664--- linux-4.4.161/net/sunrpc/auth_unix.c 2016-01-10 23:01:32.000000000 +0000
24665+++ linux-4.4.161-vs2.3.9.8/net/sunrpc/auth_unix.c 2018-10-20 04:57:21.000000000 +0000
4bf69007
AM
24666@@ -13,11 +13,13 @@
24667 #include <linux/sunrpc/clnt.h>
24668 #include <linux/sunrpc/auth.h>
24669 #include <linux/user_namespace.h>
24670+#include <linux/vs_tag.h>
24671
24672 #define NFS_NGROUPS 16
24673
24674 struct unx_cred {
24675 struct rpc_cred uc_base;
b00e13aa
AM
24676+ ktag_t uc_tag;
24677 kgid_t uc_gid;
24678 kgid_t uc_gids[NFS_NGROUPS];
4bf69007 24679 };
b00e13aa 24680@@ -80,6 +82,7 @@ unx_create_cred(struct rpc_auth *auth, s
4bf69007
AM
24681 groups = NFS_NGROUPS;
24682
24683 cred->uc_gid = acred->gid;
24684+ cred->uc_tag = acred->tag;
b00e13aa
AM
24685 for (i = 0; i < groups; i++)
24686 cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
24687 if (i < NFS_NGROUPS)
24688@@ -121,7 +124,9 @@ unx_match(struct auth_cred *acred, struc
4bf69007
AM
24689 unsigned int i;
24690
24691
b00e13aa
AM
24692- if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid))
24693+ if (!uid_eq(cred->uc_uid, acred->uid) ||
24694+ !gid_eq(cred->uc_gid, acred->gid) ||
24695+ !tag_eq(cred->uc_tag, acred->tag))
4bf69007
AM
24696 return 0;
24697
24698 if (acred->group_info != NULL)
b00e13aa 24699@@ -146,7 +151,7 @@ unx_marshal(struct rpc_task *task, __be3
4bf69007
AM
24700 struct rpc_clnt *clnt = task->tk_client;
24701 struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
24702 __be32 *base, *hold;
24703- int i;
24704+ int i, tag;
24705
24706 *p++ = htonl(RPC_AUTH_UNIX);
24707 base = p++;
a4a22af8 24708@@ -157,8 +162,11 @@ unx_marshal(struct rpc_task *task, __be3
4bf69007
AM
24709 */
24710 p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
4bf69007 24711
b00e13aa
AM
24712- *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
24713- *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
24714+ tag = task->tk_client->cl_tag;
a4a22af8
AM
24715+ *p++ = htonl((u32) from_kuid(&init_user_ns,
24716+ TAGINO_KUID(tag, cred->uc_uid, cred->uc_tag)));
24717+ *p++ = htonl((u32) from_kgid(&init_user_ns,
24718+ TAGINO_KGID(tag, cred->uc_gid, cred->uc_tag)));
4bf69007 24719 hold = p++;
b00e13aa
AM
24720 for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++)
24721 *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
8931d859
AM
24722diff -NurpP --minimal linux-4.4.161/net/sunrpc/clnt.c linux-4.4.161-vs2.3.9.8/net/sunrpc/clnt.c
24723--- linux-4.4.161/net/sunrpc/clnt.c 2018-10-20 02:34:31.000000000 +0000
24724+++ linux-4.4.161-vs2.3.9.8/net/sunrpc/clnt.c 2018-10-20 04:57:21.000000000 +0000
4bf69007 24725@@ -31,6 +31,7 @@
c2e5f7c8 24726 #include <linux/in.h>
4bf69007
AM
24727 #include <linux/in6.h>
24728 #include <linux/un.h>
4bf69007
AM
24729+#include <linux/vs_cvirt.h>
24730
24731 #include <linux/sunrpc/clnt.h>
b00e13aa 24732 #include <linux/sunrpc/addr.h>
927ca606 24733@@ -477,6 +478,9 @@ static struct rpc_clnt *rpc_create_xprt(
4bf69007
AM
24734 if (!(args->flags & RPC_CLNT_CREATE_QUIET))
24735 clnt->cl_chatty = 1;
24736
24737+ /* TODO: handle RPC_CLNT_CREATE_TAGGED
24738+ if (args->flags & RPC_CLNT_CREATE_TAGGED)
24739+ clnt->cl_tag = 1; */
24740 return clnt;
24741 }
927ca606 24742
8931d859
AM
24743diff -NurpP --minimal linux-4.4.161/net/unix/af_unix.c linux-4.4.161-vs2.3.9.8/net/unix/af_unix.c
24744--- linux-4.4.161/net/unix/af_unix.c 2018-10-20 02:34:31.000000000 +0000
24745+++ linux-4.4.161-vs2.3.9.8/net/unix/af_unix.c 2018-10-20 04:57:21.000000000 +0000
bb20add7 24746@@ -117,6 +117,8 @@
4bf69007
AM
24747 #include <net/checksum.h>
24748 #include <linux/security.h>
c2e5f7c8 24749 #include <linux/freezer.h>
4bf69007
AM
24750+#include <linux/vs_context.h>
24751+#include <linux/vs_limit.h>
24752
24753 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
24754 EXPORT_SYMBOL_GPL(unix_socket_table);
927ca606 24755@@ -282,6 +284,8 @@ static struct sock *__unix_find_socket_b
4bf69007
AM
24756 if (!net_eq(sock_net(s), net))
24757 continue;
24758
24759+ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
24760+ continue;
24761 if (u->addr->len == len &&
24762 !memcmp(u->addr->name, sunname, len))
24763 goto found;
927ca606 24764@@ -2741,6 +2745,8 @@ static struct sock *unix_from_bucket(str
4bf69007
AM
24765 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
24766 if (sock_net(sk) != seq_file_net(seq))
24767 continue;
24768+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24769+ continue;
24770 if (++count == offset)
24771 break;
24772 }
927ca606 24773@@ -2758,6 +2764,8 @@ static struct sock *unix_next_socket(str
4bf69007
AM
24774 sk = sk_next(sk);
24775 if (!sk)
24776 goto next_bucket;
24777+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24778+ continue;
24779 if (sock_net(sk) == seq_file_net(seq))
24780 return sk;
24781 }
8931d859
AM
24782diff -NurpP --minimal linux-4.4.161/net/unix/diag.c linux-4.4.161-vs2.3.9.8/net/unix/diag.c
24783--- linux-4.4.161/net/unix/diag.c 2018-10-20 02:34:31.000000000 +0000
24784+++ linux-4.4.161-vs2.3.9.8/net/unix/diag.c 2018-10-20 04:57:21.000000000 +0000
24785@@ -4,6 +4,7 @@
24786 #include <linux/unix_diag.h>
24787 #include <linux/skbuff.h>
24788 #include <linux/module.h>
24789+#include <linux/vs_network.h>
24790 #include <net/netlink.h>
24791 #include <net/af_unix.h>
24792 #include <net/tcp_states.h>
24793@@ -199,6 +200,8 @@ static int unix_diag_dump(struct sk_buff
24794 sk_for_each(sk, &unix_socket_table[slot]) {
24795 if (!net_eq(sock_net(sk), net))
24796 continue;
24797+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24798+ continue;
24799 if (num < s_num)
24800 goto next;
24801 if (!(req->udiag_states & (1 << sk->sk_state)))
24802diff -NurpP --minimal linux-4.4.161/scripts/checksyscalls.sh linux-4.4.161-vs2.3.9.8/scripts/checksyscalls.sh
24803--- linux-4.4.161/scripts/checksyscalls.sh 2016-01-10 23:01:32.000000000 +0000
24804+++ linux-4.4.161-vs2.3.9.8/scripts/checksyscalls.sh 2018-10-20 04:57:21.000000000 +0000
bb20add7 24805@@ -196,7 +196,6 @@ cat << EOF
4bf69007
AM
24806 #define __IGNORE_afs_syscall
24807 #define __IGNORE_getpmsg
24808 #define __IGNORE_putpmsg
24809-#define __IGNORE_vserver
24810 EOF
24811 }
24812
8931d859
AM
24813diff -NurpP --minimal linux-4.4.161/security/commoncap.c linux-4.4.161-vs2.3.9.8/security/commoncap.c
24814--- linux-4.4.161/security/commoncap.c 2018-10-20 02:34:31.000000000 +0000
24815+++ linux-4.4.161-vs2.3.9.8/security/commoncap.c 2018-10-20 04:57:21.000000000 +0000
927ca606 24816@@ -71,6 +71,7 @@ static void warn_setuid_and_fcaps_mixed(
4bf69007
AM
24817 int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
24818 int cap, int audit)
24819 {
24820+ struct vx_info *vxi = current_vx_info(); /* FIXME: get vxi from cred? */
b00e13aa 24821 struct user_namespace *ns = targ_ns;
4bf69007 24822
b00e13aa 24823 /* See if cred has the capability in the target user namespace
927ca606 24824@@ -79,8 +80,12 @@ int cap_capable(const struct cred *cred,
b00e13aa
AM
24825 */
24826 for (;;) {
4bf69007 24827 /* Do we have the necessary capabilities? */
b00e13aa 24828- if (ns == cred->user_ns)
4bf69007 24829- return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
b00e13aa 24830+ if (ns == cred->user_ns) {
4bf69007
AM
24831+ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0) &&
24832+ cap_raised(cred->cap_effective, cap))
24833+ return 0;
24834+ return vx_cap_raised(vxi, cred->cap_effective, cap) ? 0 : -EPERM;
24835+ }
24836
24837 /* Have we tried all of the parent namespaces? */
b00e13aa 24838 if (ns == &init_user_ns)
927ca606 24839@@ -664,7 +669,7 @@ int cap_inode_setxattr(struct dentry *de
4bf69007
AM
24840
24841 if (!strncmp(name, XATTR_SECURITY_PREFIX,
24842 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
24843- !capable(CAP_SYS_ADMIN))
24844+ !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
24845 return -EPERM;
24846 return 0;
24847 }
927ca606 24848@@ -690,7 +695,7 @@ int cap_inode_removexattr(struct dentry
4bf69007
AM
24849
24850 if (!strncmp(name, XATTR_SECURITY_PREFIX,
24851 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
24852- !capable(CAP_SYS_ADMIN))
24853+ !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
24854 return -EPERM;
24855 return 0;
24856 }
8931d859
AM
24857diff -NurpP --minimal linux-4.4.161/security/selinux/hooks.c linux-4.4.161-vs2.3.9.8/security/selinux/hooks.c
24858--- linux-4.4.161/security/selinux/hooks.c 2018-10-20 02:34:31.000000000 +0000
24859+++ linux-4.4.161-vs2.3.9.8/security/selinux/hooks.c 2018-10-20 04:57:21.000000000 +0000
927ca606 24860@@ -67,7 +67,6 @@
4bf69007
AM
24861 #include <linux/dccp.h>
24862 #include <linux/quota.h>
24863 #include <linux/un.h> /* for Unix socket types */
24864-#include <net/af_unix.h> /* for Unix socket types */
24865 #include <linux/parser.h>
24866 #include <linux/nfs_mount.h>
24867 #include <net/ipv6.h>
This page took 5.181566 seconds and 4 git commands to generate.