]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-vserver-2.3.patch
- 4.9.207
[packages/kernel.git] / kernel-vserver-2.3.patch
CommitLineData
09a55596
AM
1diff -NurpP --minimal linux-4.9.135/Documentation/vserver/debug.txt linux-4.9.135-vs2.3.9.8/Documentation/vserver/debug.txt
2--- linux-4.9.135/Documentation/vserver/debug.txt 1970-01-01 00:00:00.000000000 +0000
3+++ linux-4.9.135-vs2.3.9.8/Documentation/vserver/debug.txt 2018-10-20 04:58:12.000000000 +0000
d33d7b00
AM
4@@ -0,0 +1,154 @@
5+
6+debug_cvirt:
7+
8+ 2 4 "vx_map_tgid: %p/%llx: %d -> %d"
9+ "vx_rmap_tgid: %p/%llx: %d -> %d"
10+
11+debug_dlim:
12+
13+ 0 1 "ALLOC (%p,#%d)%c inode (%d)"
14+ "FREE (%p,#%d)%c inode"
15+ 1 2 "ALLOC (%p,#%d)%c %lld bytes (%d)"
16+ "FREE (%p,#%d)%c %lld bytes"
17+ 2 4 "ADJUST: %lld,%lld on %ld,%ld [mult=%d]"
18+ 3 8 "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d"
19+ "ext3_has_free_blocks(%p): free=%lu, root=%lu"
20+ "rcu_free_dl_info(%p)"
21+ 4 10 "alloc_dl_info(%p,%d) = %p"
22+ "dealloc_dl_info(%p)"
23+ "get_dl_info(%p[#%d.%d])"
24+ "put_dl_info(%p[#%d.%d])"
25+ 5 20 "alloc_dl_info(%p,%d)*"
26+ 6 40 "__hash_dl_info: %p[#%d]"
27+ "__unhash_dl_info: %p[#%d]"
28+ 7 80 "locate_dl_info(%p,#%d) = %p"
29+
30+debug_misc:
31+
32+ 0 1 "destroy_dqhash: %p [#0x%08x] c=%d"
33+ "new_dqhash: %p [#0x%08x]"
34+ "vroot[%d]_clr_dev: dev=%p[%lu,%d:%d]"
35+ "vroot[%d]_get_real_bdev: dev=%p[%lu,%d:%d]"
36+ "vroot[%d]_set_dev: dev=%p[%lu,%d:%d]"
37+ "vroot_get_real_bdev not set"
cc23e853
AM
38+ 1 2 "cow_break_link(?%s?)"
39+ "temp copy ?%s?"
d33d7b00
AM
40+ 2 4 "dentry_open(new): %p"
41+ "dentry_open(old): %p"
42+ "lookup_create(new): %p"
cc23e853 43+ "old path ?%s?"
d33d7b00
AM
44+ "path_lookup(old): %d"
45+ "vfs_create(new): %d"
46+ "vfs_rename: %d"
47+ "vfs_sendfile: %d"
48+ 3 8 "fput(new_file=%p[#%d])"
49+ "fput(old_file=%p[#%d])"
50+ 4 10 "vx_info_kill(%p[#%d],%d,%d) = %d"
51+ "vx_info_kill(%p[#%d],%d,%d)*"
52+ 5 20 "vs_reboot(%p[#%d],%d)"
53+ 6 40 "dropping task %p[#%u,%u] for %p[#%u,%u]"
54+
55+debug_net:
56+
57+ 2 4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d"
58+ 3 8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d"
59+ "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d"
60+ 4 10 "ip_route_connect(%p) %p,%p;%lx"
61+ 5 20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx"
62+ 6 40 "sk,egf: %p [#%d] (from %d)"
63+ "sk,egn: %p [#%d] (from %d)"
64+ "sk,req: %p [#%d] (from %d)"
65+ "sk: %p [#%d] (from %d)"
66+ "tw: %p [#%d] (from %d)"
67+ 7 80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d"
68+ "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d"
69+
70+debug_nid:
71+
72+ 0 1 "__lookup_nx_info(#%u): %p[#%u]"
73+ "alloc_nx_info(%d) = %p"
74+ "create_nx_info(%d) (dynamic rejected)"
75+ "create_nx_info(%d) = %p (already there)"
76+ "create_nx_info(%d) = %p (new)"
77+ "dealloc_nx_info(%p)"
78+ 1 2 "alloc_nx_info(%d)*"
79+ "create_nx_info(%d)*"
80+ 2 4 "get_nx_info(%p[#%d.%d])"
81+ "put_nx_info(%p[#%d.%d])"
82+ 3 8 "claim_nx_info(%p[#%d.%d.%d]) %p"
83+ "clr_nx_info(%p[#%d.%d])"
84+ "init_nx_info(%p[#%d.%d])"
85+ "release_nx_info(%p[#%d.%d.%d]) %p"
86+ "set_nx_info(%p[#%d.%d])"
87+ 4 10 "__hash_nx_info: %p[#%d]"
88+ "__nx_dynamic_id: [#%d]"
89+ "__unhash_nx_info: %p[#%d.%d.%d]"
90+ 5 20 "moved task %p into nxi:%p[#%d]"
91+ "nx_migrate_task(%p,%p[#%d.%d.%d])"
92+ "task_get_nx_info(%p)"
93+ 6 40 "nx_clear_persistent(%p[#%d])"
94+
95+debug_quota:
96+
97+ 0 1 "quota_sync_dqh(%p,%d) discard inode %p"
98+ 1 2 "quota_sync_dqh(%p,%d)"
99+ "sync_dquots(%p,%d)"
100+ "sync_dquots_dqh(%p,%d)"
101+ 3 8 "do_quotactl(%p,%d,cmd=%d,id=%d,%p)"
102+
103+debug_switch:
104+
105+ 0 1 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]"
106+ 1 2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]"
107+ 4 10 "%s: (%s %s) returned %s with %d"
108+
109+debug_tag:
110+
cc23e853 111+ 7 80 "dx_parse_tag(?%s?): %d:#%d"
d33d7b00
AM
112+ "dx_propagate_tag(%p[#%lu.%d]): %d,%d"
113+
114+debug_xid:
115+
116+ 0 1 "__lookup_vx_info(#%u): %p[#%u]"
117+ "alloc_vx_info(%d) = %p"
118+ "alloc_vx_info(%d)*"
119+ "create_vx_info(%d) (dynamic rejected)"
120+ "create_vx_info(%d) = %p (already there)"
121+ "create_vx_info(%d) = %p (new)"
122+ "dealloc_vx_info(%p)"
123+ "loc_vx_info(%d) = %p (found)"
124+ "loc_vx_info(%d) = %p (new)"
125+ "loc_vx_info(%d) = %p (not available)"
126+ 1 2 "create_vx_info(%d)*"
127+ "loc_vx_info(%d)*"
128+ 2 4 "get_vx_info(%p[#%d.%d])"
129+ "put_vx_info(%p[#%d.%d])"
130+ 3 8 "claim_vx_info(%p[#%d.%d.%d]) %p"
131+ "clr_vx_info(%p[#%d.%d])"
132+ "init_vx_info(%p[#%d.%d])"
133+ "release_vx_info(%p[#%d.%d.%d]) %p"
134+ "set_vx_info(%p[#%d.%d])"
135+ 4 10 "__hash_vx_info: %p[#%d]"
136+ "__unhash_vx_info: %p[#%d.%d.%d]"
137+ "__vx_dynamic_id: [#%d]"
138+ 5 20 "enter_vx_info(%p[#%d],%p) %p[#%d,%p]"
139+ "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]"
140+ "moved task %p into vxi:%p[#%d]"
141+ "task_get_vx_info(%p)"
142+ "vx_migrate_task(%p,%p[#%d.%d])"
143+ 6 40 "vx_clear_persistent(%p[#%d])"
144+ "vx_exit_init(%p[#%d],%p[#%d,%d,%d])"
145+ "vx_set_init(%p[#%d],%p[#%d,%d,%d])"
146+ "vx_set_persistent(%p[#%d])"
147+ "vx_set_reaper(%p[#%d],%p[#%d,%d])"
148+ 7 80 "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]"
149+
150+
151+debug_limit:
152+
153+ n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s"
154+ "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
155+
156+ m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s"
157+ "vx_acc_pages[%5d,%s,%2d]: %5d += %5d"
158+ "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d"
09a55596
AM
159diff -NurpP --minimal linux-4.9.135/arch/alpha/Kconfig linux-4.9.135-vs2.3.9.8/arch/alpha/Kconfig
160--- linux-4.9.135/arch/alpha/Kconfig 2016-12-11 19:17:54.000000000 +0000
161+++ linux-4.9.135-vs2.3.9.8/arch/alpha/Kconfig 2018-10-20 04:58:12.000000000 +0000
cc23e853 162@@ -743,6 +743,8 @@ config DUMMY_CONSOLE
2380c486
JR
163 depends on VGA_HOSE
164 default y
d337f35e
JR
165
166+source "kernel/vserver/Kconfig"
167+
168 source "security/Kconfig"
169
170 source "crypto/Kconfig"
09a55596
AM
171diff -NurpP --minimal linux-4.9.135/arch/alpha/kernel/systbls.S linux-4.9.135-vs2.3.9.8/arch/alpha/kernel/systbls.S
172--- linux-4.9.135/arch/alpha/kernel/systbls.S 2016-12-11 19:17:54.000000000 +0000
173+++ linux-4.9.135-vs2.3.9.8/arch/alpha/kernel/systbls.S 2018-10-20 04:58:12.000000000 +0000
d337f35e
JR
174@@ -446,7 +446,7 @@ sys_call_table:
175 .quad sys_stat64 /* 425 */
176 .quad sys_lstat64
177 .quad sys_fstat64
178- .quad sys_ni_syscall /* sys_vserver */
179+ .quad sys_vserver /* sys_vserver */
180 .quad sys_ni_syscall /* sys_mbind */
181 .quad sys_ni_syscall /* sys_get_mempolicy */
182 .quad sys_ni_syscall /* sys_set_mempolicy */
09a55596
AM
183diff -NurpP --minimal linux-4.9.135/arch/alpha/kernel/traps.c linux-4.9.135-vs2.3.9.8/arch/alpha/kernel/traps.c
184--- linux-4.9.135/arch/alpha/kernel/traps.c 2018-10-20 10:38:42.000000000 +0000
185+++ linux-4.9.135-vs2.3.9.8/arch/alpha/kernel/traps.c 2018-10-20 04:58:12.000000000 +0000
cef7ea10 186@@ -179,7 +179,8 @@ die_if_kernel(char * str, struct pt_regs
d337f35e
JR
187 #ifdef CONFIG_SMP
188 printk("CPU %d ", hard_smp_processor_id());
189 #endif
2380c486 190- printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
61333608 191+ printk("%s(%d:#%u): %s %ld\n", current->comm,
2380c486 192+ task_pid_nr(current), current->xid, str, err);
d337f35e 193 dik_show_regs(regs, r9_15);
b00e13aa 194 add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
d337f35e 195 dik_show_trace((unsigned long *)(regs+1));
09a55596
AM
196diff -NurpP --minimal linux-4.9.135/arch/arm/Kconfig linux-4.9.135-vs2.3.9.8/arch/arm/Kconfig
197--- linux-4.9.135/arch/arm/Kconfig 2016-12-11 19:17:54.000000000 +0000
198+++ linux-4.9.135-vs2.3.9.8/arch/arm/Kconfig 2018-10-20 04:58:12.000000000 +0000
cc23e853 199@@ -2199,6 +2199,8 @@ source "fs/Kconfig"
d337f35e
JR
200
201 source "arch/arm/Kconfig.debug"
202
203+source "kernel/vserver/Kconfig"
204+
205 source "security/Kconfig"
206
207 source "crypto/Kconfig"
09a55596
AM
208diff -NurpP --minimal linux-4.9.135/arch/arm/kernel/calls.S linux-4.9.135-vs2.3.9.8/arch/arm/kernel/calls.S
209--- linux-4.9.135/arch/arm/kernel/calls.S 2016-12-11 19:17:54.000000000 +0000
210+++ linux-4.9.135-vs2.3.9.8/arch/arm/kernel/calls.S 2018-10-20 04:58:12.000000000 +0000
d337f35e
JR
211@@ -322,7 +322,7 @@
212 /* 310 */ CALL(sys_request_key)
213 CALL(sys_keyctl)
214 CALL(ABI(sys_semtimedop, sys_oabi_semtimedop))
215-/* vserver */ CALL(sys_ni_syscall)
216+ CALL(sys_vserver)
217 CALL(sys_ioprio_set)
218 /* 315 */ CALL(sys_ioprio_get)
219 CALL(sys_inotify_init)
09a55596
AM
220diff -NurpP --minimal linux-4.9.135/arch/arm/kernel/traps.c linux-4.9.135-vs2.3.9.8/arch/arm/kernel/traps.c
221--- linux-4.9.135/arch/arm/kernel/traps.c 2018-10-20 10:38:42.000000000 +0000
222+++ linux-4.9.135-vs2.3.9.8/arch/arm/kernel/traps.c 2018-10-20 04:58:12.000000000 +0000
223@@ -279,8 +279,8 @@ static int __die(const char *str, int er
78865d5b 224
d337f35e
JR
225 print_modules();
226 __show_regs(regs);
cc23e853
AM
227- pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
228- TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk));
229+ pr_emerg("Process %.*s (pid: %d:%u, stack limit = 0x%p)\n",
230+ TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), tsk->xid, end_of_stack(tsk));
d337f35e
JR
231
232 if (!user_mode(regs) || in_interrupt()) {
7e46296a 233 dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
09a55596
AM
234diff -NurpP --minimal linux-4.9.135/arch/cris/Kconfig linux-4.9.135-vs2.3.9.8/arch/cris/Kconfig
235--- linux-4.9.135/arch/cris/Kconfig 2016-12-11 19:17:54.000000000 +0000
236+++ linux-4.9.135-vs2.3.9.8/arch/cris/Kconfig 2018-10-20 04:58:12.000000000 +0000
cc23e853 237@@ -583,6 +583,8 @@ source "fs/Kconfig"
d337f35e
JR
238
239 source "arch/cris/Kconfig.debug"
240
241+source "kernel/vserver/Kconfig"
242+
243 source "security/Kconfig"
244
245 source "crypto/Kconfig"
09a55596
AM
246diff -NurpP --minimal linux-4.9.135/arch/ia64/Kconfig linux-4.9.135-vs2.3.9.8/arch/ia64/Kconfig
247--- linux-4.9.135/arch/ia64/Kconfig 2016-12-11 19:17:54.000000000 +0000
248+++ linux-4.9.135-vs2.3.9.8/arch/ia64/Kconfig 2018-10-20 04:58:12.000000000 +0000
cc23e853 249@@ -602,6 +602,8 @@ source "fs/Kconfig"
2380c486
JR
250
251 source "arch/ia64/Kconfig.debug"
d337f35e
JR
252
253+source "kernel/vserver/Kconfig"
254+
255 source "security/Kconfig"
256
257 source "crypto/Kconfig"
09a55596
AM
258diff -NurpP --minimal linux-4.9.135/arch/ia64/kernel/entry.S linux-4.9.135-vs2.3.9.8/arch/ia64/kernel/entry.S
259--- linux-4.9.135/arch/ia64/kernel/entry.S 2016-12-11 19:17:54.000000000 +0000
260+++ linux-4.9.135-vs2.3.9.8/arch/ia64/kernel/entry.S 2018-10-20 04:58:12.000000000 +0000
cc23e853 261@@ -1697,7 +1697,7 @@ sys_call_table:
2380c486
JR
262 data8 sys_mq_notify
263 data8 sys_mq_getsetattr
264 data8 sys_kexec_load
265- data8 sys_ni_syscall // reserved for vserver
266+ data8 sys_vserver
267 data8 sys_waitid // 1270
268 data8 sys_add_key
269 data8 sys_request_key
09a55596
AM
270diff -NurpP --minimal linux-4.9.135/arch/ia64/kernel/ptrace.c linux-4.9.135-vs2.3.9.8/arch/ia64/kernel/ptrace.c
271--- linux-4.9.135/arch/ia64/kernel/ptrace.c 2018-10-20 10:38:44.000000000 +0000
272+++ linux-4.9.135-vs2.3.9.8/arch/ia64/kernel/ptrace.c 2018-10-20 04:58:12.000000000 +0000
78865d5b 273@@ -21,6 +21,7 @@
2380c486 274 #include <linux/regset.h>
d337f35e 275 #include <linux/elf.h>
ec22aa5c 276 #include <linux/tracehook.h>
d337f35e
JR
277+#include <linux/vs_base.h>
278
279 #include <asm/pgtable.h>
280 #include <asm/processor.h>
09a55596
AM
281diff -NurpP --minimal linux-4.9.135/arch/ia64/kernel/traps.c linux-4.9.135-vs2.3.9.8/arch/ia64/kernel/traps.c
282--- linux-4.9.135/arch/ia64/kernel/traps.c 2016-12-11 19:17:54.000000000 +0000
283+++ linux-4.9.135-vs2.3.9.8/arch/ia64/kernel/traps.c 2018-10-20 04:58:12.000000000 +0000
1e8b8f9b 284@@ -60,8 +60,9 @@ die (const char *str, struct pt_regs *re
d337f35e
JR
285 put_cpu();
286
287 if (++die.lock_owner_depth < 3) {
288- printk("%s[%d]: %s %ld [%d]\n",
2380c486 289- current->comm, task_pid_nr(current), str, err, ++die_counter);
61333608 290+ printk("%s[%d:#%u]: %s %ld [%d]\n",
2380c486 291+ current->comm, task_pid_nr(current), current->xid,
d337f35e 292+ str, err, ++die_counter);
2380c486
JR
293 if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV)
294 != NOTIFY_STOP)
295 show_regs(regs);
1e8b8f9b 296@@ -324,8 +325,9 @@ handle_fpu_swa (int fp_fault, struct pt_
2380c486
JR
297 if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) {
298 last.time = current_jiffies + 5 * HZ;
299 printk(KERN_WARNING
300- "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
301- current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
61333608 302+ "%s(%d:#%u): floating-point assist fault at ip %016lx, isr %016lx\n",
2380c486
JR
303+ current->comm, task_pid_nr(current), current->xid,
304+ regs->cr_iip + ia64_psr(regs)->ri, isr);
305 }
306 }
d337f35e 307 }
09a55596
AM
308diff -NurpP --minimal linux-4.9.135/arch/m32r/kernel/traps.c linux-4.9.135-vs2.3.9.8/arch/m32r/kernel/traps.c
309--- linux-4.9.135/arch/m32r/kernel/traps.c 2016-12-11 19:17:54.000000000 +0000
310+++ linux-4.9.135-vs2.3.9.8/arch/m32r/kernel/traps.c 2018-10-20 04:58:12.000000000 +0000
09be7631 311@@ -184,8 +184,9 @@ static void show_registers(struct pt_reg
d337f35e
JR
312 } else {
313 printk("SPI: %08lx\n", sp);
314 }
315- printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
2380c486 316- current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current);
61333608 317+ printk("Process %s (pid: %d:#%u, process nr: %d, stackpage=%08lx)",
2380c486 318+ current->comm, task_pid_nr(current), current->xid,
d337f35e
JR
319+ 0xffff & i, 4096+(unsigned long)current);
320
321 /*
322 * When in-kernel, we also print out the stack and code at the
09a55596
AM
323diff -NurpP --minimal linux-4.9.135/arch/m68k/Kconfig linux-4.9.135-vs2.3.9.8/arch/m68k/Kconfig
324--- linux-4.9.135/arch/m68k/Kconfig 2016-12-11 19:17:54.000000000 +0000
325+++ linux-4.9.135-vs2.3.9.8/arch/m68k/Kconfig 2018-10-20 04:58:12.000000000 +0000
cc23e853 326@@ -163,6 +163,8 @@ source "fs/Kconfig"
d337f35e
JR
327
328 source "arch/m68k/Kconfig.debug"
329
330+source "kernel/vserver/Kconfig"
331+
332 source "security/Kconfig"
333
334 source "crypto/Kconfig"
09a55596
AM
335diff -NurpP --minimal linux-4.9.135/arch/mips/Kconfig linux-4.9.135-vs2.3.9.8/arch/mips/Kconfig
336--- linux-4.9.135/arch/mips/Kconfig 2018-10-20 10:38:44.000000000 +0000
337+++ linux-4.9.135-vs2.3.9.8/arch/mips/Kconfig 2018-10-20 04:58:12.000000000 +0000
338@@ -3190,6 +3190,8 @@ source "fs/Kconfig"
d337f35e
JR
339
340 source "arch/mips/Kconfig.debug"
341
342+source "kernel/vserver/Kconfig"
343+
344 source "security/Kconfig"
345
346 source "crypto/Kconfig"
09a55596
AM
347diff -NurpP --minimal linux-4.9.135/arch/mips/kernel/ptrace.c linux-4.9.135-vs2.3.9.8/arch/mips/kernel/ptrace.c
348--- linux-4.9.135/arch/mips/kernel/ptrace.c 2018-10-20 10:38:44.000000000 +0000
349+++ linux-4.9.135-vs2.3.9.8/arch/mips/kernel/ptrace.c 2018-10-20 05:55:33.000000000 +0000
cc23e853 350@@ -30,6 +30,7 @@
2380c486
JR
351 #include <linux/audit.h>
352 #include <linux/seccomp.h>
c2e5f7c8 353 #include <linux/ftrace.h>
d337f35e
JR
354+#include <linux/vs_base.h>
355
356 #include <asm/byteorder.h>
357 #include <asm/cpu.h>
09a55596 358@@ -798,6 +799,9 @@ long arch_ptrace(struct task_struct *chi
ab30d09f
AM
359 void __user *datavp = (void __user *) data;
360 unsigned long __user *datalp = (void __user *) data;
d337f35e 361
2380c486 362+ if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT))
d337f35e
JR
363+ goto out;
364+
365 switch (request) {
366 /* when I and D space are separate, these will need to be fixed. */
367 case PTRACE_PEEKTEXT: /* read word at location addr. */
09a55596
AM
368diff -NurpP --minimal linux-4.9.135/arch/mips/kernel/scall32-o32.S linux-4.9.135-vs2.3.9.8/arch/mips/kernel/scall32-o32.S
369--- linux-4.9.135/arch/mips/kernel/scall32-o32.S 2018-10-20 10:38:44.000000000 +0000
370+++ linux-4.9.135-vs2.3.9.8/arch/mips/kernel/scall32-o32.S 2018-10-20 04:58:12.000000000 +0000
cc23e853 371@@ -511,7 +511,7 @@ EXPORT(sys_call_table)
c2e5f7c8
JR
372 PTR sys_mq_timedreceive
373 PTR sys_mq_notify /* 4275 */
374 PTR sys_mq_getsetattr
375- PTR sys_ni_syscall /* sys_vserver */
376+ PTR sys_vserver
377 PTR sys_waitid
378 PTR sys_ni_syscall /* available, was setaltroot */
379 PTR sys_add_key /* 4280 */
09a55596
AM
380diff -NurpP --minimal linux-4.9.135/arch/mips/kernel/scall64-64.S linux-4.9.135-vs2.3.9.8/arch/mips/kernel/scall64-64.S
381--- linux-4.9.135/arch/mips/kernel/scall64-64.S 2018-10-20 10:38:44.000000000 +0000
382+++ linux-4.9.135-vs2.3.9.8/arch/mips/kernel/scall64-64.S 2018-10-20 04:58:12.000000000 +0000
cc23e853 383@@ -348,7 +348,7 @@ EXPORT(sys_call_table)
d337f35e
JR
384 PTR sys_mq_timedreceive
385 PTR sys_mq_notify
386 PTR sys_mq_getsetattr /* 5235 */
387- PTR sys_ni_syscall /* sys_vserver */
388+ PTR sys_vserver
389 PTR sys_waitid
390 PTR sys_ni_syscall /* available, was setaltroot */
391 PTR sys_add_key
09a55596
AM
392diff -NurpP --minimal linux-4.9.135/arch/mips/kernel/scall64-n32.S linux-4.9.135-vs2.3.9.8/arch/mips/kernel/scall64-n32.S
393--- linux-4.9.135/arch/mips/kernel/scall64-n32.S 2018-10-20 10:38:44.000000000 +0000
394+++ linux-4.9.135-vs2.3.9.8/arch/mips/kernel/scall64-n32.S 2018-10-20 04:58:12.000000000 +0000
cc23e853 395@@ -343,7 +343,7 @@ EXPORT(sysn32_call_table)
d337f35e
JR
396 PTR compat_sys_mq_timedreceive
397 PTR compat_sys_mq_notify
398 PTR compat_sys_mq_getsetattr
399- PTR sys_ni_syscall /* 6240, sys_vserver */
400+ PTR sys32_vserver /* 6240 */
2380c486 401 PTR compat_sys_waitid
d337f35e
JR
402 PTR sys_ni_syscall /* available, was setaltroot */
403 PTR sys_add_key
09a55596
AM
404diff -NurpP --minimal linux-4.9.135/arch/mips/kernel/scall64-o32.S linux-4.9.135-vs2.3.9.8/arch/mips/kernel/scall64-o32.S
405--- linux-4.9.135/arch/mips/kernel/scall64-o32.S 2018-10-20 10:38:44.000000000 +0000
406+++ linux-4.9.135-vs2.3.9.8/arch/mips/kernel/scall64-o32.S 2018-10-20 04:58:12.000000000 +0000
cc23e853 407@@ -499,7 +499,7 @@ EXPORT(sys32_call_table)
d337f35e
JR
408 PTR compat_sys_mq_timedreceive
409 PTR compat_sys_mq_notify /* 4275 */
410 PTR compat_sys_mq_getsetattr
411- PTR sys_ni_syscall /* sys_vserver */
412+ PTR sys32_vserver
b00e13aa 413 PTR compat_sys_waitid
d337f35e
JR
414 PTR sys_ni_syscall /* available, was setaltroot */
415 PTR sys_add_key /* 4280 */
09a55596
AM
416diff -NurpP --minimal linux-4.9.135/arch/mips/kernel/traps.c linux-4.9.135-vs2.3.9.8/arch/mips/kernel/traps.c
417--- linux-4.9.135/arch/mips/kernel/traps.c 2018-10-20 10:38:44.000000000 +0000
418+++ linux-4.9.135-vs2.3.9.8/arch/mips/kernel/traps.c 2018-10-20 04:58:13.000000000 +0000
419@@ -361,9 +361,10 @@ void show_registers(struct pt_regs *regs
2380c486
JR
420
421 __show_regs(regs);
d337f35e 422 print_modules();
2380c486
JR
423- printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n",
424- current->comm, current->pid, current_thread_info(), current,
425- field, current_thread_info()->tp_value);
426+ printk("Process %s (pid: %d:#%u, threadinfo=%p, task=%p, tls=%0*lx)\n",
427+ current->comm, task_pid_nr(current), current->xid,
428+ current_thread_info(), current,
429+ field, current_thread_info()->tp_value);
430 if (cpu_has_userlocal) {
431 unsigned long tls;
432
09a55596
AM
433diff -NurpP --minimal linux-4.9.135/arch/parisc/Kconfig linux-4.9.135-vs2.3.9.8/arch/parisc/Kconfig
434--- linux-4.9.135/arch/parisc/Kconfig 2018-10-20 10:38:44.000000000 +0000
435+++ linux-4.9.135-vs2.3.9.8/arch/parisc/Kconfig 2018-10-20 05:55:33.000000000 +0000
cc23e853 436@@ -348,6 +348,8 @@ config SECCOMP
d337f35e 437
bb20add7 438 If unsure, say Y. Only embedded should say N here.
d337f35e
JR
439
440+source "kernel/vserver/Kconfig"
441+
442 source "security/Kconfig"
443
444 source "crypto/Kconfig"
09a55596
AM
445diff -NurpP --minimal linux-4.9.135/arch/parisc/kernel/syscall_table.S linux-4.9.135-vs2.3.9.8/arch/parisc/kernel/syscall_table.S
446--- linux-4.9.135/arch/parisc/kernel/syscall_table.S 2018-10-20 10:38:45.000000000 +0000
447+++ linux-4.9.135-vs2.3.9.8/arch/parisc/kernel/syscall_table.S 2018-10-20 04:58:13.000000000 +0000
b00e13aa 448@@ -358,7 +358,7 @@
d337f35e
JR
449 ENTRY_COMP(mbind) /* 260 */
450 ENTRY_COMP(get_mempolicy)
451 ENTRY_COMP(set_mempolicy)
452- ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */
453+ ENTRY_DIFF(vserver)
454 ENTRY_SAME(add_key)
455 ENTRY_SAME(request_key) /* 265 */
cc23e853 456 ENTRY_COMP(keyctl)
09a55596
AM
457diff -NurpP --minimal linux-4.9.135/arch/parisc/kernel/traps.c linux-4.9.135-vs2.3.9.8/arch/parisc/kernel/traps.c
458--- linux-4.9.135/arch/parisc/kernel/traps.c 2016-12-11 19:17:54.000000000 +0000
459+++ linux-4.9.135-vs2.3.9.8/arch/parisc/kernel/traps.c 2018-10-20 04:58:13.000000000 +0000
cc23e853 460@@ -235,8 +235,9 @@ void die_if_kernel(char *str, struct pt_
d337f35e
JR
461 return; /* STFU */
462
98968f7b
JR
463 parisc_printk_ratelimited(1, regs,
464- KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
2380c486 465- current->comm, task_pid_nr(current), str, err, regs->iaoq[0]);
98968f7b 466+ KERN_CRIT "%s (pid %d:#%u): %s (code %ld) at " RFMT "\n",
2380c486 467+ current->comm, task_pid_nr(current), current->xid,
d337f35e 468+ str, err, regs->iaoq[0]);
98968f7b
JR
469
470 return;
471 }
cc23e853 472@@ -266,8 +267,8 @@ void die_if_kernel(char *str, struct pt_
d337f35e
JR
473 pdc_console_restart();
474
2380c486
JR
475 if (err)
476- printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
477- current->comm, task_pid_nr(current), str, err);
478+ printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld)\n",
479+ current->comm, task_pid_nr(current), current->xid, str, err);
480
481 /* Wot's wrong wif bein' racy? */
482 if (current->thread.flags & PARISC_KERNEL_DEATH) {
09a55596
AM
483diff -NurpP --minimal linux-4.9.135/arch/powerpc/Kconfig linux-4.9.135-vs2.3.9.8/arch/powerpc/Kconfig
484--- linux-4.9.135/arch/powerpc/Kconfig 2018-10-20 10:38:45.000000000 +0000
485+++ linux-4.9.135-vs2.3.9.8/arch/powerpc/Kconfig 2018-10-20 04:58:13.000000000 +0000
cef7ea10 486@@ -1086,6 +1086,8 @@ source "lib/Kconfig"
d33d7b00
AM
487
488 source "arch/powerpc/Kconfig.debug"
489
490+source "kernel/vserver/Kconfig"
491+
492 source "security/Kconfig"
493
cc23e853 494 source "crypto/Kconfig"
09a55596
AM
495diff -NurpP --minimal linux-4.9.135/arch/powerpc/include/uapi/asm/unistd.h linux-4.9.135-vs2.3.9.8/arch/powerpc/include/uapi/asm/unistd.h
496--- linux-4.9.135/arch/powerpc/include/uapi/asm/unistd.h 2016-12-11 19:17:54.000000000 +0000
497+++ linux-4.9.135-vs2.3.9.8/arch/powerpc/include/uapi/asm/unistd.h 2018-10-20 04:58:13.000000000 +0000
adc1caaa
AM
498@@ -275,7 +275,7 @@
499 #endif
500 #define __NR_rtas 255
501 #define __NR_sys_debug_setcontext 256
502-/* Number 257 is reserved for vserver */
503+#define __NR_vserver 257
504 #define __NR_migrate_pages 258
505 #define __NR_mbind 259
506 #define __NR_get_mempolicy 260
09a55596
AM
507diff -NurpP --minimal linux-4.9.135/arch/s390/Kconfig linux-4.9.135-vs2.3.9.8/arch/s390/Kconfig
508--- linux-4.9.135/arch/s390/Kconfig 2018-10-20 10:38:45.000000000 +0000
509+++ linux-4.9.135-vs2.3.9.8/arch/s390/Kconfig 2018-10-20 04:58:13.000000000 +0000
510@@ -775,6 +775,8 @@ source "fs/Kconfig"
d33d7b00
AM
511
512 source "arch/s390/Kconfig.debug"
513
514+source "kernel/vserver/Kconfig"
515+
516 source "security/Kconfig"
0411181d 517
d33d7b00 518 source "crypto/Kconfig"
09a55596
AM
519diff -NurpP --minimal linux-4.9.135/arch/s390/include/asm/tlb.h linux-4.9.135-vs2.3.9.8/arch/s390/include/asm/tlb.h
520--- linux-4.9.135/arch/s390/include/asm/tlb.h 2016-12-11 19:17:54.000000000 +0000
521+++ linux-4.9.135-vs2.3.9.8/arch/s390/include/asm/tlb.h 2018-10-20 04:58:13.000000000 +0000
dd5f3080 522@@ -24,6 +24,7 @@
0411181d 523 #include <linux/mm.h>
d33d7b00 524 #include <linux/pagemap.h>
0411181d 525 #include <linux/swap.h>
0411181d
AM
526+
527 #include <asm/processor.h>
528 #include <asm/pgalloc.h>
763640ca 529 #include <asm/tlbflush.h>
09a55596
AM
530diff -NurpP --minimal linux-4.9.135/arch/s390/include/uapi/asm/unistd.h linux-4.9.135-vs2.3.9.8/arch/s390/include/uapi/asm/unistd.h
531--- linux-4.9.135/arch/s390/include/uapi/asm/unistd.h 2016-12-11 19:17:54.000000000 +0000
532+++ linux-4.9.135-vs2.3.9.8/arch/s390/include/uapi/asm/unistd.h 2018-10-20 04:58:13.000000000 +0000
92598135 533@@ -200,7 +200,7 @@
cc23e853
AM
534 #define __NR_clock_gettime 260
535 #define __NR_clock_getres 261
536 #define __NR_clock_nanosleep 262
0411181d
AM
537-/* Number 263 is reserved for vserver */
538+#define __NR_vserver 263
539 #define __NR_statfs64 265
540 #define __NR_fstatfs64 266
541 #define __NR_remap_file_pages 267
09a55596
AM
542diff -NurpP --minimal linux-4.9.135/arch/s390/kernel/ptrace.c linux-4.9.135-vs2.3.9.8/arch/s390/kernel/ptrace.c
543--- linux-4.9.135/arch/s390/kernel/ptrace.c 2018-10-20 10:38:46.000000000 +0000
544+++ linux-4.9.135-vs2.3.9.8/arch/s390/kernel/ptrace.c 2018-10-20 04:58:13.000000000 +0000
db55b927 545@@ -21,6 +21,7 @@
ec22aa5c
AM
546 #include <linux/tracehook.h>
547 #include <linux/seccomp.h>
969f5c41 548 #include <linux/compat.h>
db55b927 549+#include <linux/vs_base.h>
ec22aa5c 550 #include <trace/syscall.h>
d337f35e 551 #include <asm/segment.h>
db55b927 552 #include <asm/page.h>
09a55596
AM
553diff -NurpP --minimal linux-4.9.135/arch/s390/kernel/syscalls.S linux-4.9.135-vs2.3.9.8/arch/s390/kernel/syscalls.S
554--- linux-4.9.135/arch/s390/kernel/syscalls.S 2018-10-20 10:38:46.000000000 +0000
555+++ linux-4.9.135-vs2.3.9.8/arch/s390/kernel/syscalls.S 2018-10-20 04:58:13.000000000 +0000
cc23e853
AM
556@@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,compat_sys_clo
557 SYSCALL(sys_clock_gettime,compat_sys_clock_gettime) /* 260 */
558 SYSCALL(sys_clock_getres,compat_sys_clock_getres)
559 SYSCALL(sys_clock_nanosleep,compat_sys_clock_nanosleep)
560-NI_SYSCALL /* reserved for vserver */
d337f35e 561+SYSCALL(sys_vserver,sys32_vserver)
cc23e853
AM
562 SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
563 SYSCALL(sys_statfs64,compat_sys_statfs64)
564 SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
09a55596
AM
565diff -NurpP --minimal linux-4.9.135/arch/sh/Kconfig linux-4.9.135-vs2.3.9.8/arch/sh/Kconfig
566--- linux-4.9.135/arch/sh/Kconfig 2016-12-11 19:17:54.000000000 +0000
567+++ linux-4.9.135-vs2.3.9.8/arch/sh/Kconfig 2018-10-20 04:58:13.000000000 +0000
cc23e853 568@@ -904,6 +904,8 @@ source "fs/Kconfig"
d337f35e
JR
569
570 source "arch/sh/Kconfig.debug"
571
572+source "kernel/vserver/Kconfig"
573+
574 source "security/Kconfig"
575
576 source "crypto/Kconfig"
09a55596
AM
577diff -NurpP --minimal linux-4.9.135/arch/sh/kernel/irq.c linux-4.9.135-vs2.3.9.8/arch/sh/kernel/irq.c
578--- linux-4.9.135/arch/sh/kernel/irq.c 2016-12-11 19:17:54.000000000 +0000
579+++ linux-4.9.135-vs2.3.9.8/arch/sh/kernel/irq.c 2018-10-20 04:58:13.000000000 +0000
f86f0b53 580@@ -14,6 +14,7 @@
7e46296a 581 #include <linux/ftrace.h>
76514441 582 #include <linux/delay.h>
763640ca 583 #include <linux/ratelimit.h>
f86f0b53 584+// #include <linux/vs_context.h>
d337f35e 585 #include <asm/processor.h>
2380c486 586 #include <asm/machvec.h>
f86f0b53 587 #include <asm/uaccess.h>
09a55596
AM
588diff -NurpP --minimal linux-4.9.135/arch/sparc/Kconfig linux-4.9.135-vs2.3.9.8/arch/sparc/Kconfig
589--- linux-4.9.135/arch/sparc/Kconfig 2018-10-20 10:38:46.000000000 +0000
590+++ linux-4.9.135-vs2.3.9.8/arch/sparc/Kconfig 2018-10-20 04:58:13.000000000 +0000
cc23e853 591@@ -580,6 +580,8 @@ source "fs/Kconfig"
d33d7b00
AM
592
593 source "arch/sparc/Kconfig.debug"
594
595+source "kernel/vserver/Kconfig"
596+
597 source "security/Kconfig"
598
599 source "crypto/Kconfig"
09a55596
AM
600diff -NurpP --minimal linux-4.9.135/arch/sparc/include/uapi/asm/unistd.h linux-4.9.135-vs2.3.9.8/arch/sparc/include/uapi/asm/unistd.h
601--- linux-4.9.135/arch/sparc/include/uapi/asm/unistd.h 2016-12-11 19:17:54.000000000 +0000
602+++ linux-4.9.135-vs2.3.9.8/arch/sparc/include/uapi/asm/unistd.h 2018-10-20 04:58:13.000000000 +0000
537831f9 603@@ -332,7 +332,7 @@
ec22aa5c
AM
604 #define __NR_timer_getoverrun 264
605 #define __NR_timer_delete 265
606 #define __NR_timer_create 266
607-/* #define __NR_vserver 267 Reserved for VSERVER */
608+#define __NR_vserver 267
609 #define __NR_io_setup 268
610 #define __NR_io_destroy 269
611 #define __NR_io_submit 270
09a55596
AM
612diff -NurpP --minimal linux-4.9.135/arch/sparc/kernel/systbls_32.S linux-4.9.135-vs2.3.9.8/arch/sparc/kernel/systbls_32.S
613--- linux-4.9.135/arch/sparc/kernel/systbls_32.S 2016-12-11 19:17:54.000000000 +0000
614+++ linux-4.9.135-vs2.3.9.8/arch/sparc/kernel/systbls_32.S 2018-10-20 04:58:13.000000000 +0000
50e68740 615@@ -70,7 +70,7 @@ sys_call_table:
a168f21d 616 /*250*/ .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall
50e68740
JR
617 /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
618 /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
619-/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
620+/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
621 /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
622 /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
623 /*280*/ .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
09a55596
AM
624diff -NurpP --minimal linux-4.9.135/arch/sparc/kernel/systbls_64.S linux-4.9.135-vs2.3.9.8/arch/sparc/kernel/systbls_64.S
625--- linux-4.9.135/arch/sparc/kernel/systbls_64.S 2016-12-11 19:17:54.000000000 +0000
626+++ linux-4.9.135-vs2.3.9.8/arch/sparc/kernel/systbls_64.S 2018-10-20 04:58:13.000000000 +0000
50e68740 627@@ -71,7 +71,7 @@ sys_call_table32:
b00e13aa 628 /*250*/ .word sys_mremap, compat_sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
50e68740
JR
629 .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
630 /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
631- .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
632+ .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy
633 /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
634 .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
b00e13aa 635 /*280*/ .word sys_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat
cc23e853 636@@ -152,7 +152,7 @@ sys_call_table:
a168f21d 637 /*250*/ .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
50e68740
JR
638 .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
639 /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
640- .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
641+ .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
642 /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
643 .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
644 /*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
09a55596
AM
645diff -NurpP --minimal linux-4.9.135/arch/um/Kconfig.rest linux-4.9.135-vs2.3.9.8/arch/um/Kconfig.rest
646--- linux-4.9.135/arch/um/Kconfig.rest 2016-12-11 19:17:54.000000000 +0000
647+++ linux-4.9.135-vs2.3.9.8/arch/um/Kconfig.rest 2018-10-20 04:58:13.000000000 +0000
f6c5ef8b 648@@ -12,6 +12,8 @@ source "arch/um/Kconfig.net"
d33d7b00
AM
649
650 source "fs/Kconfig"
651
652+source "kernel/vserver/Kconfig"
653+
654 source "security/Kconfig"
655
656 source "crypto/Kconfig"
09a55596
AM
657diff -NurpP --minimal linux-4.9.135/arch/x86/Kconfig linux-4.9.135-vs2.3.9.8/arch/x86/Kconfig
658--- linux-4.9.135/arch/x86/Kconfig 2018-10-20 10:38:46.000000000 +0000
659+++ linux-4.9.135-vs2.3.9.8/arch/x86/Kconfig 2018-10-20 05:55:34.000000000 +0000
660@@ -2778,6 +2778,8 @@ source "fs/Kconfig"
e03b8c3c 661
d33d7b00 662 source "arch/x86/Kconfig.debug"
e03b8c3c
AM
663
664+source "kernel/vserver/Kconfig"
665+
666 source "security/Kconfig"
667
668 source "crypto/Kconfig"
09a55596
AM
669diff -NurpP --minimal linux-4.9.135/arch/x86/entry/syscalls/syscall_32.tbl linux-4.9.135-vs2.3.9.8/arch/x86/entry/syscalls/syscall_32.tbl
670--- linux-4.9.135/arch/x86/entry/syscalls/syscall_32.tbl 2016-12-11 19:17:54.000000000 +0000
671+++ linux-4.9.135-vs2.3.9.8/arch/x86/entry/syscalls/syscall_32.tbl 2018-10-20 04:58:13.000000000 +0000
db55b927
AM
672@@ -279,7 +279,7 @@
673 270 i386 tgkill sys_tgkill
674 271 i386 utimes sys_utimes compat_sys_utimes
675 272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
676-273 i386 vserver
677+273 i386 vserver sys_vserver sys32_vserver
678 274 i386 mbind sys_mbind
679 275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
680 276 i386 set_mempolicy sys_set_mempolicy
09a55596
AM
681diff -NurpP --minimal linux-4.9.135/arch/x86/entry/syscalls/syscall_64.tbl linux-4.9.135-vs2.3.9.8/arch/x86/entry/syscalls/syscall_64.tbl
682--- linux-4.9.135/arch/x86/entry/syscalls/syscall_64.tbl 2016-12-11 19:17:54.000000000 +0000
683+++ linux-4.9.135-vs2.3.9.8/arch/x86/entry/syscalls/syscall_64.tbl 2018-10-20 04:58:13.000000000 +0000
db55b927 684@@ -242,7 +242,7 @@
1e8b8f9b
AM
685 233 common epoll_ctl sys_epoll_ctl
686 234 common tgkill sys_tgkill
687 235 common utimes sys_utimes
db55b927
AM
688-236 64 vserver
689+236 64 vserver sys_vserver
1e8b8f9b
AM
690 237 common mbind sys_mbind
691 238 common set_mempolicy sys_set_mempolicy
692 239 common get_mempolicy sys_get_mempolicy
09a55596
AM
693diff -NurpP --minimal linux-4.9.135/block/ioprio.c linux-4.9.135-vs2.3.9.8/block/ioprio.c
694--- linux-4.9.135/block/ioprio.c 2016-12-11 19:17:54.000000000 +0000
695+++ linux-4.9.135-vs2.3.9.8/block/ioprio.c 2018-10-20 04:58:13.000000000 +0000
bb20add7
AM
696@@ -28,6 +28,7 @@
697 #include <linux/syscalls.h>
698 #include <linux/security.h>
699 #include <linux/pid_namespace.h>
700+#include <linux/vs_base.h>
701
702 int set_task_ioprio(struct task_struct *task, int ioprio)
703 {
704@@ -105,6 +106,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which,
705 else
706 pgrp = find_vpid(who);
707 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
708+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
709+ continue;
710 ret = set_task_ioprio(p, ioprio);
711 if (ret)
712 break;
cc23e853 713@@ -203,6 +206,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which,
bb20add7
AM
714 else
715 pgrp = find_vpid(who);
716 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
717+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
718+ continue;
719 tmpio = get_task_ioprio(p);
720 if (tmpio < 0)
721 continue;
09a55596
AM
722diff -NurpP --minimal linux-4.9.135/drivers/block/Kconfig linux-4.9.135-vs2.3.9.8/drivers/block/Kconfig
723--- linux-4.9.135/drivers/block/Kconfig 2016-12-11 19:17:54.000000000 +0000
724+++ linux-4.9.135-vs2.3.9.8/drivers/block/Kconfig 2018-10-20 04:58:13.000000000 +0000
cc23e853 725@@ -273,6 +273,13 @@ config BLK_DEV_CRYPTOLOOP
2bf5ad28
AM
726
727 source "drivers/block/drbd/Kconfig"
d337f35e
JR
728
729+config BLK_DEV_VROOT
730+ tristate "Virtual Root device support"
731+ depends on QUOTACTL
732+ ---help---
733+ Saying Y here will allow you to use quota/fs ioctls on a shared
734+ partition within a virtual server without compromising security.
735+
736 config BLK_DEV_NBD
737 tristate "Network block device support"
738 depends on NET
09a55596
AM
739diff -NurpP --minimal linux-4.9.135/drivers/block/Makefile linux-4.9.135-vs2.3.9.8/drivers/block/Makefile
740--- linux-4.9.135/drivers/block/Makefile 2016-12-11 19:17:54.000000000 +0000
741+++ linux-4.9.135-vs2.3.9.8/drivers/block/Makefile 2018-10-20 04:58:13.000000000 +0000
cc23e853 742@@ -31,6 +31,7 @@ obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
bb20add7 743
d33d7b00 744 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
d33d7b00
AM
745 obj-$(CONFIG_BLK_DEV_HD) += hd.o
746+obj-$(CONFIG_BLK_DEV_VROOT) += vroot.o
747
748 obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
763640ca 749 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
09a55596
AM
750diff -NurpP --minimal linux-4.9.135/drivers/block/loop.c linux-4.9.135-vs2.3.9.8/drivers/block/loop.c
751--- linux-4.9.135/drivers/block/loop.c 2018-10-20 10:38:49.000000000 +0000
752+++ linux-4.9.135-vs2.3.9.8/drivers/block/loop.c 2018-10-20 04:58:13.000000000 +0000
cc23e853 753@@ -76,6 +76,7 @@
a168f21d 754 #include <linux/miscdevice.h>
f6c5ef8b 755 #include <linux/falloc.h>
cc23e853 756 #include <linux/uio.h>
d337f35e 757+#include <linux/vs_context.h>
c2e5f7c8 758 #include "loop.h"
f6c5ef8b 759
d337f35e 760 #include <asm/uaccess.h>
09a55596 761@@ -936,6 +937,7 @@ static int loop_set_fd(struct loop_devic
d337f35e
JR
762 lo->lo_blocksize = lo_blocksize;
763 lo->lo_device = bdev;
764 lo->lo_flags = lo_flags;
765+ lo->lo_xid = vx_current_xid();
766 lo->lo_backing_file = file;
cc23e853 767 lo->transfer = NULL;
d337f35e 768 lo->ioctl = NULL;
09a55596 769@@ -1056,6 +1058,7 @@ static int loop_clr_fd(struct loop_devic
cc23e853 770 lo->lo_offset = 0;
f6c5ef8b 771 lo->lo_sizelimit = 0;
2380c486 772 lo->lo_encrypt_key_size = 0;
2380c486
JR
773+ lo->lo_xid = 0;
774 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
775 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
776 memset(lo->lo_file_name, 0, LO_NAME_SIZE);
09a55596 777@@ -1102,7 +1105,7 @@ loop_set_status(struct loop_device *lo,
2380c486 778
ec22aa5c 779 if (lo->lo_encrypt_key_size &&
537831f9 780 !uid_eq(lo->lo_key_owner, uid) &&
d337f35e
JR
781- !capable(CAP_SYS_ADMIN))
782+ !vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP))
783 return -EPERM;
784 if (lo->lo_state != Lo_bound)
785 return -ENXIO;
09a55596 786@@ -1207,7 +1210,8 @@ loop_get_status(struct loop_device *lo,
d337f35e
JR
787 memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
788 info->lo_encrypt_type =
789 lo->lo_encryption ? lo->lo_encryption->number : 0;
790- if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
791+ if (lo->lo_encrypt_key_size &&
792+ vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_CLOOP)) {
793 info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
794 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
795 lo->lo_encrypt_key_size);
09a55596 796@@ -1568,6 +1572,11 @@ static int lo_open(struct block_device *
a168f21d
AM
797 goto out;
798 }
d337f35e 799
dd5f3080 800+ if (!vx_check(lo->lo_xid, VS_IDENT|VS_HOSTID|VS_ADMIN_P)) {
801+ err = -EACCES;
802+ goto out;
803+ }
d337f35e 804+
cc23e853
AM
805 atomic_inc(&lo->lo_refcnt);
806 out:
807 mutex_unlock(&loop_index_mutex);
09a55596
AM
808diff -NurpP --minimal linux-4.9.135/drivers/block/loop.h linux-4.9.135-vs2.3.9.8/drivers/block/loop.h
809--- linux-4.9.135/drivers/block/loop.h 2018-10-20 10:38:49.000000000 +0000
810+++ linux-4.9.135-vs2.3.9.8/drivers/block/loop.h 2018-10-20 04:58:13.000000000 +0000
cc23e853 811@@ -43,6 +43,7 @@ struct loop_device {
c2e5f7c8
JR
812 struct loop_func_table *lo_encryption;
813 __u32 lo_init[2];
814 kuid_t lo_key_owner; /* Who set the key */
815+ vxid_t lo_xid;
816 int (*ioctl)(struct loop_device *, int cmd,
817 unsigned long arg);
818
09a55596
AM
819diff -NurpP --minimal linux-4.9.135/drivers/block/vroot.c linux-4.9.135-vs2.3.9.8/drivers/block/vroot.c
820--- linux-4.9.135/drivers/block/vroot.c 1970-01-01 00:00:00.000000000 +0000
821+++ linux-4.9.135-vs2.3.9.8/drivers/block/vroot.c 2018-10-20 04:58:13.000000000 +0000
cc23e853 822@@ -0,0 +1,291 @@
d337f35e
JR
823+/*
824+ * linux/drivers/block/vroot.c
825+ *
cc23e853
AM
826+ * written by Herbert Pötzl, 9/11/2002
827+ * ported to 2.6.10 by Herbert Pötzl, 30/12/2004
d337f35e
JR
828+ *
829+ * based on the loop.c code by Theodore Ts'o.
830+ *
cc23e853 831+ * Copyright (C) 2002-2007 by Herbert Pötzl.
d337f35e
JR
832+ * Redistribution of this file is permitted under the
833+ * GNU General Public License.
834+ *
835+ */
836+
837+#include <linux/module.h>
838+#include <linux/moduleparam.h>
839+#include <linux/file.h>
840+#include <linux/major.h>
841+#include <linux/blkdev.h>
76514441 842+#include <linux/slab.h>
d337f35e
JR
843+
844+#include <linux/vroot.h>
845+#include <linux/vs_context.h>
846+
847+
848+static int max_vroot = 8;
849+
850+static struct vroot_device *vroot_dev;
851+static struct gendisk **disks;
852+
853+
854+static int vroot_set_dev(
855+ struct vroot_device *vr,
d337f35e
JR
856+ struct block_device *bdev,
857+ unsigned int arg)
858+{
859+ struct block_device *real_bdev;
860+ struct file *file;
861+ struct inode *inode;
862+ int error;
863+
864+ error = -EBUSY;
865+ if (vr->vr_state != Vr_unbound)
866+ goto out;
867+
868+ error = -EBADF;
869+ file = fget(arg);
870+ if (!file)
871+ goto out;
872+
873+ error = -EINVAL;
cc23e853 874+ inode = file->f_path.dentry->d_inode;
d337f35e
JR
875+
876+
877+ if (S_ISBLK(inode->i_mode)) {
878+ real_bdev = inode->i_bdev;
879+ vr->vr_device = real_bdev;
880+ __iget(real_bdev->bd_inode);
881+ } else
882+ goto out_fput;
883+
884+ vxdprintk(VXD_CBIT(misc, 0),
885+ "vroot[%d]_set_dev: dev=" VXF_DEV,
886+ vr->vr_number, VXD_DEV(real_bdev));
887+
888+ vr->vr_state = Vr_bound;
889+ error = 0;
890+
891+ out_fput:
892+ fput(file);
893+ out:
894+ return error;
895+}
896+
897+static int vroot_clr_dev(
898+ struct vroot_device *vr,
d337f35e
JR
899+ struct block_device *bdev)
900+{
901+ struct block_device *real_bdev;
902+
903+ if (vr->vr_state != Vr_bound)
904+ return -ENXIO;
905+ if (vr->vr_refcnt > 1) /* we needed one fd for the ioctl */
906+ return -EBUSY;
907+
908+ real_bdev = vr->vr_device;
909+
910+ vxdprintk(VXD_CBIT(misc, 0),
911+ "vroot[%d]_clr_dev: dev=" VXF_DEV,
912+ vr->vr_number, VXD_DEV(real_bdev));
913+
914+ bdput(real_bdev);
915+ vr->vr_state = Vr_unbound;
916+ vr->vr_device = NULL;
917+ return 0;
918+}
919+
920+
ec22aa5c 921+static int vr_ioctl(struct block_device *bdev, fmode_t mode,
d337f35e
JR
922+ unsigned int cmd, unsigned long arg)
923+{
ec22aa5c 924+ struct vroot_device *vr = bdev->bd_disk->private_data;
d337f35e
JR
925+ int err;
926+
927+ down(&vr->vr_ctl_mutex);
928+ switch (cmd) {
929+ case VROOT_SET_DEV:
ec22aa5c 930+ err = vroot_set_dev(vr, bdev, arg);
d337f35e
JR
931+ break;
932+ case VROOT_CLR_DEV:
ec22aa5c 933+ err = vroot_clr_dev(vr, bdev);
d337f35e
JR
934+ break;
935+ default:
936+ err = -EINVAL;
937+ break;
938+ }
939+ up(&vr->vr_ctl_mutex);
940+ return err;
941+}
942+
ec22aa5c 943+static int vr_open(struct block_device *bdev, fmode_t mode)
d337f35e 944+{
ec22aa5c 945+ struct vroot_device *vr = bdev->bd_disk->private_data;
d337f35e
JR
946+
947+ down(&vr->vr_ctl_mutex);
948+ vr->vr_refcnt++;
949+ up(&vr->vr_ctl_mutex);
950+ return 0;
951+}
952+
09be7631 953+static void vr_release(struct gendisk *disk, fmode_t mode)
d337f35e 954+{
ec22aa5c 955+ struct vroot_device *vr = disk->private_data;
d337f35e
JR
956+
957+ down(&vr->vr_ctl_mutex);
958+ --vr->vr_refcnt;
959+ up(&vr->vr_ctl_mutex);
d337f35e
JR
960+}
961+
962+static struct block_device_operations vr_fops = {
963+ .owner = THIS_MODULE,
964+ .open = vr_open,
965+ .release = vr_release,
966+ .ioctl = vr_ioctl,
967+};
968+
cc23e853 969+static blk_qc_t vroot_make_request(struct request_queue *q, struct bio *bio)
b3b0d4fd
AM
970+{
971+ printk("vroot_make_request %p, %p\n", q, bio);
972+ bio_io_error(bio);
cc23e853 973+ return BLK_QC_T_NONE;
b3b0d4fd
AM
974+}
975+
d337f35e
JR
976+struct block_device *__vroot_get_real_bdev(struct block_device *bdev)
977+{
978+	struct inode *inode = bdev->bd_inode;
979+	struct vroot_device *vr;
980+	struct block_device *real_bdev;
981+	int minor = iminor(inode);
982+
983+	/* Unbound slots have vr_device == NULL: bail out before touching it. */
984+	vr = &vroot_dev[minor];
985+	if (vr->vr_state != Vr_bound)
986+		return ERR_PTR(-ENXIO);
987+
988+	real_bdev = vr->vr_device;
989+	vxdprintk(VXD_CBIT(misc, 0),
990+		"vroot[%d]_get_real_bdev: dev=" VXF_DEV,
991+		vr->vr_number, VXD_DEV(real_bdev));
992+	/* Returned with an extra inode reference; vroot_clr_dev() uses bdput(). */
993+	__iget(real_bdev->bd_inode);
994+	return real_bdev;
995+}
996+
b3b0d4fd
AM
997+
998+
d337f35e
JR
999+/*
1000+ * And now the modules code and kernel interface.
1001+ */
1002+
1003+module_param(max_vroot, int, 0);
1004+
1005+MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)");
1006+MODULE_LICENSE("GPL");
1007+MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR);
1008+
cc23e853 1009+MODULE_AUTHOR ("Herbert P?tzl");
d337f35e
JR
1010+MODULE_DESCRIPTION ("Virtual Root Device Mapper");
1011+
1012+
1013+int __init vroot_init(void)
1014+{
1015+ int err, i;
1016+
1017+ if (max_vroot < 1 || max_vroot > 256) {
1018+ max_vroot = MAX_VROOT_DEFAULT;
1019+ printk(KERN_WARNING "vroot: invalid max_vroot "
1020+ "(must be between 1 and 256), "
1021+ "using default (%d)\n", max_vroot);
1022+ }
1023+
1024+ if (register_blkdev(VROOT_MAJOR, "vroot"))
1025+ return -EIO;
1026+
1027+ err = -ENOMEM;
1028+ vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL);
1029+ if (!vroot_dev)
1030+ goto out_mem1;
1031+ memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device));
1032+
1033+ disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL);
1034+ if (!disks)
1035+ goto out_mem2;
1036+
1037+ for (i = 0; i < max_vroot; i++) {
1038+ disks[i] = alloc_disk(1);
1039+ if (!disks[i])
1040+ goto out_mem3;
2380c486
JR
1041+ disks[i]->queue = blk_alloc_queue(GFP_KERNEL);
1042+ if (!disks[i]->queue)
1043+ goto out_mem3;
b3b0d4fd 1044+ blk_queue_make_request(disks[i]->queue, vroot_make_request);
d337f35e
JR
1045+ }
1046+
1047+ for (i = 0; i < max_vroot; i++) {
1048+ struct vroot_device *vr = &vroot_dev[i];
1049+ struct gendisk *disk = disks[i];
1050+
1051+ memset(vr, 0, sizeof(*vr));
5a9fc8e8 1052+ sema_init(&vr->vr_ctl_mutex, 1);
d337f35e
JR
1053+ vr->vr_number = i;
1054+ disk->major = VROOT_MAJOR;
1055+ disk->first_minor = i;
1056+ disk->fops = &vr_fops;
1057+ sprintf(disk->disk_name, "vroot%d", i);
1058+ disk->private_data = vr;
1059+ }
1060+
1061+ err = register_vroot_grb(&__vroot_get_real_bdev);
1062+ if (err)
1063+ goto out_mem3;
1064+
1065+ for (i = 0; i < max_vroot; i++)
1066+ add_disk(disks[i]);
1067+ printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot);
1068+ return 0;
1069+
1070+out_mem3:
1071+ while (i--)
1072+ put_disk(disks[i]);
1073+ kfree(disks);
1074+out_mem2:
1075+ kfree(vroot_dev);
1076+out_mem1:
1077+ unregister_blkdev(VROOT_MAJOR, "vroot");
1078+ printk(KERN_ERR "vroot: ran out of memory\n");
1079+ return err;
1080+}
1081+
1082+void vroot_exit(void)
1083+{
1084+ int i;
1085+
1086+ if (unregister_vroot_grb(&__vroot_get_real_bdev))
1087+ printk(KERN_WARNING "vroot: cannot unregister grb\n");
1088+
1089+ for (i = 0; i < max_vroot; i++) {
1090+ del_gendisk(disks[i]);
1091+ put_disk(disks[i]);
1092+ }
2380c486 1093+ unregister_blkdev(VROOT_MAJOR, "vroot");
d337f35e
JR
1094+
1095+ kfree(disks);
1096+ kfree(vroot_dev);
1097+}
1098+
1099+module_init(vroot_init);
1100+module_exit(vroot_exit);
1101+
1102+#ifndef MODULE
1103+
1104+static int __init max_vroot_setup(char *str)
1105+{
1106+ max_vroot = simple_strtol(str, NULL, 0);
1107+ return 1;
1108+}
1109+
1110+__setup("max_vroot=", max_vroot_setup);
1111+
1112+#endif
1113+
09a55596
AM
1114diff -NurpP --minimal linux-4.9.135/drivers/md/dm-core.h linux-4.9.135-vs2.3.9.8/drivers/md/dm-core.h
1115--- linux-4.9.135/drivers/md/dm-core.h 2018-10-20 10:38:58.000000000 +0000
1116+++ linux-4.9.135-vs2.3.9.8/drivers/md/dm-core.h 2018-10-20 04:58:13.000000000 +0000
cc23e853
AM
1117@@ -52,6 +52,7 @@ struct mapped_device {
1118
1119 atomic_t holders;
1120 atomic_t open_count;
1121+ vxid_t xid;
1122
1123 struct dm_target *immutable_target;
1124 struct target_type *immutable_target_type;
09a55596
AM
1125diff -NurpP --minimal linux-4.9.135/drivers/md/dm-ioctl.c linux-4.9.135-vs2.3.9.8/drivers/md/dm-ioctl.c
1126--- linux-4.9.135/drivers/md/dm-ioctl.c 2018-10-20 10:38:58.000000000 +0000
1127+++ linux-4.9.135-vs2.3.9.8/drivers/md/dm-ioctl.c 2018-10-20 04:58:13.000000000 +0000
3bac966d
AM
1128@@ -16,6 +16,7 @@
1129 #include <linux/dm-ioctl.h>
1130 #include <linux/hdreg.h>
1131 #include <linux/compat.h>
1132+#include <linux/vs_context.h>
1133
1134 #include <asm/uaccess.h>
1135
c2e5f7c8 1136@@ -114,7 +115,8 @@ static struct hash_cell *__get_name_cell
3bac966d
AM
1137 unsigned int h = hash_str(str);
1138
1139 list_for_each_entry (hc, _name_buckets + h, name_list)
1140- if (!strcmp(hc->name, str)) {
1141+ if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1142+ !strcmp(hc->name, str)) {
1143 dm_get(hc->md);
1144 return hc;
1145 }
c2e5f7c8 1146@@ -128,7 +130,8 @@ static struct hash_cell *__get_uuid_cell
3bac966d
AM
1147 unsigned int h = hash_str(str);
1148
1149 list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
1150- if (!strcmp(hc->uuid, str)) {
1151+ if (vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT) &&
1152+ !strcmp(hc->uuid, str)) {
1153 dm_get(hc->md);
1154 return hc;
1155 }
c2e5f7c8 1156@@ -139,13 +142,15 @@ static struct hash_cell *__get_uuid_cell
a168f21d
AM
1157 static struct hash_cell *__get_dev_cell(uint64_t dev)
1158 {
1159 struct mapped_device *md;
1160- struct hash_cell *hc;
1161+ struct hash_cell *hc = NULL;
1162
1163 md = dm_get_md(huge_decode_dev(dev));
1164 if (!md)
1165 return NULL;
1166
1167- hc = dm_get_mdptr(md);
1168+ if (vx_check(dm_get_xid(md), VS_WATCH_P | VS_IDENT))
1169+ hc = dm_get_mdptr(md);
1170+
1171 if (!hc) {
1172 dm_put(md);
1173 return NULL;
c2e5f7c8 1174@@ -467,6 +472,9 @@ typedef int (*ioctl_fn)(struct dm_ioctl
3bac966d
AM
1175
1176 static int remove_all(struct dm_ioctl *param, size_t param_size)
1177 {
1178+ if (!vx_check(0, VS_ADMIN))
1179+ return -EPERM;
1180+
c2e5f7c8 1181 dm_hash_remove_all(true, !!(param->flags & DM_DEFERRED_REMOVE), false);
3bac966d
AM
1182 param->data_size = 0;
1183 return 0;
c2e5f7c8 1184@@ -514,6 +522,8 @@ static int list_devices(struct dm_ioctl
3bac966d
AM
1185 */
1186 for (i = 0; i < NUM_BUCKETS; i++) {
1187 list_for_each_entry (hc, _name_buckets + i, name_list) {
1188+ if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1189+ continue;
1190 needed += sizeof(struct dm_name_list);
1191 needed += strlen(hc->name) + 1;
1192 needed += ALIGN_MASK;
c2e5f7c8 1193@@ -537,6 +547,8 @@ static int list_devices(struct dm_ioctl
3bac966d
AM
1194 */
1195 for (i = 0; i < NUM_BUCKETS; i++) {
1196 list_for_each_entry (hc, _name_buckets + i, name_list) {
1197+ if (!vx_check(dm_get_xid(hc->md), VS_WATCH_P | VS_IDENT))
1198+ continue;
1199 if (old_nl)
1200 old_nl->next = (uint32_t) ((void *) nl -
1201 (void *) old_nl);
cc23e853 1202@@ -1805,8 +1817,8 @@ static int ctl_ioctl(uint command, struc
763640ca 1203 size_t input_param_size;
b00e13aa 1204 struct dm_ioctl param_kernel;
3bac966d
AM
1205
1206- /* only root can play with this */
1207- if (!capable(CAP_SYS_ADMIN))
1208+ /* only root and certain contexts can play with this */
1209+ if (!vx_capable(CAP_SYS_ADMIN, VXC_ADMIN_MAPPER))
1210 return -EACCES;
1211
1212 if (_IOC_TYPE(command) != DM_IOCTL)
09a55596
AM
1213diff -NurpP --minimal linux-4.9.135/drivers/md/dm.c linux-4.9.135-vs2.3.9.8/drivers/md/dm.c
1214--- linux-4.9.135/drivers/md/dm.c 2018-10-20 10:38:58.000000000 +0000
1215+++ linux-4.9.135-vs2.3.9.8/drivers/md/dm.c 2018-10-20 04:58:13.000000000 +0000
cc23e853
AM
1216@@ -22,6 +22,7 @@
1217 #include <linux/wait.h>
1218 #include <linux/pr.h>
1219 #include <linux/vmalloc.h>
d33d7b00
AM
1220+#include <linux/vs_base.h>
1221
cc23e853 1222 #define DM_MSG_PREFIX "core"
d33d7b00 1223
cc23e853 1224@@ -300,6 +301,7 @@ int dm_deleting_md(struct mapped_device
d33d7b00
AM
1225 static int dm_blk_open(struct block_device *bdev, fmode_t mode)
1226 {
1227 struct mapped_device *md;
1228+ int ret = -ENXIO;
1229
1230 spin_lock(&_minor_lock);
1231
cc23e853 1232@@ -308,17 +310,19 @@ static int dm_blk_open(struct block_devi
d33d7b00
AM
1233 goto out;
1234
1235 if (test_bit(DMF_FREEING, &md->flags) ||
1236- dm_deleting_md(md)) {
1237- md = NULL;
1238+ dm_deleting_md(md))
1239+ goto out;
1240+
1241+ ret = -EACCES;
1242+ if (!vx_check(md->xid, VS_IDENT|VS_HOSTID))
1243 goto out;
1244- }
1245
1246 dm_get(md);
1247 atomic_inc(&md->open_count);
d33d7b00
AM
1248+ ret = 0;
1249 out:
1250 spin_unlock(&_minor_lock);
1251-
1252- return md ? 0 : -ENXIO;
1253+ return ret;
1254 }
1255
09be7631 1256 static void dm_blk_close(struct gendisk *disk, fmode_t mode)
cc23e853 1257@@ -744,6 +748,14 @@ int dm_set_geometry(struct mapped_device
d33d7b00
AM
1258 return 0;
1259 }
1260
1261+/*
1262+ * Get the xid associated with a dm device
1263+ */
61333608 1264+vxid_t dm_get_xid(struct mapped_device *md)
d33d7b00
AM
1265+{
1266+ return md->xid;
1267+}
1268+
1269 /*-----------------------------------------------------------------
1270 * CRUD START:
1271 * A more elegant soln is in the works that uses the queue
09a55596 1272@@ -1549,6 +1561,7 @@ static struct mapped_device *alloc_dev(i
cc23e853 1273 INIT_LIST_HEAD(&md->uevent_list);
bb20add7 1274 INIT_LIST_HEAD(&md->table_devices);
d33d7b00 1275 spin_lock_init(&md->uevent_lock);
d33d7b00 1276+ md->xid = vx_current_xid();
cc23e853
AM
1277
1278 md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
d33d7b00 1279 if (!md->queue)
09a55596
AM
1280diff -NurpP --minimal linux-4.9.135/drivers/md/dm.h linux-4.9.135-vs2.3.9.8/drivers/md/dm.h
1281--- linux-4.9.135/drivers/md/dm.h 2016-12-11 19:17:54.000000000 +0000
1282+++ linux-4.9.135-vs2.3.9.8/drivers/md/dm.h 2018-10-20 04:58:13.000000000 +0000
cc23e853 1283@@ -45,6 +45,8 @@ struct dm_dev_internal {
d33d7b00
AM
1284 struct dm_table;
1285 struct dm_md_mempools;
1286
61333608 1287+vxid_t dm_get_xid(struct mapped_device *md);
d33d7b00
AM
1288+
1289 /*-----------------------------------------------------------------
1290 * Internal table functions.
1291 *---------------------------------------------------------------*/
09a55596
AM
1292diff -NurpP --minimal linux-4.9.135/drivers/net/tun.c linux-4.9.135-vs2.3.9.8/drivers/net/tun.c
1293--- linux-4.9.135/drivers/net/tun.c 2018-10-20 10:39:05.000000000 +0000
1294+++ linux-4.9.135-vs2.3.9.8/drivers/net/tun.c 2018-10-20 04:58:13.000000000 +0000
c2e5f7c8 1295@@ -65,6 +65,7 @@
d33d7b00
AM
1296 #include <linux/nsproxy.h>
1297 #include <linux/virtio_net.h>
1298 #include <linux/rcupdate.h>
1299+#include <linux/vs_network.h>
1300 #include <net/net_namespace.h>
1301 #include <net/netns/generic.h>
cc23e853
AM
1302 #include <net/rtnetlink.h>
1303@@ -194,6 +195,7 @@ struct tun_struct {
d33d7b00 1304 unsigned int flags;
537831f9
AM
1305 kuid_t owner;
1306 kgid_t group;
61333608 1307+ vnid_t nid;
d33d7b00
AM
1308
1309 struct net_device *dev;
db55b927 1310 netdev_features_t set_features;
cc23e853 1311@@ -490,6 +492,7 @@ static inline bool tun_not_capable(struc
b00e13aa
AM
1312 return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
1313 (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
1314 !ns_capable(net->user_ns, CAP_NET_ADMIN);
1315+ /* !cap_raised(current_cap(), CAP_NET_ADMIN) */
1316 }
1317
1318 static void tun_set_real_num_queues(struct tun_struct *tun)
5ba7a31c 1319@@ -1558,6 +1561,7 @@ static void tun_setup(struct net_device
2380c486 1320
537831f9
AM
1321 tun->owner = INVALID_UID;
1322 tun->group = INVALID_GID;
1323+ tun->nid = nx_current_nid();
2380c486 1324
ec22aa5c
AM
1325 dev->ethtool_ops = &tun_ethtool_ops;
1326 dev->destructor = tun_free_netdev;
5ba7a31c 1327@@ -1769,7 +1773,7 @@ static int tun_set_iff(struct net *net,
b00e13aa
AM
1328 int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ?
1329 MAX_TAP_QUEUES : 1;
1330
1331- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
c2e5f7c8 1332+ if (!nx_ns_capable(net->user_ns, CAP_NET_ADMIN, NXC_TUN_CREATE))
b00e13aa
AM
1333 return -EPERM;
1334 err = security_tun_dev_create();
1335 if (err < 0)
5ba7a31c 1336@@ -2134,6 +2138,16 @@ static long __tun_chr_ioctl(struct file
537831f9 1337 from_kgid(&init_user_ns, tun->group));
2380c486 1338 break;
d337f35e 1339
2380c486
JR
1340+ case TUNSETNID:
1341+ if (!capable(CAP_CONTEXT))
1342+ return -EPERM;
d337f35e 1343+
2380c486 1344+ /* Set nid owner of the device */
61333608 1345+ tun->nid = (vnid_t) arg;
d337f35e 1346+
763640ca 1347+ tun_debug(KERN_INFO, tun, "nid owner set to %u\n", tun->nid);
2380c486 1348+ break;
d337f35e 1349+
2380c486
JR
1350 case TUNSETLINK:
1351 /* Only allow setting the type when the interface is down */
ec22aa5c 1352 if (tun->dev->flags & IFF_UP) {
09a55596
AM
1353diff -NurpP --minimal linux-4.9.135/drivers/scsi/cxgbi/libcxgbi.c linux-4.9.135-vs2.3.9.8/drivers/scsi/cxgbi/libcxgbi.c
1354--- linux-4.9.135/drivers/scsi/cxgbi/libcxgbi.c 2016-12-11 19:17:54.000000000 +0000
1355+++ linux-4.9.135-vs2.3.9.8/drivers/scsi/cxgbi/libcxgbi.c 2018-10-20 04:58:13.000000000 +0000
cc23e853 1356@@ -772,7 +772,8 @@ static struct cxgbi_sock *cxgbi_check_ro
bb20add7
AM
1357 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
1358
1359 err = ipv6_dev_get_saddr(&init_net, idev ? idev->dev : NULL,
1360- &daddr6->sin6_addr, 0, &pref_saddr);
1361+ &daddr6->sin6_addr, 0, &pref_saddr,
1362+ NULL);
1363 if (err) {
1364 pr_info("failed to get source address to reach %pI6\n",
1365 &daddr6->sin6_addr);
09a55596
AM
1366diff -NurpP --minimal linux-4.9.135/drivers/tty/sysrq.c linux-4.9.135-vs2.3.9.8/drivers/tty/sysrq.c
1367--- linux-4.9.135/drivers/tty/sysrq.c 2018-10-20 10:39:13.000000000 +0000
1368+++ linux-4.9.135-vs2.3.9.8/drivers/tty/sysrq.c 2018-10-20 04:58:13.000000000 +0000
bb20add7 1369@@ -47,6 +47,7 @@
c2e5f7c8
JR
1370 #include <linux/syscalls.h>
1371 #include <linux/of.h>
bb20add7 1372 #include <linux/rcupdate.h>
ab30d09f
AM
1373+#include <linux/vserver/debug.h>
1374
1375 #include <asm/ptrace.h>
1376 #include <asm/irq_regs.h>
cc23e853 1377@@ -428,6 +429,21 @@ static struct sysrq_key_op sysrq_unrt_op
ab30d09f
AM
1378 .enable_mask = SYSRQ_ENABLE_RTNICE,
1379 };
1380
1381+
1382+#ifdef CONFIG_VSERVER_DEBUG
1383+static void sysrq_handle_vxinfo(int key)
1384+{
1385+ dump_vx_info_inactive((key == 'x') ? 0 : 1);
1386+}
1387+
1388+static struct sysrq_key_op sysrq_showvxinfo_op = {
1389+ .handler = sysrq_handle_vxinfo,
1390+ .help_msg = "conteXt",
1391+ .action_msg = "Show Context Info",
1392+ .enable_mask = SYSRQ_ENABLE_DUMP,
1393+};
1394+#endif
1395+
1396 /* Key Operations table and lock */
1397 static DEFINE_SPINLOCK(sysrq_key_table_lock);
1398
cc23e853
AM
1399@@ -484,7 +500,11 @@ static struct sysrq_key_op *sysrq_key_ta
1400 /* x: May be registered on mips for TLB dump */
ab30d09f 1401 /* x: May be registered on ppc/powerpc for xmon */
537831f9 1402 /* x: May be registered on sparc64 for global PMU dump */
ab30d09f
AM
1403+#ifdef CONFIG_VSERVER_DEBUG
1404+ &sysrq_showvxinfo_op, /* x */
1405+#else
4bf69007 1406 NULL, /* x */
ab30d09f
AM
1407+#endif
1408 /* y: May be registered on sparc64 for global register dump */
1409 NULL, /* y */
1410 &sysrq_ftrace_dump_op, /* z */
cc23e853 1411@@ -499,6 +519,8 @@ static int sysrq_key_table_key2index(int
ab30d09f
AM
1412 retval = key - '0';
1413 else if ((key >= 'a') && (key <= 'z'))
1414 retval = key + 10 - 'a';
1415+ else if ((key >= 'A') && (key <= 'Z'))
1416+ retval = key + 10 - 'A';
1417 else
1418 retval = -1;
1419 return retval;
09a55596
AM
1420diff -NurpP --minimal linux-4.9.135/drivers/tty/tty_io.c linux-4.9.135-vs2.3.9.8/drivers/tty/tty_io.c
1421--- linux-4.9.135/drivers/tty/tty_io.c 2018-10-20 10:39:13.000000000 +0000
1422+++ linux-4.9.135-vs2.3.9.8/drivers/tty/tty_io.c 2018-10-20 05:55:42.000000000 +0000
1e8b8f9b 1423@@ -104,6 +104,7 @@
ab30d09f
AM
1424
1425 #include <linux/kmod.h>
1426 #include <linux/nsproxy.h>
1427+#include <linux/vs_pid.h>
1428
1429 #undef TTY_DEBUG_HANGUP
cc23e853 1430 #ifdef TTY_DEBUG_HANGUP
09a55596 1431@@ -2308,7 +2309,8 @@ static int tiocsti(struct tty_struct *tt
ab30d09f
AM
1432 char ch, mbz = 0;
1433 struct tty_ldisc *ld;
1434
1435- if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN))
1436+ if (((current->signal->tty != tty) &&
1437+ !vx_capable(CAP_SYS_ADMIN, VXC_TIOCSTI)))
1438 return -EPERM;
1439 if (get_user(ch, p))
1440 return -EFAULT;
09a55596 1441@@ -2623,6 +2625,7 @@ static int tiocspgrp(struct tty_struct *
ab30d09f
AM
1442 return -ENOTTY;
1443 if (get_user(pgrp_nr, p))
1444 return -EFAULT;
1445+ pgrp_nr = vx_rmap_pid(pgrp_nr);
1446 if (pgrp_nr < 0)
1447 return -EINVAL;
1448 rcu_read_lock();
09a55596
AM
1449diff -NurpP --minimal linux-4.9.135/fs/attr.c linux-4.9.135-vs2.3.9.8/fs/attr.c
1450--- linux-4.9.135/fs/attr.c 2016-12-11 19:17:54.000000000 +0000
1451+++ linux-4.9.135-vs2.3.9.8/fs/attr.c 2018-10-20 04:58:13.000000000 +0000
537831f9 1452@@ -15,6 +15,9 @@
d337f35e 1453 #include <linux/security.h>
f6c5ef8b 1454 #include <linux/evm.h>
537831f9 1455 #include <linux/ima.h>
d337f35e
JR
1456+#include <linux/proc_fs.h>
1457+#include <linux/devpts_fs.h>
2380c486 1458+#include <linux/vs_tag.h>
d337f35e 1459
93de0823 1460 /**
cc23e853
AM
1461 * setattr_prepare - check if attribute changes to a dentry are allowed
1462@@ -90,6 +93,10 @@ kill_priv:
1463 return error;
d337f35e 1464 }
93de0823
AM
1465
1466+ /* check for inode tag permission */
2380c486 1467+ if (dx_permission(inode, MAY_WRITE))
93de0823 1468+ return -EACCES;
2380c486 1469+
93de0823
AM
1470 return 0;
1471 }
cc23e853
AM
1472 EXPORT_SYMBOL(setattr_prepare);
1473@@ -160,6 +167,8 @@ void setattr_copy(struct inode *inode, c
d337f35e
JR
1474 inode->i_uid = attr->ia_uid;
1475 if (ia_valid & ATTR_GID)
1476 inode->i_gid = attr->ia_gid;
1477+ if ((ia_valid & ATTR_TAG) && IS_TAGGED(inode))
1478+ inode->i_tag = attr->ia_tag;
1479 if (ia_valid & ATTR_ATIME)
1480 inode->i_atime = timespec_trunc(attr->ia_atime,
1481 inode->i_sb->s_time_gran);
cc23e853 1482@@ -210,7 +219,8 @@ int notify_change(struct dentry * dentry
92598135 1483
cc23e853 1484 WARN_ON_ONCE(!inode_is_locked(inode));
78865d5b
AM
1485
1486- if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
1487+ if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
1488+ ATTR_TAG | ATTR_TIMES_SET)) {
1489 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1490 return -EPERM;
1491 }
09a55596
AM
1492diff -NurpP --minimal linux-4.9.135/fs/block_dev.c linux-4.9.135-vs2.3.9.8/fs/block_dev.c
1493--- linux-4.9.135/fs/block_dev.c 2018-10-20 10:39:16.000000000 +0000
1494+++ linux-4.9.135-vs2.3.9.8/fs/block_dev.c 2018-10-20 04:58:13.000000000 +0000
cc23e853
AM
1495@@ -31,6 +31,7 @@
1496 #include <linux/dax.h>
1497 #include <linux/badblocks.h>
1498 #include <linux/falloc.h>
2380c486
JR
1499+#include <linux/vs_device.h>
1500 #include <asm/uaccess.h>
1501 #include "internal.h"
1502
cc23e853 1503@@ -720,6 +721,7 @@ struct block_device *bdget(dev_t dev)
2380c486
JR
1504 bdev->bd_invalidated = 0;
1505 inode->i_mode = S_IFBLK;
1506 inode->i_rdev = dev;
1507+ inode->i_mdev = dev;
1508 inode->i_bdev = bdev;
1509 inode->i_data.a_ops = &def_blk_aops;
1510 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
cc23e853 1511@@ -766,6 +768,11 @@ EXPORT_SYMBOL(bdput);
2380c486
JR
1512 static struct block_device *bd_acquire(struct inode *inode)
1513 {
1514 struct block_device *bdev;
1515+ dev_t mdev;
1516+
1517+ if (!vs_map_blkdev(inode->i_rdev, &mdev, DATTR_OPEN))
1518+ return NULL;
1519+ inode->i_mdev = mdev;
1520
1521 spin_lock(&bdev_lock);
1522 bdev = inode->i_bdev;
cc23e853 1523@@ -776,7 +783,7 @@ static struct block_device *bd_acquire(s
2380c486
JR
1524 }
1525 spin_unlock(&bdev_lock);
1526
1527- bdev = bdget(inode->i_rdev);
1528+ bdev = bdget(mdev);
1529 if (bdev) {
1530 spin_lock(&bdev_lock);
1531 if (!inode->i_bdev) {
09a55596
AM
1532diff -NurpP --minimal linux-4.9.135/fs/btrfs/ctree.h linux-4.9.135-vs2.3.9.8/fs/btrfs/ctree.h
1533--- linux-4.9.135/fs/btrfs/ctree.h 2018-10-20 10:39:16.000000000 +0000
1534+++ linux-4.9.135-vs2.3.9.8/fs/btrfs/ctree.h 2018-10-20 04:58:13.000000000 +0000
cc23e853 1535@@ -1321,6 +1321,8 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(c
c2e5f7c8 1536 #define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
cc23e853 1537 #define BTRFS_DEFAULT_MAX_INLINE (2048)
e22b5178
AM
1538
1539+#define BTRFS_MOUNT_TAGGED (1 << 24)
1540+
1541 #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1542 #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
b00e13aa 1543 #define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt)
cc23e853 1544@@ -1668,6 +1670,7 @@ BTRFS_SETGET_FUNCS(inode_block_group, st
e22b5178
AM
1545 BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
1546 BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
1547 BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
1548+BTRFS_SETGET_FUNCS(inode_tag, struct btrfs_inode_item, tag, 16);
1549 BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
1550 BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
1551 BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
cc23e853 1552@@ -1715,6 +1718,10 @@ BTRFS_SETGET_FUNCS(extent_flags, struct
78865d5b
AM
1553
1554 BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
1555
1556+#define BTRFS_INODE_IXUNLINK (1 << 24)
1557+#define BTRFS_INODE_BARRIER (1 << 25)
1558+#define BTRFS_INODE_COW (1 << 26)
1559+
1560
1561 BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
1562
cc23e853 1563@@ -3197,6 +3204,7 @@ int btrfs_ioctl_get_supported_features(v
d4263eb0
JR
1564 void btrfs_update_iflags(struct inode *inode);
1565 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
c2e5f7c8 1566 int btrfs_is_empty_uuid(u8 *uuid);
d4263eb0 1567+int btrfs_sync_flags(struct inode *inode, int, int);
763640ca
JR
1568 int btrfs_defrag_file(struct inode *inode, struct file *file,
1569 struct btrfs_ioctl_defrag_range_args *range,
1570 u64 newer_than, unsigned long max_pages);
09a55596
AM
1571diff -NurpP --minimal linux-4.9.135/fs/btrfs/disk-io.c linux-4.9.135-vs2.3.9.8/fs/btrfs/disk-io.c
1572--- linux-4.9.135/fs/btrfs/disk-io.c 2018-10-20 10:39:16.000000000 +0000
1573+++ linux-4.9.135-vs2.3.9.8/fs/btrfs/disk-io.c 2018-10-20 05:55:42.000000000 +0000
1574@@ -2852,6 +2852,9 @@ int open_ctree(struct super_block *sb,
763640ca 1575 goto fail_alloc;
e22b5178
AM
1576 }
1577
cc23e853 1578+ if (btrfs_test_opt(fs_info, TAGGED))
e22b5178
AM
1579+ sb->s_flags |= MS_TAGGED;
1580+
1581 features = btrfs_super_incompat_flags(disk_super) &
1582 ~BTRFS_FEATURE_INCOMPAT_SUPP;
1583 if (features) {
09a55596
AM
1584diff -NurpP --minimal linux-4.9.135/fs/btrfs/inode.c linux-4.9.135-vs2.3.9.8/fs/btrfs/inode.c
1585--- linux-4.9.135/fs/btrfs/inode.c 2018-10-20 10:39:16.000000000 +0000
1586+++ linux-4.9.135-vs2.3.9.8/fs/btrfs/inode.c 2018-10-20 04:58:13.000000000 +0000
c2e5f7c8 1587@@ -43,6 +43,7 @@
b00e13aa 1588 #include <linux/blkdev.h>
c2e5f7c8 1589 #include <linux/posix_acl_xattr.h>
cc23e853 1590 #include <linux/uio.h>
e22b5178 1591+#include <linux/vs_tag.h>
e22b5178
AM
1592 #include "ctree.h"
1593 #include "disk-io.h"
c2e5f7c8 1594 #include "transaction.h"
09a55596 1595@@ -3701,6 +3702,9 @@ static int btrfs_read_locked_inode(struc
bb20add7 1596 unsigned long ptr;
e22b5178 1597 int maybe_acls;
e22b5178 1598 u32 rdev;
a4a22af8
AM
1599+ kuid_t kuid;
1600+ kgid_t kgid;
1601+ ktag_t ktag;
e22b5178 1602 int ret;
763640ca 1603 bool filled = false;
bb20add7 1604 int first_xattr_slot;
09a55596 1605@@ -3733,8 +3737,14 @@ static int btrfs_read_locked_inode(struc
a168f21d 1606 struct btrfs_inode_item);
e22b5178 1607 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
f6c5ef8b 1608 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
537831f9
AM
1609- i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
1610- i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
e22b5178 1611+
a4a22af8
AM
1612+ kuid = make_kuid(&init_user_ns, btrfs_inode_uid(leaf, inode_item));
1613+ kgid = make_kgid(&init_user_ns, btrfs_inode_gid(leaf, inode_item));
1614+ ktag = make_ktag(&init_user_ns, btrfs_inode_tag(leaf, inode_item));
1615+
1616+ inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
1617+ inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
1618+ inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, ktag);
e22b5178
AM
1619 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
1620
cc23e853 1621 inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
09a55596 1622@@ -3889,11 +3899,18 @@ static void fill_inode_item(struct btrfs
e22b5178
AM
1623 struct inode *inode)
1624 {
b00e13aa 1625 struct btrfs_map_token token;
a4a22af8
AM
1626+ uid_t uid = from_kuid(&init_user_ns,
1627+ TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
1628+ gid_t gid = from_kgid(&init_user_ns,
1629+ TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
b00e13aa
AM
1630
1631 btrfs_init_map_token(&token);
1632
1633- btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
1634- btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
1635+ btrfs_set_token_inode_uid(leaf, item, uid, &token);
1636+ btrfs_set_token_inode_gid(leaf, item, gid, &token);
e22b5178 1637+#ifdef CONFIG_TAGGING_INTERN
b00e13aa 1638+ btrfs_set_token_inode_tag(leaf, item, i_tag_read(inode), &token);
e22b5178 1639+#endif
b00e13aa
AM
1640 btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
1641 &token);
1642 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
09a55596 1643@@ -10646,6 +10663,7 @@ static const struct inode_operations btr
cc23e853 1644 .mknod = btrfs_mknod,
d4263eb0 1645 .listxattr = btrfs_listxattr,
d4263eb0
JR
1646 .permission = btrfs_permission,
1647+ .sync_flags = btrfs_sync_flags,
a168f21d 1648 .get_acl = btrfs_get_acl,
f15949f2 1649 .set_acl = btrfs_set_acl,
c2e5f7c8 1650 .update_time = btrfs_update_time,
09a55596 1651@@ -10654,6 +10672,7 @@ static const struct inode_operations btr
7e46296a 1652 static const struct inode_operations btrfs_dir_ro_inode_operations = {
d4263eb0 1653 .lookup = btrfs_lookup,
d4263eb0 1654 .permission = btrfs_permission,
d4263eb0 1655+ .sync_flags = btrfs_sync_flags,
c2e5f7c8 1656 .update_time = btrfs_update_time,
cc23e853
AM
1657 };
1658
09a55596 1659@@ -10719,6 +10738,7 @@ static const struct inode_operations btr
cc23e853 1660 .listxattr = btrfs_listxattr,
c2e5f7c8
JR
1661 .permission = btrfs_permission,
1662 .fiemap = btrfs_fiemap,
1663+ .sync_flags = btrfs_sync_flags,
1664 .get_acl = btrfs_get_acl,
bb20add7 1665 .set_acl = btrfs_set_acl,
c2e5f7c8 1666 .update_time = btrfs_update_time,
09a55596
AM
1667diff -NurpP --minimal linux-4.9.135/fs/btrfs/ioctl.c linux-4.9.135-vs2.3.9.8/fs/btrfs/ioctl.c
1668--- linux-4.9.135/fs/btrfs/ioctl.c 2018-10-20 10:39:16.000000000 +0000
1669+++ linux-4.9.135-vs2.3.9.8/fs/btrfs/ioctl.c 2018-10-20 04:58:13.000000000 +0000
cc23e853 1670@@ -110,10 +110,13 @@ static unsigned int btrfs_flags_to_ioctl
d4263eb0
JR
1671 {
1672 unsigned int iflags = 0;
1673
1674- if (flags & BTRFS_INODE_SYNC)
1675- iflags |= FS_SYNC_FL;
1676 if (flags & BTRFS_INODE_IMMUTABLE)
1677 iflags |= FS_IMMUTABLE_FL;
1678+ if (flags & BTRFS_INODE_IXUNLINK)
1679+ iflags |= FS_IXUNLINK_FL;
1680+
1681+ if (flags & BTRFS_INODE_SYNC)
1682+ iflags |= FS_SYNC_FL;
1683 if (flags & BTRFS_INODE_APPEND)
1684 iflags |= FS_APPEND_FL;
1685 if (flags & BTRFS_INODE_NODUMP)
cc23e853
AM
1686@@ -130,34 +133,84 @@ static unsigned int btrfs_flags_to_ioctl
1687 else if (flags & BTRFS_INODE_COMPRESS)
1688 iflags |= FS_COMPR_FL;
d4263eb0
JR
1689
1690+ if (flags & BTRFS_INODE_BARRIER)
1691+ iflags |= FS_BARRIER_FL;
1692+ if (flags & BTRFS_INODE_COW)
1693+ iflags |= FS_COW_FL;
1694 return iflags;
1695 }
1696
1697 /*
1698- * Update inode->i_flags based on the btrfs internal flags.
1699+ * Update inode->i_(v)flags based on the btrfs internal flags.
1700 */
1701 void btrfs_update_iflags(struct inode *inode)
1702 {
1703 struct btrfs_inode *ip = BTRFS_I(inode);
bb20add7 1704 unsigned int new_fl = 0;
d4263eb0
JR
1705
1706- if (ip->flags & BTRFS_INODE_SYNC)
bb20add7 1707- new_fl |= S_SYNC;
d4263eb0 1708 if (ip->flags & BTRFS_INODE_IMMUTABLE)
bb20add7 1709 new_fl |= S_IMMUTABLE;
d4263eb0 1710+ if (ip->flags & BTRFS_INODE_IXUNLINK)
bb20add7 1711+ new_fl |= S_IXUNLINK;
d4263eb0
JR
1712+
1713+ if (ip->flags & BTRFS_INODE_SYNC)
bb20add7 1714+ new_fl |= S_SYNC;
d4263eb0 1715 if (ip->flags & BTRFS_INODE_APPEND)
bb20add7 1716 new_fl |= S_APPEND;
d4263eb0 1717 if (ip->flags & BTRFS_INODE_NOATIME)
bb20add7 1718 new_fl |= S_NOATIME;
d4263eb0 1719 if (ip->flags & BTRFS_INODE_DIRSYNC)
bb20add7
AM
1720 new_fl |= S_DIRSYNC;
1721-
1722 set_mask_bits(&inode->i_flags,
1723- S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
1724+ S_SYNC | S_APPEND | S_IMMUTABLE | S_IXUNLINK | S_NOATIME | S_DIRSYNC,
1725 new_fl);
d4263eb0 1726+
bb20add7 1727+ new_fl = 0;
d4263eb0 1728+ if (ip->flags & BTRFS_INODE_BARRIER)
bb20add7 1729+ new_fl |= V_BARRIER;
d4263eb0 1730+ if (ip->flags & BTRFS_INODE_COW)
bb20add7 1731+ new_fl |= V_COW;
d4263eb0 1732+
bb20add7
AM
1733+ set_mask_bits(&inode->i_vflags,
1734+ V_BARRIER | V_COW, new_fl);
1735 }
1736
1737 /*
d4263eb0
JR
1738+ * Update btrfs internal flags from inode->i_(v)flags.
1739+ */
1740+void btrfs_update_flags(struct inode *inode)
1741+{
1742+ struct btrfs_inode *ip = BTRFS_I(inode);
1743+
1744+ unsigned int flags = inode->i_flags;
1745+ unsigned int vflags = inode->i_vflags;
1746+
1747+ ip->flags &= ~(BTRFS_INODE_SYNC | BTRFS_INODE_APPEND |
1748+ BTRFS_INODE_IMMUTABLE | BTRFS_INODE_IXUNLINK |
1749+ BTRFS_INODE_NOATIME | BTRFS_INODE_DIRSYNC |
1750+ BTRFS_INODE_BARRIER | BTRFS_INODE_COW);
1751+
1752+ if (flags & S_IMMUTABLE)
1753+ ip->flags |= BTRFS_INODE_IMMUTABLE;
1754+ if (flags & S_IXUNLINK)
1755+ ip->flags |= BTRFS_INODE_IXUNLINK;
1756+
1757+ if (flags & S_SYNC)
1758+ ip->flags |= BTRFS_INODE_SYNC;
1759+ if (flags & S_APPEND)
1760+ ip->flags |= BTRFS_INODE_APPEND;
1761+ if (flags & S_NOATIME)
1762+ ip->flags |= BTRFS_INODE_NOATIME;
1763+ if (flags & S_DIRSYNC)
1764+ ip->flags |= BTRFS_INODE_DIRSYNC;
1765+
1766+ if (vflags & V_BARRIER)
1767+ ip->flags |= BTRFS_INODE_BARRIER;
1768+ if (vflags & V_COW)
1769+ ip->flags |= BTRFS_INODE_COW;
bb20add7
AM
1770+ }
1771+
1772+/*
1773 * Inherit flags from the parent inode.
1774 *
1775 * Currently only the compression flags and the cow flags are inherited.
cc23e853 1776@@ -170,6 +223,7 @@ void btrfs_inherit_iflags(struct inode *
f6c5ef8b 1777 return;
d4263eb0 1778
f6c5ef8b
AM
1779 flags = BTRFS_I(dir)->flags;
1780+ flags &= ~BTRFS_INODE_BARRIER;
d4263eb0 1781
f6c5ef8b
AM
1782 if (flags & BTRFS_INODE_NOCOMPRESS) {
1783 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
cc23e853 1784@@ -188,6 +242,30 @@ void btrfs_inherit_iflags(struct inode *
d4263eb0
JR
1785 btrfs_update_iflags(inode);
1786 }
1787
1788+int btrfs_sync_flags(struct inode *inode, int flags, int vflags)
1789+{
1790+ struct btrfs_inode *ip = BTRFS_I(inode);
1791+ struct btrfs_root *root = ip->root;
1792+ struct btrfs_trans_handle *trans;
1793+ int ret;
1794+
763640ca 1795+ trans = btrfs_join_transaction(root);
d4263eb0
JR
1796+ BUG_ON(!trans);
1797+
d4263eb0
JR
1798+ inode->i_flags = flags;
1799+ inode->i_vflags = vflags;
1800+ btrfs_update_flags(inode);
e22b5178
AM
1801+
1802+ ret = btrfs_update_inode(trans, root, inode);
1803+ BUG_ON(ret);
1804+
1805+ btrfs_update_iflags(inode);
d4263eb0
JR
1806+ inode->i_ctime = CURRENT_TIME;
1807+ btrfs_end_transaction(trans, root);
1808+
1809+ return 0;
1810+}
1811+
1812 static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
1813 {
b00e13aa 1814 struct btrfs_inode *ip = BTRFS_I(file_inode(file));
cc23e853 1815@@ -250,21 +328,27 @@ static int btrfs_ioctl_setflags(struct f
d4263eb0
JR
1816
1817 flags = btrfs_mask_flags(inode->i_mode, flags);
1818 oldflags = btrfs_flags_to_ioctl(ip->flags);
1819- if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
1820+ if ((flags ^ oldflags) & (FS_APPEND_FL |
1821+ FS_IMMUTABLE_FL | FS_IXUNLINK_FL)) {
1822 if (!capable(CAP_LINUX_IMMUTABLE)) {
1823 ret = -EPERM;
1824 goto out_unlock;
92598135
AM
1825 }
1826 }
d4263eb0
JR
1827
1828- if (flags & FS_SYNC_FL)
1829- ip->flags |= BTRFS_INODE_SYNC;
1830- else
1831- ip->flags &= ~BTRFS_INODE_SYNC;
1832 if (flags & FS_IMMUTABLE_FL)
1833 ip->flags |= BTRFS_INODE_IMMUTABLE;
1834 else
1835 ip->flags &= ~BTRFS_INODE_IMMUTABLE;
1836+ if (flags & FS_IXUNLINK_FL)
1837+ ip->flags |= BTRFS_INODE_IXUNLINK;
1838+ else
1839+ ip->flags &= ~BTRFS_INODE_IXUNLINK;
1840+
1841+ if (flags & FS_SYNC_FL)
1842+ ip->flags |= BTRFS_INODE_SYNC;
1843+ else
1844+ ip->flags &= ~BTRFS_INODE_SYNC;
1845 if (flags & FS_APPEND_FL)
1846 ip->flags |= BTRFS_INODE_APPEND;
1847 else
09a55596
AM
1848diff -NurpP --minimal linux-4.9.135/fs/btrfs/super.c linux-4.9.135-vs2.3.9.8/fs/btrfs/super.c
1849--- linux-4.9.135/fs/btrfs/super.c 2018-10-20 10:39:16.000000000 +0000
1850+++ linux-4.9.135-vs2.3.9.8/fs/btrfs/super.c 2018-10-20 04:58:13.000000000 +0000
cc23e853
AM
1851@@ -327,7 +327,7 @@ enum {
1852 #ifdef CONFIG_BTRFS_DEBUG
1853 Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
1854 #endif
db55b927 1855- Opt_err,
f6c5ef8b 1856+ Opt_tag, Opt_notag, Opt_tagid, Opt_err,
e22b5178
AM
1857 };
1858
cc23e853
AM
1859 static const match_table_t tokens = {
1860@@ -388,6 +388,9 @@ static const match_table_t tokens = {
1861 {Opt_fragment_metadata, "fragment=metadata"},
1862 {Opt_fragment_all, "fragment=all"},
1863 #endif
e22b5178
AM
1864+ {Opt_tag, "tag"},
1865+ {Opt_notag, "notag"},
1866+ {Opt_tagid, "tagid=%u"},
1867 {Opt_err, NULL},
1868 };
1869
cc23e853
AM
1870@@ -833,6 +836,22 @@ int btrfs_parse_options(struct btrfs_roo
1871 btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
1e8b8f9b 1872 break;
cc23e853 1873 #endif
e22b5178
AM
1874+#ifndef CONFIG_TAGGING_NONE
1875+ case Opt_tag:
1876+ printk(KERN_INFO "btrfs: use tagging\n");
1877+ btrfs_set_opt(info->mount_opt, TAGGED);
1878+ break;
1879+ case Opt_notag:
1880+ printk(KERN_INFO "btrfs: disabled tagging\n");
1881+ btrfs_clear_opt(info->mount_opt, TAGGED);
1882+ break;
1883+#endif
1884+#ifdef CONFIG_PROPAGATE
1885+ case Opt_tagid:
1886+ /* use args[0] */
1887+ btrfs_set_opt(info->mount_opt, TAGGED);
1888+ break;
1889+#endif
2bf5ad28 1890 case Opt_err:
cc23e853
AM
1891 btrfs_info(root->fs_info,
1892 "unrecognized mount option '%s'", p);
1893@@ -1754,6 +1773,12 @@ static int btrfs_remount(struct super_bl
42bc425c
AM
1894 btrfs_resize_thread_pool(fs_info,
1895 fs_info->thread_pool_size, old_thread_pool_size);
e22b5178 1896
cc23e853 1897+ if (btrfs_test_opt(fs_info, TAGGED) && !(sb->s_flags & MS_TAGGED)) {
e22b5178
AM
1898+ printk("btrfs: %s: tagging not permitted on remount.\n",
1899+ sb->s_id);
1900+ return -EINVAL;
1901+ }
1902+
1903 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
b00e13aa 1904 goto out;
e22b5178 1905
09a55596
AM
1906diff -NurpP --minimal linux-4.9.135/fs/char_dev.c linux-4.9.135-vs2.3.9.8/fs/char_dev.c
1907--- linux-4.9.135/fs/char_dev.c 2016-12-11 19:17:54.000000000 +0000
1908+++ linux-4.9.135-vs2.3.9.8/fs/char_dev.c 2018-10-20 04:58:13.000000000 +0000
4744a4b1 1909@@ -21,6 +21,8 @@
2380c486
JR
1910 #include <linux/mutex.h>
1911 #include <linux/backing-dev.h>
7942c842 1912 #include <linux/tty.h>
2380c486
JR
1913+#include <linux/vs_context.h>
1914+#include <linux/vs_device.h>
1915
ec22aa5c
AM
1916 #include "internal.h"
1917
cc23e853 1918@@ -354,14 +356,21 @@ static int chrdev_open(struct inode *ino
2380c486
JR
1919 struct cdev *p;
1920 struct cdev *new = NULL;
1921 int ret = 0;
1922+ dev_t mdev;
1923+
1924+ if (!vs_map_chrdev(inode->i_rdev, &mdev, DATTR_OPEN))
1925+ return -EPERM;
1926+ inode->i_mdev = mdev;
1927
1928 spin_lock(&cdev_lock);
1929 p = inode->i_cdev;
1930 if (!p) {
1931 struct kobject *kobj;
1932 int idx;
1933+
1934 spin_unlock(&cdev_lock);
1935- kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
1936+
1937+ kobj = kobj_lookup(cdev_map, mdev, &idx);
1938 if (!kobj)
1939 return -ENXIO;
1940 new = container_of(kobj, struct cdev, kobj);
09a55596
AM
1941diff -NurpP --minimal linux-4.9.135/fs/dcache.c linux-4.9.135-vs2.3.9.8/fs/dcache.c
1942--- linux-4.9.135/fs/dcache.c 2018-10-20 10:39:17.000000000 +0000
1943+++ linux-4.9.135-vs2.3.9.8/fs/dcache.c 2018-10-20 05:55:42.000000000 +0000
cc23e853 1944@@ -39,6 +39,7 @@
f6c5ef8b 1945 #include <linux/ratelimit.h>
c2e5f7c8 1946 #include <linux/list_lru.h>
cc23e853 1947 #include <linux/kasan.h>
d337f35e 1948+#include <linux/vs_limit.h>
cc23e853 1949
d337f35e 1950 #include "internal.h"
db55b927 1951 #include "mount.h"
09a55596 1952@@ -690,6 +691,7 @@ static inline bool fast_dput(struct dent
cc23e853
AM
1953 spin_lock(&dentry->d_lock);
1954 if (dentry->d_lockref.count > 1) {
1955 dentry->d_lockref.count--;
1956+ vx_dentry_dec(dentry);
1957 spin_unlock(&dentry->d_lock);
1958 return 1;
1959 }
09a55596 1960@@ -821,6 +823,7 @@ repeat:
cc23e853 1961 dentry_lru_add(dentry);
d337f35e 1962
cc23e853
AM
1963 dentry->d_lockref.count--;
1964+ vx_dentry_dec(dentry);
1965 spin_unlock(&dentry->d_lock);
1966 return;
1967
09a55596 1968@@ -838,6 +841,7 @@ EXPORT_SYMBOL(dput);
d33d7b00 1969 static inline void __dget_dlock(struct dentry *dentry)
2380c486 1970 {
c2e5f7c8 1971 dentry->d_lockref.count++;
2380c486 1972+ vx_dentry_inc(dentry);
d337f35e 1973 }
2380c486 1974
d33d7b00 1975 static inline void __dget(struct dentry *dentry)
09a55596 1976@@ -850,6 +854,8 @@ struct dentry *dget_parent(struct dentry
bb20add7
AM
1977 int gotref;
1978 struct dentry *ret;
1979
1980+ vx_dentry_dec(dentry);
1981+
1982 /*
1983 * Do optimistic parent lookup without any
1984 * locking.
09a55596 1985@@ -880,6 +886,7 @@ repeat:
cc23e853
AM
1986 rcu_read_unlock();
1987 BUG_ON(!ret->d_lockref.count);
1988 ret->d_lockref.count++;
1989+ vx_dentry_inc(ret);
1990 spin_unlock(&ret->d_lock);
1991 return ret;
1992 }
09a55596 1993@@ -1034,6 +1041,7 @@ static void shrink_dentry_list(struct li
cc23e853
AM
1994 parent = lock_parent(dentry);
1995 if (dentry->d_lockref.count != 1) {
1996 dentry->d_lockref.count--;
1997+ vx_dentry_dec(dentry);
1998 spin_unlock(&dentry->d_lock);
1999 if (parent)
2000 spin_unlock(&parent->d_lock);
09a55596 2001@@ -1600,6 +1608,9 @@ struct dentry *__d_alloc(struct super_bl
d337f35e 2002 char *dname;
cc23e853 2003 int err;
d337f35e
JR
2004
2005+ if (!vx_dentry_avail(1))
2006+ return NULL;
2007+
2380c486 2008 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
d337f35e
JR
2009 if (!dentry)
2010 return NULL;
09a55596 2011@@ -1643,6 +1654,7 @@ struct dentry *__d_alloc(struct super_bl
d337f35e 2012
c2e5f7c8 2013 dentry->d_lockref.count = 1;
763640ca 2014 dentry->d_flags = 0;
ab30d09f 2015+ vx_dentry_inc(dentry);
ab30d09f 2016 spin_lock_init(&dentry->d_lock);
d33d7b00 2017 seqcount_init(&dentry->d_seq);
763640ca 2018 dentry->d_inode = NULL;
09a55596 2019@@ -2316,6 +2328,7 @@ struct dentry *__d_lookup(const struct d
cc23e853 2020 goto next;
2380c486 2021
c2e5f7c8 2022 dentry->d_lockref.count++;
2380c486
JR
2023+ vx_dentry_inc(dentry);
2024 found = dentry;
d337f35e 2025 spin_unlock(&dentry->d_lock);
2380c486 2026 break;
09a55596 2027@@ -3574,6 +3587,7 @@ static enum d_walk_ret d_genocide_kill(v
cc23e853
AM
2028 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2029 dentry->d_flags |= DCACHE_GENOCIDE;
2030 dentry->d_lockref.count--;
2031+ vx_dentry_dec(dentry);
2032 }
2033 }
2034 return D_WALK_CONTINUE;
09a55596
AM
2035diff -NurpP --minimal linux-4.9.135/fs/devpts/inode.c linux-4.9.135-vs2.3.9.8/fs/devpts/inode.c
2036--- linux-4.9.135/fs/devpts/inode.c 2016-12-11 19:17:54.000000000 +0000
2037+++ linux-4.9.135-vs2.3.9.8/fs/devpts/inode.c 2018-10-20 04:58:13.000000000 +0000
bb20add7 2038@@ -27,6 +27,7 @@
d337f35e 2039 #include <linux/parser.h>
2380c486
JR
2040 #include <linux/fsnotify.h>
2041 #include <linux/seq_file.h>
d337f35e
JR
2042+#include <linux/vs_base.h>
2043
2380c486 2044 #define DEVPTS_DEFAULT_MODE 0600
ec22aa5c 2045 /*
bb20add7 2046@@ -38,6 +39,21 @@
ec22aa5c
AM
2047 #define DEVPTS_DEFAULT_PTMX_MODE 0000
2048 #define PTMX_MINOR 2
2380c486 2049
a168f21d 2050+static int devpts_permission(struct inode *inode, int mask)
d337f35e
JR
2051+{
2052+ int ret = -EACCES;
2053+
2054+ /* devpts is xid tagged */
61333608 2055+ if (vx_check((vxid_t)i_tag_read(inode), VS_WATCH_P | VS_IDENT))
a168f21d 2056+ ret = generic_permission(inode, mask);
d337f35e
JR
2057+ return ret;
2058+}
2059+
2060+static struct inode_operations devpts_file_inode_operations = {
2061+ .permission = devpts_permission,
2062+};
2380c486 2063+
1e8b8f9b
AM
2064+
2065 /*
2066 * sysctl support for setting limits on the number of Unix98 ptys allocated.
2067 * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly.
cc23e853 2068@@ -363,6 +379,34 @@ static int devpts_show_options(struct se
d337f35e
JR
2069 return 0;
2070 }
2071
2072+static int devpts_filter(struct dentry *de)
2073+{
61333608 2074+ vxid_t xid = 0;
b3b0d4fd 2075+
d337f35e 2076+ /* devpts is xid tagged */
b3b0d4fd 2077+ if (de && de->d_inode)
61333608 2078+ xid = (vxid_t)i_tag_read(de->d_inode);
b3b0d4fd
AM
2079+#ifdef CONFIG_VSERVER_WARN_DEVPTS
2080+ else
2081+ vxwprintk_task(1, "devpts " VS_Q("%.*s") " without inode.",
2082+ de->d_name.len, de->d_name.name);
2083+#endif
2084+ return vx_check(xid, VS_WATCH_P | VS_IDENT);
d337f35e
JR
2085+}
2086+
c2e5f7c8 2087+static int devpts_readdir(struct file * filp, struct dir_context *ctx)
d337f35e 2088+{
c2e5f7c8 2089+ return dcache_readdir_filter(filp, ctx, devpts_filter);
d337f35e
JR
2090+}
2091+
2092+static struct file_operations devpts_dir_operations = {
2093+ .open = dcache_dir_open,
2094+ .release = dcache_dir_close,
2095+ .llseek = dcache_dir_lseek,
2096+ .read = generic_read_dir,
c2e5f7c8 2097+ .iterate = devpts_readdir,
d337f35e
JR
2098+};
2099+
2380c486 2100 static const struct super_operations devpts_sops = {
d337f35e
JR
2101 .statfs = simple_statfs,
2102 .remount_fs = devpts_remount,
cc23e853
AM
2103@@ -415,8 +459,10 @@ devpts_fill_super(struct super_block *s,
2104 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
d337f35e
JR
2105 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
2106 inode->i_op = &simple_dir_inode_operations;
2107- inode->i_fop = &simple_dir_operations;
2108+ inode->i_fop = &devpts_dir_operations;
f6c5ef8b 2109 set_nlink(inode, 2);
d337f35e 2110+ /* devpts is xid tagged */
61333608 2111+ i_tag_write(inode, (vtag_t)vx_current_xid());
d337f35e 2112
1e8b8f9b 2113 s->s_root = d_make_root(inode);
cc23e853
AM
2114 if (!s->s_root) {
2115@@ -542,6 +588,9 @@ struct dentry *devpts_pty_new(struct pts
ec22aa5c 2116 inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
cc23e853
AM
2117 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
2118 init_special_inode(inode, S_IFCHR|opts->mode, MKDEV(UNIX98_PTY_SLAVE_MAJOR, index));
d337f35e 2119+ /* devpts is xid tagged */
61333608 2120+ i_tag_write(inode, (vtag_t)vx_current_xid());
d337f35e 2121+ inode->i_op = &devpts_file_inode_operations;
d337f35e 2122
b00e13aa 2123 sprintf(s, "%d", index);
cc23e853 2124
09a55596
AM
2125diff -NurpP --minimal linux-4.9.135/fs/ext2/balloc.c linux-4.9.135-vs2.3.9.8/fs/ext2/balloc.c
2126--- linux-4.9.135/fs/ext2/balloc.c 2016-12-11 19:17:54.000000000 +0000
2127+++ linux-4.9.135-vs2.3.9.8/fs/ext2/balloc.c 2018-10-20 04:58:13.000000000 +0000
b00e13aa 2128@@ -693,7 +693,6 @@ ext2_try_to_allocate(struct super_block
2380c486
JR
2129 start = 0;
2130 end = EXT2_BLOCKS_PER_GROUP(sb);
d337f35e 2131 }
2380c486
JR
2132-
2133 BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb));
2134
2135 repeat:
09a55596
AM
2136diff -NurpP --minimal linux-4.9.135/fs/ext2/ext2.h linux-4.9.135-vs2.3.9.8/fs/ext2/ext2.h
2137--- linux-4.9.135/fs/ext2/ext2.h 2016-12-11 19:17:54.000000000 +0000
2138+++ linux-4.9.135-vs2.3.9.8/fs/ext2/ext2.h 2018-10-20 04:58:13.000000000 +0000
cc23e853 2139@@ -247,8 +247,12 @@ struct ext2_group_desc
1e8b8f9b
AM
2140 #define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */
2141 #define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */
2142 #define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
2143+#define EXT2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */
2144 #define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
2145
2146+#define EXT2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */
2147+#define EXT2_COW_FL FS_COW_FL /* Copy on Write marker */
2148+
2149 #define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
2150 #define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
2151
cc23e853 2152@@ -332,7 +336,8 @@ struct ext2_inode {
1e8b8f9b
AM
2153 __u16 i_pad1;
2154 __le16 l_i_uid_high; /* these 2 fields */
2155 __le16 l_i_gid_high; /* were reserved2[0] */
2156- __u32 l_i_reserved2;
2157+ __le16 l_i_tag; /* Context Tag */
2158+ __u16 l_i_reserved2;
2159 } linux2;
2160 struct {
2161 __u8 h_i_frag; /* Fragment number */
cc23e853 2162@@ -360,6 +365,7 @@ struct ext2_inode {
1e8b8f9b
AM
2163 #define i_gid_low i_gid
2164 #define i_uid_high osd2.linux2.l_i_uid_high
2165 #define i_gid_high osd2.linux2.l_i_gid_high
2166+#define i_raw_tag osd2.linux2.l_i_tag
2167 #define i_reserved2 osd2.linux2.l_i_reserved2
2168
2169 /*
cc23e853
AM
2170@@ -393,6 +399,7 @@ struct ext2_inode {
2171 #else
2172 #define EXT2_MOUNT_DAX 0
2173 #endif
2174+#define EXT2_MOUNT_TAGGED 0x200000 /* Enable Context Tags */
1e8b8f9b
AM
2175
2176
2177 #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
cc23e853 2178@@ -782,6 +789,7 @@ extern void ext2_set_inode_flags(struct
93de0823
AM
2179 extern void ext2_get_inode_flags(struct ext2_inode_info *);
2180 extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2181 u64 start, u64 len);
d4263eb0
JR
2182+extern int ext2_sync_flags(struct inode *, int, int);
2183
2184 /* ioctl.c */
2185 extern long ext2_ioctl(struct file *, unsigned int, unsigned long);
09a55596
AM
2186diff -NurpP --minimal linux-4.9.135/fs/ext2/file.c linux-4.9.135-vs2.3.9.8/fs/ext2/file.c
2187--- linux-4.9.135/fs/ext2/file.c 2016-12-11 19:17:54.000000000 +0000
2188+++ linux-4.9.135-vs2.3.9.8/fs/ext2/file.c 2018-10-20 04:58:13.000000000 +0000
cc23e853 2189@@ -247,4 +247,5 @@ const struct inode_operations ext2_file_
a168f21d 2190 .get_acl = ext2_get_acl,
bb20add7 2191 .set_acl = ext2_set_acl,
ec22aa5c 2192 .fiemap = ext2_fiemap,
d337f35e
JR
2193+ .sync_flags = ext2_sync_flags,
2194 };
09a55596
AM
2195diff -NurpP --minimal linux-4.9.135/fs/ext2/ialloc.c linux-4.9.135-vs2.3.9.8/fs/ext2/ialloc.c
2196--- linux-4.9.135/fs/ext2/ialloc.c 2016-12-11 19:17:54.000000000 +0000
2197+++ linux-4.9.135-vs2.3.9.8/fs/ext2/ialloc.c 2018-10-20 04:58:13.000000000 +0000
e22b5178
AM
2198@@ -17,6 +17,7 @@
2199 #include <linux/backing-dev.h>
2200 #include <linux/buffer_head.h>
2201 #include <linux/random.h>
2202+#include <linux/vs_tag.h>
2203 #include "ext2.h"
2204 #include "xattr.h"
2205 #include "acl.h"
cc23e853 2206@@ -551,6 +552,7 @@ got:
76514441
AM
2207 inode->i_mode = mode;
2208 inode->i_uid = current_fsuid();
2209 inode->i_gid = dir->i_gid;
a4a22af8 2210+ i_tag_write(inode, dx_current_fstag(sb));
e22b5178 2211 } else
76514441 2212 inode_init_owner(inode, dir, mode);
e22b5178 2213
09a55596
AM
2214diff -NurpP --minimal linux-4.9.135/fs/ext2/inode.c linux-4.9.135-vs2.3.9.8/fs/ext2/inode.c
2215--- linux-4.9.135/fs/ext2/inode.c 2018-10-20 10:39:17.000000000 +0000
2216+++ linux-4.9.135-vs2.3.9.8/fs/ext2/inode.c 2018-10-20 04:58:13.000000000 +0000
cc23e853
AM
2217@@ -35,6 +35,7 @@
2218 #include <linux/iomap.h>
ec22aa5c 2219 #include <linux/namei.h>
cc23e853 2220 #include <linux/uio.h>
d337f35e
JR
2221+#include <linux/vs_tag.h>
2222 #include "ext2.h"
2223 #include "acl.h"
cc23e853 2224 #include "xattr.h"
09a55596 2225@@ -1357,39 +1358,61 @@ void ext2_set_inode_flags(struct inode *
d337f35e
JR
2226 {
2227 unsigned int flags = EXT2_I(inode)->i_flags;
2228
cc23e853
AM
2229- inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
2230- S_DIRSYNC | S_DAX);
2231+ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK | S_DAX |
d337f35e
JR
2232+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2233+
2234+ if (flags & EXT2_IMMUTABLE_FL)
2235+ inode->i_flags |= S_IMMUTABLE;
2380c486
JR
2236+ if (flags & EXT2_IXUNLINK_FL)
2237+ inode->i_flags |= S_IXUNLINK;
d337f35e
JR
2238 if (flags & EXT2_SYNC_FL)
2239 inode->i_flags |= S_SYNC;
2240 if (flags & EXT2_APPEND_FL)
2241 inode->i_flags |= S_APPEND;
2242- if (flags & EXT2_IMMUTABLE_FL)
2243- inode->i_flags |= S_IMMUTABLE;
2244 if (flags & EXT2_NOATIME_FL)
2245 inode->i_flags |= S_NOATIME;
2246 if (flags & EXT2_DIRSYNC_FL)
2247 inode->i_flags |= S_DIRSYNC;
cc23e853
AM
2248 if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
2249 inode->i_flags |= S_DAX;
2380c486
JR
2250+
2251+ inode->i_vflags &= ~(V_BARRIER | V_COW);
2252+
2253+ if (flags & EXT2_BARRIER_FL)
2254+ inode->i_vflags |= V_BARRIER;
2255+ if (flags & EXT2_COW_FL)
2256+ inode->i_vflags |= V_COW;
2257 }
2258
2259 /* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
2260 void ext2_get_inode_flags(struct ext2_inode_info *ei)
2261 {
2262 unsigned int flags = ei->vfs_inode.i_flags;
2263+ unsigned int vflags = ei->vfs_inode.i_vflags;
2264+
2265+ ei->i_flags &= ~(EXT2_SYNC_FL | EXT2_APPEND_FL |
2266+ EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL |
2267+ EXT2_NOATIME_FL | EXT2_DIRSYNC_FL |
2268+ EXT2_BARRIER_FL | EXT2_COW_FL);
2269+
2270+ if (flags & S_IMMUTABLE)
2271+ ei->i_flags |= EXT2_IMMUTABLE_FL;
2272+ if (flags & S_IXUNLINK)
2273+ ei->i_flags |= EXT2_IXUNLINK_FL;
2274
2275- ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|
2276- EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL);
2277 if (flags & S_SYNC)
2278 ei->i_flags |= EXT2_SYNC_FL;
2279 if (flags & S_APPEND)
2280 ei->i_flags |= EXT2_APPEND_FL;
2281- if (flags & S_IMMUTABLE)
2282- ei->i_flags |= EXT2_IMMUTABLE_FL;
2283 if (flags & S_NOATIME)
2284 ei->i_flags |= EXT2_NOATIME_FL;
2285 if (flags & S_DIRSYNC)
2286 ei->i_flags |= EXT2_DIRSYNC_FL;
2287+
2288+ if (vflags & V_BARRIER)
2289+ ei->i_flags |= EXT2_BARRIER_FL;
2290+ if (vflags & V_COW)
2291+ ei->i_flags |= EXT2_COW_FL;
d337f35e
JR
2292 }
2293
2380c486 2294 struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
09a55596 2295@@ -1425,8 +1448,10 @@ struct inode *ext2_iget (struct super_bl
42bc425c
AM
2296 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2297 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
d337f35e 2298 }
42bc425c
AM
2299- i_uid_write(inode, i_uid);
2300- i_gid_write(inode, i_gid);
2301+ i_uid_write(inode, INOTAG_UID(DX_TAG(inode), i_uid, i_gid));
2302+ i_gid_write(inode, INOTAG_GID(DX_TAG(inode), i_uid, i_gid));
537831f9
AM
2303+ i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), i_uid, i_gid,
2304+ le16_to_cpu(raw_inode->i_raw_tag)));
f6c5ef8b 2305 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
d337f35e 2306 inode->i_size = le32_to_cpu(raw_inode->i_size);
2380c486 2307 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
09a55596 2308@@ -1533,8 +1558,10 @@ static int __ext2_write_inode(struct ino
d337f35e
JR
2309 struct ext2_inode_info *ei = EXT2_I(inode);
2310 struct super_block *sb = inode->i_sb;
2311 ino_t ino = inode->i_ino;
42bc425c
AM
2312- uid_t uid = i_uid_read(inode);
2313- gid_t gid = i_gid_read(inode);
a4a22af8
AM
2314+ uid_t uid = from_kuid(&init_user_ns,
2315+ TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
2316+ gid_t gid = from_kgid(&init_user_ns,
2317+ TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
d337f35e
JR
2318 struct buffer_head * bh;
2319 struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
2320 int n;
09a55596 2321@@ -1570,6 +1597,9 @@ static int __ext2_write_inode(struct ino
d337f35e
JR
2322 raw_inode->i_uid_high = 0;
2323 raw_inode->i_gid_high = 0;
2324 }
2325+#ifdef CONFIG_TAGGING_INTERN
537831f9 2326+ raw_inode->i_raw_tag = cpu_to_le16(i_tag_read(inode));
d337f35e
JR
2327+#endif
2328 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2329 raw_inode->i_size = cpu_to_le32(inode->i_size);
2330 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
09a55596 2331@@ -1653,7 +1683,8 @@ int ext2_setattr(struct dentry *dentry,
cc23e853
AM
2332 return error;
2333 }
42bc425c
AM
2334 if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
2335- (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
2336+ (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)) ||
537831f9 2337+ (iattr->ia_valid & ATTR_TAG && !tag_eq(iattr->ia_tag, inode->i_tag))) {
78865d5b 2338 error = dquot_transfer(inode, iattr);
d337f35e
JR
2339 if (error)
2340 return error;
09a55596
AM
2341diff -NurpP --minimal linux-4.9.135/fs/ext2/ioctl.c linux-4.9.135-vs2.3.9.8/fs/ext2/ioctl.c
2342--- linux-4.9.135/fs/ext2/ioctl.c 2016-12-11 19:17:54.000000000 +0000
2343+++ linux-4.9.135-vs2.3.9.8/fs/ext2/ioctl.c 2018-10-20 04:58:13.000000000 +0000
d4263eb0
JR
2344@@ -17,6 +17,16 @@
2345 #include <asm/uaccess.h>
2346
2347
2348+int ext2_sync_flags(struct inode *inode, int flags, int vflags)
2349+{
2350+ inode->i_flags = flags;
2351+ inode->i_vflags = vflags;
2352+ ext2_get_inode_flags(EXT2_I(inode));
2353+ inode->i_ctime = CURRENT_TIME_SEC;
2354+ mark_inode_dirty(inode);
2355+ return 0;
2356+}
2357+
2358 long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2359 {
b00e13aa 2360 struct inode *inode = file_inode(filp);
d4263eb0 2361@@ -51,6 +61,11 @@ long ext2_ioctl(struct file *filp, unsig
d337f35e 2362
ec22aa5c 2363 flags = ext2_mask_flags(inode->i_mode, flags);
d337f35e 2364
2380c486
JR
2365+ if (IS_BARRIER(inode)) {
2366+ vxwprintk_task(1, "messing with the barrier.");
2367+ return -EACCES;
2368+ }
2369+
cc23e853 2370 inode_lock(inode);
2380c486
JR
2371 /* Is it quota file? Do not allow user to mess with it */
2372 if (IS_NOQUOTA(inode)) {
d4263eb0 2373@@ -66,7 +81,9 @@ long ext2_ioctl(struct file *filp, unsig
d337f35e
JR
2374 *
2375 * This test looks nicer. Thanks to Pauline Middelink
2376 */
2377- if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
2378+ if ((oldflags & EXT2_IMMUTABLE_FL) ||
2379+ ((flags ^ oldflags) & (EXT2_APPEND_FL |
2380c486
JR
2380+ EXT2_IMMUTABLE_FL | EXT2_IXUNLINK_FL))) {
2381 if (!capable(CAP_LINUX_IMMUTABLE)) {
cc23e853 2382 inode_unlock(inode);
2380c486 2383 ret = -EPERM;
d4263eb0
JR
2384@@ -74,7 +91,7 @@ long ext2_ioctl(struct file *filp, unsig
2385 }
2386 }
2387
2388- flags = flags & EXT2_FL_USER_MODIFIABLE;
2389+ flags &= EXT2_FL_USER_MODIFIABLE;
2390 flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE;
2391 ei->i_flags = flags;
db55b927 2392
09a55596
AM
2393diff -NurpP --minimal linux-4.9.135/fs/ext2/namei.c linux-4.9.135-vs2.3.9.8/fs/ext2/namei.c
2394--- linux-4.9.135/fs/ext2/namei.c 2018-10-20 10:39:17.000000000 +0000
2395+++ linux-4.9.135-vs2.3.9.8/fs/ext2/namei.c 2018-10-20 04:58:13.000000000 +0000
78865d5b 2396@@ -32,6 +32,7 @@
d337f35e
JR
2397
2398 #include <linux/pagemap.h>
78865d5b 2399 #include <linux/quotaops.h>
d337f35e
JR
2400+#include <linux/vs_tag.h>
2401 #include "ext2.h"
2402 #include "xattr.h"
2403 #include "acl.h"
09a55596 2404@@ -71,6 +72,7 @@ static struct dentry *ext2_lookup(struct
a168f21d
AM
2405 (unsigned long) ino);
2406 return ERR_PTR(-EIO);
ec22aa5c 2407 }
a168f21d 2408+ dx_propagate_tag(nd, inode);
d337f35e 2409 }
a168f21d
AM
2410 return d_splice_alias(inode, dentry);
2411 }
09a55596 2412@@ -443,6 +445,7 @@ const struct inode_operations ext2_speci
cc23e853 2413 .listxattr = ext2_listxattr,
d337f35e
JR
2414 #endif
2415 .setattr = ext2_setattr,
d337f35e 2416+ .sync_flags = ext2_sync_flags,
a168f21d 2417 .get_acl = ext2_get_acl,
bb20add7 2418 .set_acl = ext2_set_acl,
d337f35e 2419 };
09a55596
AM
2420diff -NurpP --minimal linux-4.9.135/fs/ext2/super.c linux-4.9.135-vs2.3.9.8/fs/ext2/super.c
2421--- linux-4.9.135/fs/ext2/super.c 2016-12-11 19:17:54.000000000 +0000
2422+++ linux-4.9.135-vs2.3.9.8/fs/ext2/super.c 2018-10-20 04:58:13.000000000 +0000
cc23e853 2423@@ -411,7 +411,8 @@ enum {
d337f35e
JR
2424 Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug,
2425 Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr,
cc23e853 2426 Opt_acl, Opt_noacl, Opt_xip, Opt_dax, Opt_ignore, Opt_err, Opt_quota,
2380c486
JR
2427- Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation
2428+ Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation,
2429+ Opt_tag, Opt_notag, Opt_tagid
d337f35e
JR
2430 };
2431
ec22aa5c 2432 static const match_table_t tokens = {
cc23e853 2433@@ -439,6 +440,9 @@ static const match_table_t tokens = {
d337f35e
JR
2434 {Opt_acl, "acl"},
2435 {Opt_noacl, "noacl"},
2436 {Opt_xip, "xip"},
2437+ {Opt_tag, "tag"},
2438+ {Opt_notag, "notag"},
2439+ {Opt_tagid, "tagid=%u"},
cc23e853 2440 {Opt_dax, "dax"},
d337f35e
JR
2441 {Opt_grpquota, "grpquota"},
2442 {Opt_ignore, "noquota"},
cc23e853 2443@@ -523,6 +527,20 @@ static int parse_options(char *options,
d337f35e
JR
2444 case Opt_nouid32:
2445 set_opt (sbi->s_mount_opt, NO_UID32);
2446 break;
2447+#ifndef CONFIG_TAGGING_NONE
2448+ case Opt_tag:
2449+ set_opt (sbi->s_mount_opt, TAGGED);
2450+ break;
2451+ case Opt_notag:
2452+ clear_opt (sbi->s_mount_opt, TAGGED);
2453+ break;
2454+#endif
2455+#ifdef CONFIG_PROPAGATE
2456+ case Opt_tagid:
2457+ /* use args[0] */
2458+ set_opt (sbi->s_mount_opt, TAGGED);
2459+ break;
2460+#endif
2461 case Opt_nocheck:
2462 clear_opt (sbi->s_mount_opt, CHECK);
2463 break;
cc23e853 2464@@ -887,6 +905,8 @@ static int ext2_fill_super(struct super_
2bf5ad28 2465 if (!parse_options((char *) data, sb))
d337f35e
JR
2466 goto failed_mount;
2467
cc23e853
AM
2468+ if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAGGED)
2469+ sb->s_flags |= MS_TAGGED;
2470 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2471 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
2472 MS_POSIXACL : 0);
2473@@ -1300,6 +1320,14 @@ static int ext2_remount (struct super_bl
2474 err = -EINVAL;
2475 goto restore_opts;
2476 }
2477+
2478+ if ((sbi->s_mount_opt & EXT2_MOUNT_TAGGED) &&
d337f35e 2479+ !(sb->s_flags & MS_TAGGED)) {
cc23e853
AM
2480+ printk("EXT2-fs: %s: tagging not permitted on remount.\n",
2481+ sb->s_id);
d4263eb0
JR
2482+ err = -EINVAL;
2483+ goto restore_opts;
d337f35e 2484+ }
78865d5b 2485
cc23e853
AM
2486 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2487 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
09a55596
AM
2488diff -NurpP --minimal linux-4.9.135/fs/ext4/ext4.h linux-4.9.135-vs2.3.9.8/fs/ext4/ext4.h
2489--- linux-4.9.135/fs/ext4/ext4.h 2018-10-20 10:39:17.000000000 +0000
2490+++ linux-4.9.135-vs2.3.9.8/fs/ext4/ext4.h 2018-10-20 11:46:17.000000000 +0000
cc23e853 2491@@ -392,8 +392,11 @@ struct flex_groups {
2380c486 2492 #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
78865d5b
AM
2493 #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
2494 #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
b00e13aa 2495+#define EXT4_BARRIER_FL 0x04000000 /* Barrier for chroot() */
2380c486 2496+#define EXT4_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
b00e13aa 2497 #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
cc23e853
AM
2498 #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
2499+#define EXT4_COW_FL 0x40000000 /* Copy on Write marker */
2380c486 2500 #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
d337f35e 2501
cc23e853
AM
2502 #define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
2503@@ -735,7 +738,7 @@ struct ext4_inode {
ec22aa5c
AM
2504 __le16 l_i_uid_high; /* these 2 fields */
2505 __le16 l_i_gid_high; /* were reserved2[0] */
42bc425c
AM
2506 __le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
2507- __le16 l_i_reserved;
ec22aa5c 2508+ __le16 l_i_tag; /* Context Tag */
ec22aa5c
AM
2509 } linux2;
2510 struct {
2511 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
cc23e853 2512@@ -892,6 +895,7 @@ do { \
ec22aa5c
AM
2513 #define i_gid_low i_gid
2514 #define i_uid_high osd2.linux2.l_i_uid_high
2515 #define i_gid_high osd2.linux2.l_i_gid_high
2516+#define i_raw_tag osd2.linux2.l_i_tag
42bc425c 2517 #define i_checksum_lo osd2.linux2.l_i_checksum_lo
d337f35e 2518
ec22aa5c 2519 #elif defined(__GNU__)
cc23e853
AM
2520@@ -1133,6 +1137,7 @@ struct ext4_inode_info {
2521 #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
2522 #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
2523 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
2524+#define EXT4_MOUNT_TAGGED 0x2000000 /* Enable Context Tags */
2525 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
2526 #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
2527 #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
09a55596 2528@@ -2492,6 +2497,7 @@ extern int ext4_punch_hole(struct inode
cc23e853
AM
2529 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
2530 extern void ext4_set_inode_flags(struct inode *);
2531 extern void ext4_get_inode_flags(struct ext4_inode_info *);
d4263eb0 2532+extern int ext4_sync_flags(struct inode *, int, int);
cc23e853
AM
2533 extern int ext4_alloc_da_blocks(struct inode *inode);
2534 extern void ext4_set_aops(struct inode *inode);
2535 extern int ext4_writepage_trans_blocks(struct inode *);
09a55596
AM
2536diff -NurpP --minimal linux-4.9.135/fs/ext4/file.c linux-4.9.135-vs2.3.9.8/fs/ext4/file.c
2537--- linux-4.9.135/fs/ext4/file.c 2018-10-20 10:39:17.000000000 +0000
2538+++ linux-4.9.135-vs2.3.9.8/fs/ext4/file.c 2018-10-20 04:58:13.000000000 +0000
cc23e853 2539@@ -691,5 +691,6 @@ const struct inode_operations ext4_file_
a168f21d 2540 .get_acl = ext4_get_acl,
bb20add7 2541 .set_acl = ext4_set_acl,
ec22aa5c 2542 .fiemap = ext4_fiemap,
d337f35e
JR
2543+ .sync_flags = ext4_sync_flags,
2544 };
2545
09a55596
AM
2546diff -NurpP --minimal linux-4.9.135/fs/ext4/ialloc.c linux-4.9.135-vs2.3.9.8/fs/ext4/ialloc.c
2547--- linux-4.9.135/fs/ext4/ialloc.c 2018-10-20 10:39:17.000000000 +0000
2548+++ linux-4.9.135-vs2.3.9.8/fs/ext4/ialloc.c 2018-10-20 05:55:42.000000000 +0000
cc23e853 2549@@ -21,6 +21,7 @@
e22b5178
AM
2550 #include <linux/random.h>
2551 #include <linux/bitops.h>
2552 #include <linux/blkdev.h>
2553+#include <linux/vs_tag.h>
2554 #include <asm/byteorder.h>
2555
2556 #include "ext4.h"
09a55596 2557@@ -777,6 +778,7 @@ struct inode *__ext4_new_inode(handle_t
76514441
AM
2558 inode->i_mode = mode;
2559 inode->i_uid = current_fsuid();
2560 inode->i_gid = dir->i_gid;
a4a22af8 2561+ i_tag_write(inode, dx_current_fstag(sb));
e22b5178 2562 } else
76514441 2563 inode_init_owner(inode, dir, mode);
cc23e853 2564
09a55596
AM
2565diff -NurpP --minimal linux-4.9.135/fs/ext4/inode.c linux-4.9.135-vs2.3.9.8/fs/ext4/inode.c
2566--- linux-4.9.135/fs/ext4/inode.c 2018-10-20 10:39:17.000000000 +0000
2567+++ linux-4.9.135-vs2.3.9.8/fs/ext4/inode.c 2018-10-20 05:55:42.000000000 +0000
cc23e853
AM
2568@@ -37,6 +37,7 @@
2569 #include <linux/printk.h>
2570 #include <linux/slab.h>
52afa9bd 2571 #include <linux/bitops.h>
d337f35e 2572+#include <linux/vs_tag.h>
ec22aa5c 2573
2380c486 2574 #include "ext4_jbd2.h"
d337f35e 2575 #include "xattr.h"
09a55596 2576@@ -4380,12 +4381,15 @@ void ext4_set_inode_flags(struct inode *
d337f35e 2577 unsigned int flags = EXT4_I(inode)->i_flags;
52afa9bd 2578 unsigned int new_fl = 0;
978063ce 2579
d337f35e 2580+ if (flags & EXT4_IMMUTABLE_FL)
52afa9bd 2581+ new_fl |= S_IMMUTABLE;
2380c486 2582+ if (flags & EXT4_IXUNLINK_FL)
52afa9bd 2583+ new_fl |= S_IXUNLINK;
978063ce 2584+
d337f35e 2585 if (flags & EXT4_SYNC_FL)
52afa9bd 2586 new_fl |= S_SYNC;
d337f35e 2587 if (flags & EXT4_APPEND_FL)
52afa9bd 2588 new_fl |= S_APPEND;
d337f35e 2589- if (flags & EXT4_IMMUTABLE_FL)
52afa9bd 2590- new_fl |= S_IMMUTABLE;
d337f35e 2591 if (flags & EXT4_NOATIME_FL)
52afa9bd 2592 new_fl |= S_NOATIME;
d337f35e 2593 if (flags & EXT4_DIRSYNC_FL)
09a55596 2594@@ -4393,31 +4397,52 @@ void ext4_set_inode_flags(struct inode *
cc23e853
AM
2595 if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
2596 new_fl |= S_DAX;
ca5d134c 2597 inode_set_flags(inode, new_fl,
cc23e853
AM
2598- S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
2599+ S_IXUNLINK | S_IMMUTABLE | S_DAX |
ca5d134c 2600+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
2380c486 2601+
978063ce 2602+ new_fl = 0;
2380c486 2603+ if (flags & EXT4_BARRIER_FL)
978063ce 2604+ new_fl |= V_BARRIER;
2380c486 2605+ if (flags & EXT4_COW_FL)
978063ce
JR
2606+ new_fl |= V_COW;
2607+
2608+ set_mask_bits(&inode->i_vflags,
2609+ V_BARRIER | V_COW, new_fl);
d337f35e
JR
2610 }
2611
2380c486
JR
2612 /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
2613 void ext4_get_inode_flags(struct ext4_inode_info *ei)
2614 {
76514441
AM
2615- unsigned int vfs_fl;
2616+ unsigned int vfs_fl, vfs_vf;
2617 unsigned long old_fl, new_fl;
2380c486 2618
76514441
AM
2619 do {
2620 vfs_fl = ei->vfs_inode.i_flags;
2621+ vfs_vf = ei->vfs_inode.i_vflags;
2622 old_fl = ei->i_flags;
2623 new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
2624 EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
2625- EXT4_DIRSYNC_FL);
2626+ EXT4_DIRSYNC_FL|EXT4_BARRIER_FL|
2627+ EXT4_COW_FL);
2628+
2629+ if (vfs_fl & S_IMMUTABLE)
2630+ new_fl |= EXT4_IMMUTABLE_FL;
2631+ if (vfs_fl & S_IXUNLINK)
2632+ new_fl |= EXT4_IXUNLINK_FL;
2633+
2634 if (vfs_fl & S_SYNC)
2635 new_fl |= EXT4_SYNC_FL;
2636 if (vfs_fl & S_APPEND)
2637 new_fl |= EXT4_APPEND_FL;
2638- if (vfs_fl & S_IMMUTABLE)
2639- new_fl |= EXT4_IMMUTABLE_FL;
2640 if (vfs_fl & S_NOATIME)
2641 new_fl |= EXT4_NOATIME_FL;
2642 if (vfs_fl & S_DIRSYNC)
2643 new_fl |= EXT4_DIRSYNC_FL;
2644+
2645+ if (vfs_vf & V_BARRIER)
2646+ new_fl |= EXT4_BARRIER_FL;
2647+ if (vfs_vf & V_COW)
2648+ new_fl |= EXT4_COW_FL;
2649 } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
ec22aa5c
AM
2650 }
2651
09a55596 2652@@ -4543,8 +4568,10 @@ struct inode *ext4_iget(struct super_blo
42bc425c
AM
2653 i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2654 i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
d337f35e 2655 }
42bc425c
AM
2656- i_uid_write(inode, i_uid);
2657- i_gid_write(inode, i_gid);
2658+ i_uid_write(inode, INOTAG_UID(DX_TAG(inode), i_uid, i_gid));
2659+ i_gid_write(inode, INOTAG_GID(DX_TAG(inode), i_uid, i_gid));
537831f9
AM
2660+ i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), i_uid, i_gid,
2661+ le16_to_cpu(raw_inode->i_raw_tag)));
cc23e853 2662 ei->i_projid = make_kprojid(&init_user_ns, i_projid);
f6c5ef8b 2663 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
2380c486 2664
09a55596 2665@@ -4860,8 +4887,10 @@ static int ext4_do_update_inode(handle_t
d337f35e 2666
2380c486 2667 ext4_get_inode_flags(ei);
d337f35e 2668 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
42bc425c
AM
2669- i_uid = i_uid_read(inode);
2670- i_gid = i_gid_read(inode);
a4a22af8
AM
2671+ i_uid = from_kuid(&init_user_ns,
2672+ TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag));
2673+ i_gid = from_kgid(&init_user_ns,
2674+ TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag));
cc23e853 2675 i_projid = from_kprojid(&init_user_ns, ei->i_projid);
ec22aa5c 2676 if (!(test_opt(inode->i_sb, NO_UID32))) {
42bc425c 2677 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
09a55596 2678@@ -4885,6 +4914,9 @@ static int ext4_do_update_inode(handle_t
d337f35e
JR
2679 raw_inode->i_uid_high = 0;
2680 raw_inode->i_gid_high = 0;
2681 }
2682+#ifdef CONFIG_TAGGING_INTERN
537831f9 2683+ raw_inode->i_raw_tag = cpu_to_le16(i_tag_read(inode));
d337f35e
JR
2684+#endif
2685 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
2380c486
JR
2686
2687 EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
09a55596 2688@@ -5138,7 +5170,8 @@ int ext4_setattr(struct dentry *dentry,
cc23e853
AM
2689 return error;
2690 }
42bc425c
AM
2691 if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
2692- (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
2693+ (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid)) ||
537831f9 2694+ (ia_valid & ATTR_TAG && !tag_eq(attr->ia_tag, inode->i_tag))) {
d337f35e
JR
2695 handle_t *handle;
2696
2697 /* (user+group)*(old+new) structure, inode write (sb,
09a55596 2698@@ -5161,6 +5194,8 @@ int ext4_setattr(struct dentry *dentry,
d337f35e
JR
2699 inode->i_uid = attr->ia_uid;
2700 if (attr->ia_valid & ATTR_GID)
2701 inode->i_gid = attr->ia_gid;
2702+ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
2703+ inode->i_tag = attr->ia_tag;
2704 error = ext4_mark_inode_dirty(handle, inode);
2705 ext4_journal_stop(handle);
2706 }
09a55596
AM
2707diff -NurpP --minimal linux-4.9.135/fs/ext4/ioctl.c linux-4.9.135-vs2.3.9.8/fs/ext4/ioctl.c
2708--- linux-4.9.135/fs/ext4/ioctl.c 2016-12-11 19:17:54.000000000 +0000
2709+++ linux-4.9.135-vs2.3.9.8/fs/ext4/ioctl.c 2018-10-20 04:58:13.000000000 +0000
cc23e853 2710@@ -15,6 +15,7 @@
ec22aa5c 2711 #include <linux/file.h>
cc23e853
AM
2712 #include <linux/quotaops.h>
2713 #include <linux/uuid.h>
d337f35e
JR
2714+#include <linux/vs_tag.h>
2715 #include <asm/uaccess.h>
2380c486
JR
2716 #include "ext4_jbd2.h"
2717 #include "ext4.h"
cc23e853
AM
2718@@ -226,7 +227,9 @@ static int ext4_ioctl_setflags(struct in
2719 *
2720 * This test looks nicer. Thanks to Pauline Middelink
2721 */
2722- if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
2723+ if ((oldflags & EXT4_IMMUTABLE_FL) ||
2724+ ((flags ^ oldflags) & (EXT4_APPEND_FL |
2725+ EXT4_IMMUTABLE_FL | EXT4_IXUNLINK_FL))) {
2726 if (!capable(CAP_LINUX_IMMUTABLE))
2727 goto flags_out;
2728 }
2729@@ -430,6 +433,33 @@ static inline unsigned long ext4_xflags_
2730 return iflags;
09be7631 2731 }
db55b927 2732
d4263eb0
JR
2733+int ext4_sync_flags(struct inode *inode, int flags, int vflags)
2734+{
2735+ handle_t *handle = NULL;
2736+ struct ext4_iloc iloc;
2737+ int err;
2738+
b00e13aa 2739+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
d4263eb0
JR
2740+ if (IS_ERR(handle))
2741+ return PTR_ERR(handle);
2742+
2743+ if (IS_SYNC(inode))
2744+ ext4_handle_sync(handle);
2745+ err = ext4_reserve_inode_write(handle, inode, &iloc);
2746+ if (err)
2747+ goto flags_err;
2748+
2749+ inode->i_flags = flags;
2750+ inode->i_vflags = vflags;
2751+ ext4_get_inode_flags(EXT4_I(inode));
2752+ inode->i_ctime = ext4_current_time(inode);
2753+
2754+ err = ext4_mark_iloc_dirty(handle, inode, &iloc);
2755+flags_err:
2756+ ext4_journal_stop(handle);
2757+ return err;
2758+}
2759+
2760 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2761 {
b00e13aa 2762 struct inode *inode = file_inode(filp);
cc23e853 2763@@ -459,6 +489,11 @@ long ext4_ioctl(struct file *filp, unsig
ec22aa5c
AM
2764
2765 flags = ext4_mask_flags(inode->i_mode, flags);
2380c486
JR
2766
2767+ if (IS_BARRIER(inode)) {
2768+ vxwprintk_task(1, "messing with the barrier.");
2769+ return -EACCES;
2770+ }
2771+
cc23e853
AM
2772 inode_lock(inode);
2773 err = ext4_ioctl_setflags(inode, flags);
2774 inode_unlock(inode);
09a55596
AM
2775diff -NurpP --minimal linux-4.9.135/fs/ext4/namei.c linux-4.9.135-vs2.3.9.8/fs/ext4/namei.c
2776--- linux-4.9.135/fs/ext4/namei.c 2018-10-20 10:39:17.000000000 +0000
2777+++ linux-4.9.135-vs2.3.9.8/fs/ext4/namei.c 2018-10-20 05:55:42.000000000 +0000
cc23e853 2778@@ -33,6 +33,7 @@
2380c486 2779 #include <linux/quotaops.h>
d337f35e
JR
2780 #include <linux/buffer_head.h>
2781 #include <linux/bio.h>
d337f35e 2782+#include <linux/vs_tag.h>
2380c486
JR
2783 #include "ext4.h"
2784 #include "ext4_jbd2.h"
d337f35e 2785
09a55596 2786@@ -1460,6 +1461,7 @@ restart:
cc23e853 2787 REQ_META | REQ_PRIO,
a168f21d 2788 1, &bh);
2380c486 2789 }
d337f35e 2790+ dx_propagate_tag(nd, inode);
2380c486
JR
2791 }
2792 if ((bh = bh_use[ra_ptr++]) == NULL)
2793 goto next;
09a55596 2794@@ -3911,6 +3913,7 @@ const struct inode_operations ext4_dir_i
a168f21d 2795 .get_acl = ext4_get_acl,
bb20add7 2796 .set_acl = ext4_set_acl,
d4263eb0 2797 .fiemap = ext4_fiemap,
d337f35e
JR
2798+ .sync_flags = ext4_sync_flags,
2799 };
d4263eb0
JR
2800
2801 const struct inode_operations ext4_special_inode_operations = {
09a55596
AM
2802diff -NurpP --minimal linux-4.9.135/fs/ext4/super.c linux-4.9.135-vs2.3.9.8/fs/ext4/super.c
2803--- linux-4.9.135/fs/ext4/super.c 2018-10-20 10:39:17.000000000 +0000
2804+++ linux-4.9.135-vs2.3.9.8/fs/ext4/super.c 2018-10-20 05:55:42.000000000 +0000
2805@@ -1281,6 +1281,7 @@ enum {
78865d5b 2806 Opt_dioread_nolock, Opt_dioread_lock,
dd5f3080 2807 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
cc23e853
AM
2808 Opt_max_dir_size_kb, Opt_nojournal_checksum,
2809+ Opt_tag, Opt_notag, Opt_tagid
d337f35e
JR
2810 };
2811
ec22aa5c 2812 static const match_table_t tokens = {
09a55596 2813@@ -1367,6 +1368,9 @@ static const match_table_t tokens = {
1e8b8f9b
AM
2814 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
2815 {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
2816 {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
d337f35e
JR
2817+ {Opt_tag, "tag"},
2818+ {Opt_notag, "notag"},
2819+ {Opt_tagid, "tagid=%u"},
d337f35e 2820 {Opt_err, NULL},
d337f35e 2821 };
2380c486 2822
09a55596 2823@@ -1612,6 +1616,20 @@ static int handle_mount_opt(struct super
cc23e853
AM
2824 case Opt_nolazytime:
2825 sb->s_flags &= ~MS_LAZYTIME;
1e8b8f9b 2826 return 1;
d337f35e 2827+#ifndef CONFIG_TAGGING_NONE
1e8b8f9b
AM
2828+ case Opt_tag:
2829+ set_opt(sb, TAGGED);
2830+ return 1;
2831+ case Opt_notag:
2832+ clear_opt(sb, TAGGED);
2833+ return 1;
d337f35e
JR
2834+#endif
2835+#ifdef CONFIG_PROPAGATE
1e8b8f9b
AM
2836+ case Opt_tagid:
2837+ /* use args[0] */
2838+ set_opt(sb, TAGGED);
2839+ return 1;
d337f35e 2840+#endif
1e8b8f9b
AM
2841 }
2842
b00e13aa 2843 for (m = ext4_mount_opts; m->token != Opt_err; m++)
09a55596 2844@@ -3587,6 +3605,9 @@ static int ext4_fill_super(struct super_
cc23e853 2845 sb->s_iflags |= SB_I_CGROUPWB;
f6c5ef8b 2846 }
d337f35e
JR
2847
2848+ if (EXT4_SB(sb)->s_mount_opt & EXT4_MOUNT_TAGGED)
2849+ sb->s_flags |= MS_TAGGED;
2850+
2851 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
78865d5b 2852 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
d337f35e 2853
09a55596 2854@@ -4984,6 +5005,14 @@ static int ext4_remount(struct super_blo
ec22aa5c 2855 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
93de0823 2856 ext4_abort(sb, "Abort forced by user");
2380c486 2857
d337f35e
JR
2858+ if ((sbi->s_mount_opt & EXT4_MOUNT_TAGGED) &&
2859+ !(sb->s_flags & MS_TAGGED)) {
2860+ printk("EXT4-fs: %s: tagging not permitted on remount.\n",
2861+ sb->s_id);
d4263eb0
JR
2862+ err = -EINVAL;
2863+ goto restore_opts;
d337f35e 2864+ }
2380c486 2865+
d337f35e 2866 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
78865d5b 2867 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
d337f35e 2868
09a55596
AM
2869diff -NurpP --minimal linux-4.9.135/fs/fcntl.c linux-4.9.135-vs2.3.9.8/fs/fcntl.c
2870--- linux-4.9.135/fs/fcntl.c 2018-10-20 10:39:17.000000000 +0000
2871+++ linux-4.9.135-vs2.3.9.8/fs/fcntl.c 2018-10-20 04:58:13.000000000 +0000
bb20add7 2872@@ -22,6 +22,7 @@
2380c486 2873 #include <linux/pid_namespace.h>
92598135 2874 #include <linux/user_namespace.h>
bb20add7 2875 #include <linux/shmem_fs.h>
d337f35e
JR
2876+#include <linux/vs_limit.h>
2877
2878 #include <asm/poll.h>
2879 #include <asm/siginfo.h>
5ba7a31c 2880@@ -390,6 +391,8 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, f
d337f35e 2881
537831f9 2882 if (!f.file)
2380c486
JR
2883 goto out;
2884+ if (!vx_files_avail(1))
2885+ goto out;
2886
537831f9 2887 if (unlikely(f.file->f_mode & FMODE_PATH)) {
42bc425c 2888 if (!check_fcntl_cmd(cmd))
09a55596
AM
2889diff -NurpP --minimal linux-4.9.135/fs/file.c linux-4.9.135-vs2.3.9.8/fs/file.c
2890--- linux-4.9.135/fs/file.c 2016-12-11 19:17:54.000000000 +0000
2891+++ linux-4.9.135-vs2.3.9.8/fs/file.c 2018-10-20 04:58:13.000000000 +0000
537831f9 2892@@ -22,6 +22,7 @@
2380c486
JR
2893 #include <linux/spinlock.h>
2894 #include <linux/rcupdate.h>
2895 #include <linux/workqueue.h>
2896+#include <linux/vs_limit.h>
2897
cc23e853
AM
2898 unsigned int sysctl_nr_open __read_mostly = 1024*1024;
2899 unsigned int sysctl_nr_open_min = BITS_PER_LONG;
2900@@ -357,6 +358,8 @@ struct files_struct *dup_fd(struct files
2380c486
JR
2901 struct file *f = *old_fds++;
2902 if (f) {
2903 get_file(f);
2904+ /* TODO: sum it first for check and performance */
2905+ vx_openfd_inc(open_files - i);
2906 } else {
2907 /*
2908 * The fd may be claimed in the fd bitmap but not yet
cc23e853 2909@@ -406,9 +409,11 @@ static struct fdtable *close_files(struc
537831f9 2910 filp_close(file, files);
bb20add7 2911 cond_resched_rcu_qs();
537831f9
AM
2912 }
2913+ vx_openfd_dec(i);
2914 }
2915 i++;
2916 set >>= 1;
2917+ cond_resched();
2918 }
2919 }
bb20add7 2920
cc23e853 2921@@ -539,6 +544,7 @@ repeat:
2380c486 2922 else
1e8b8f9b 2923 __clear_close_on_exec(fd, fdt);
2380c486
JR
2924 error = fd;
2925+ vx_openfd_inc(fd);
2926 #if 1
2927 /* Sanity check */
bb20add7 2928 if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
cc23e853 2929@@ -569,6 +575,7 @@ static void __put_unused_fd(struct files
537831f9
AM
2930 __clear_open_fd(fd, fdt);
2931 if (fd < files->next_fd)
2932 files->next_fd = fd;
2933+ vx_openfd_dec(fd);
2934 }
2935
2936 void put_unused_fd(unsigned int fd)
cc23e853 2937@@ -856,6 +863,8 @@ __releases(&files->file_lock)
537831f9
AM
2938
2939 if (tofree)
2940 filp_close(tofree, files);
2941+ else
2942+ vx_openfd_inc(fd); /* fd was unused */
2943
2944 return fd;
2945
09a55596
AM
2946diff -NurpP --minimal linux-4.9.135/fs/file_table.c linux-4.9.135-vs2.3.9.8/fs/file_table.c
2947--- linux-4.9.135/fs/file_table.c 2016-12-11 19:17:54.000000000 +0000
2948+++ linux-4.9.135-vs2.3.9.8/fs/file_table.c 2018-10-20 04:58:13.000000000 +0000
92598135 2949@@ -26,6 +26,8 @@
92598135 2950 #include <linux/task_work.h>
2bf5ad28 2951 #include <linux/ima.h>
cc23e853 2952 #include <linux/swap.h>
d337f35e
JR
2953+#include <linux/vs_limit.h>
2954+#include <linux/vs_context.h>
2955
a168f21d 2956 #include <linux/atomic.h>
d337f35e 2957
c2e5f7c8 2958@@ -137,6 +139,8 @@ struct file *get_empty_filp(void)
bb20add7 2959 mutex_init(&f->f_pos_lock);
d337f35e
JR
2960 eventpoll_init_file(f);
2961 /* f->f_version: 0 */
2962+ f->f_xid = vx_current_xid();
2963+ vx_files_inc(f);
2964 return f;
2965
2966 over:
bb20add7 2967@@ -219,6 +223,8 @@ static void __fput(struct file *file)
265de2f7
JR
2968 put_write_access(inode);
2969 __mnt_drop_write(mnt);
2970 }
d337f35e
JR
2971+ vx_files_dec(file);
2972+ file->f_xid = 0;
92598135
AM
2973 file->f_path.dentry = NULL;
2974 file->f_path.mnt = NULL;
b00e13aa 2975 file->f_inode = NULL;
bb20add7 2976@@ -305,6 +311,8 @@ void put_filp(struct file *file)
d337f35e 2977 {
2380c486 2978 if (atomic_long_dec_and_test(&file->f_count)) {
d337f35e
JR
2979 security_file_free(file);
2980+ vx_files_dec(file);
2981+ file->f_xid = 0;
d337f35e
JR
2982 file_free(file);
2983 }
c2e5f7c8 2984 }
09a55596
AM
2985diff -NurpP --minimal linux-4.9.135/fs/fs_struct.c linux-4.9.135-vs2.3.9.8/fs/fs_struct.c
2986--- linux-4.9.135/fs/fs_struct.c 2016-12-11 19:17:54.000000000 +0000
2987+++ linux-4.9.135-vs2.3.9.8/fs/fs_struct.c 2018-10-20 04:58:13.000000000 +0000
ec22aa5c
AM
2988@@ -4,6 +4,7 @@
2989 #include <linux/path.h>
2990 #include <linux/slab.h>
2991 #include <linux/fs_struct.h>
2992+#include <linux/vserver/global.h>
d33d7b00 2993 #include "internal.h"
ec22aa5c 2994
92598135
AM
2995 /*
2996@@ -87,6 +88,7 @@ void free_fs_struct(struct fs_struct *fs
ec22aa5c 2997 {
92598135
AM
2998 path_put(&fs->root);
2999 path_put(&fs->pwd);
ec22aa5c
AM
3000+ atomic_dec(&vs_global_fs);
3001 kmem_cache_free(fs_cachep, fs);
3002 }
3003
537831f9 3004@@ -124,6 +126,7 @@ struct fs_struct *copy_fs_struct(struct
d33d7b00 3005 fs->pwd = old->pwd;
92598135 3006 path_get(&fs->pwd);
d33d7b00 3007 spin_unlock(&old->lock);
ec22aa5c
AM
3008+ atomic_inc(&vs_global_fs);
3009 }
3010 return fs;
3011 }
09a55596
AM
3012diff -NurpP --minimal linux-4.9.135/fs/gfs2/file.c linux-4.9.135-vs2.3.9.8/fs/gfs2/file.c
3013--- linux-4.9.135/fs/gfs2/file.c 2018-10-20 10:39:17.000000000 +0000
3014+++ linux-4.9.135-vs2.3.9.8/fs/gfs2/file.c 2018-10-20 04:58:13.000000000 +0000
cc23e853 3015@@ -137,6 +137,9 @@ static const u32 fsflags_to_gfs2[32] = {
e22b5178
AM
3016 [12] = GFS2_DIF_EXHASH,
3017 [14] = GFS2_DIF_INHERIT_JDATA,
92598135 3018 [17] = GFS2_DIF_TOPDIR,
e22b5178
AM
3019+ [27] = GFS2_DIF_IXUNLINK,
3020+ [26] = GFS2_DIF_BARRIER,
3021+ [29] = GFS2_DIF_COW,
3022 };
3023
3024 static const u32 gfs2_to_fsflags[32] = {
cc23e853 3025@@ -147,6 +150,9 @@ static const u32 gfs2_to_fsflags[32] = {
e22b5178 3026 [gfs2fl_ExHash] = FS_INDEX_FL,
92598135 3027 [gfs2fl_TopLevel] = FS_TOPDIR_FL,
e22b5178
AM
3028 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
3029+ [gfs2fl_IXUnlink] = FS_IXUNLINK_FL,
3030+ [gfs2fl_Barrier] = FS_BARRIER_FL,
3031+ [gfs2fl_Cow] = FS_COW_FL,
3032 };
3033
3034 static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
cc23e853 3035@@ -178,12 +184,17 @@ void gfs2_set_inode_flags(struct inode *
e22b5178
AM
3036 {
3037 struct gfs2_inode *ip = GFS2_I(inode);
3038 unsigned int flags = inode->i_flags;
3039+ unsigned int vflags = inode->i_vflags;
3040+
3041+ flags &= ~(S_IMMUTABLE | S_IXUNLINK |
a168f21d 3042+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC | S_NOSEC);
e22b5178 3043
a168f21d
AM
3044- flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
3045 if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
cc23e853 3046 flags |= S_NOSEC;
e22b5178
AM
3047 if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
3048 flags |= S_IMMUTABLE;
3049+ if (ip->i_diskflags & GFS2_DIF_IXUNLINK)
3050+ flags |= S_IXUNLINK;
e22b5178
AM
3051 if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
3052 flags |= S_APPEND;
3053 if (ip->i_diskflags & GFS2_DIF_NOATIME)
cc23e853 3054@@ -191,6 +202,43 @@ void gfs2_set_inode_flags(struct inode *
e22b5178
AM
3055 if (ip->i_diskflags & GFS2_DIF_SYNC)
3056 flags |= S_SYNC;
3057 inode->i_flags = flags;
3058+
3059+ vflags &= ~(V_BARRIER | V_COW);
3060+
3061+ if (ip->i_diskflags & GFS2_DIF_BARRIER)
3062+ vflags |= V_BARRIER;
3063+ if (ip->i_diskflags & GFS2_DIF_COW)
3064+ vflags |= V_COW;
3065+ inode->i_vflags = vflags;
3066+}
3067+
3068+void gfs2_get_inode_flags(struct inode *inode)
3069+{
3070+ struct gfs2_inode *ip = GFS2_I(inode);
3071+ unsigned int flags = inode->i_flags;
3072+ unsigned int vflags = inode->i_vflags;
3073+
3074+ ip->i_diskflags &= ~(GFS2_DIF_APPENDONLY |
3075+ GFS2_DIF_NOATIME | GFS2_DIF_SYNC |
3076+ GFS2_DIF_IMMUTABLE | GFS2_DIF_IXUNLINK |
3077+ GFS2_DIF_BARRIER | GFS2_DIF_COW);
3078+
3079+ if (flags & S_IMMUTABLE)
3080+ ip->i_diskflags |= GFS2_DIF_IMMUTABLE;
3081+ if (flags & S_IXUNLINK)
3082+ ip->i_diskflags |= GFS2_DIF_IXUNLINK;
3083+
3084+ if (flags & S_APPEND)
3085+ ip->i_diskflags |= GFS2_DIF_APPENDONLY;
3086+ if (flags & S_NOATIME)
3087+ ip->i_diskflags |= GFS2_DIF_NOATIME;
3088+ if (flags & S_SYNC)
3089+ ip->i_diskflags |= GFS2_DIF_SYNC;
3090+
3091+ if (vflags & V_BARRIER)
3092+ ip->i_diskflags |= GFS2_DIF_BARRIER;
3093+ if (vflags & V_COW)
3094+ ip->i_diskflags |= GFS2_DIF_COW;
3095 }
3096
3097 /* Flags that can be set by user space */
cc23e853
AM
3098@@ -306,6 +354,37 @@ static int gfs2_set_flags(struct file *f
3099 return do_gfs2_set_flags(filp, gfsflags, ~(GFS2_DIF_SYSTEM | GFS2_DIF_JDATA));
e22b5178
AM
3100 }
3101
3102+int gfs2_sync_flags(struct inode *inode, int flags, int vflags)
3103+{
3104+ struct gfs2_inode *ip = GFS2_I(inode);
3105+ struct gfs2_sbd *sdp = GFS2_SB(inode);
3106+ struct buffer_head *bh;
3107+ struct gfs2_holder gh;
3108+ int error;
3109+
3110+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
3111+ if (error)
3112+ return error;
3113+ error = gfs2_trans_begin(sdp, RES_DINODE, 0);
3114+ if (error)
3115+ goto out;
3116+ error = gfs2_meta_inode_buffer(ip, &bh);
3117+ if (error)
3118+ goto out_trans_end;
b00e13aa 3119+ gfs2_trans_add_meta(ip->i_gl, bh);
e22b5178
AM
3120+ inode->i_flags = flags;
3121+ inode->i_vflags = vflags;
3122+ gfs2_get_inode_flags(inode);
3123+ gfs2_dinode_out(ip, bh->b_data);
3124+ brelse(bh);
3125+ gfs2_set_aops(inode);
3126+out_trans_end:
3127+ gfs2_trans_end(sdp);
3128+out:
3129+ gfs2_glock_dq_uninit(&gh);
3130+ return error;
3131+}
3132+
3133 static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3134 {
3135 switch(cmd) {
09a55596
AM
3136diff -NurpP --minimal linux-4.9.135/fs/gfs2/inode.h linux-4.9.135-vs2.3.9.8/fs/gfs2/inode.h
3137--- linux-4.9.135/fs/gfs2/inode.h 2016-12-11 19:17:54.000000000 +0000
3138+++ linux-4.9.135-vs2.3.9.8/fs/gfs2/inode.h 2018-10-20 04:58:13.000000000 +0000
cc23e853 3139@@ -117,6 +117,7 @@ extern const struct file_operations gfs2
e22b5178
AM
3140 extern const struct file_operations gfs2_dir_fops_nolock;
3141
3142 extern void gfs2_set_inode_flags(struct inode *inode);
3143+extern int gfs2_sync_flags(struct inode *inode, int flags, int vflags);
3144
3145 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
3146 extern const struct file_operations gfs2_file_fops;
09a55596
AM
3147diff -NurpP --minimal linux-4.9.135/fs/hostfs/hostfs.h linux-4.9.135-vs2.3.9.8/fs/hostfs/hostfs.h
3148--- linux-4.9.135/fs/hostfs/hostfs.h 2016-12-11 19:17:54.000000000 +0000
3149+++ linux-4.9.135-vs2.3.9.8/fs/hostfs/hostfs.h 2018-10-20 04:58:13.000000000 +0000
537831f9
AM
3150@@ -42,6 +42,7 @@ struct hostfs_iattr {
3151 unsigned short ia_mode;
3152 uid_t ia_uid;
3153 gid_t ia_gid;
61333608 3154+ vtag_t ia_tag;
537831f9
AM
3155 loff_t ia_size;
3156 struct timespec ia_atime;
3157 struct timespec ia_mtime;
09a55596
AM
3158diff -NurpP --minimal linux-4.9.135/fs/inode.c linux-4.9.135-vs2.3.9.8/fs/inode.c
3159--- linux-4.9.135/fs/inode.c 2018-10-20 10:39:17.000000000 +0000
3160+++ linux-4.9.135-vs2.3.9.8/fs/inode.c 2018-10-20 04:58:13.000000000 +0000
c2e5f7c8 3161@@ -18,6 +18,7 @@
763640ca 3162 #include <linux/buffer_head.h> /* for inode_has_buffers */
db55b927 3163 #include <linux/ratelimit.h>
c2e5f7c8 3164 #include <linux/list_lru.h>
76514441 3165+#include <linux/vs_tag.h>
cc23e853 3166 #include <trace/events/writeback.h>
763640ca 3167 #include "internal.h"
76514441 3168
cc23e853 3169@@ -133,6 +134,8 @@ int inode_init_always(struct super_block
ec22aa5c
AM
3170 struct address_space *const mapping = &inode->i_data;
3171
3172 inode->i_sb = sb;
3173+
3174+ /* essential because of inode slab reuse */
ec22aa5c
AM
3175 inode->i_blkbits = sb->s_blocksize_bits;
3176 inode->i_flags = 0;
3177 atomic_set(&inode->i_count, 1);
cc23e853
AM
3178@@ -144,6 +147,7 @@ int inode_init_always(struct super_block
3179 inode->i_opflags |= IOP_XATTR;
537831f9
AM
3180 i_uid_write(inode, 0);
3181 i_gid_write(inode, 0);
3182+ i_tag_write(inode, 0);
3183 atomic_set(&inode->i_writecount, 0);
3184 inode->i_size = 0;
3185 inode->i_blocks = 0;
cc23e853
AM
3186@@ -155,6 +159,7 @@ int inode_init_always(struct super_block
3187 inode->i_link = NULL;
3188 inode->i_dir_seq = 0;
ec22aa5c
AM
3189 inode->i_rdev = 0;
3190+ inode->i_mdev = 0;
3191 inode->dirtied_when = 0;
3192
cc23e853
AM
3193 #ifdef CONFIG_CGROUP_WRITEBACK
3194@@ -479,6 +484,8 @@ void __insert_inode_hash(struct inode *i
d337f35e 3195 }
763640ca 3196 EXPORT_SYMBOL(__insert_inode_hash);
d337f35e
JR
3197
3198+EXPORT_SYMBOL_GPL(__iget);
3199+
3200 /**
a168f21d 3201 * __remove_inode_hash - remove an inode from the hash
ab30d09f 3202 * @inode: inode to unhash
cc23e853 3203@@ -1977,9 +1984,11 @@ void init_special_inode(struct inode *in
2380c486
JR
3204 if (S_ISCHR(mode)) {
3205 inode->i_fop = &def_chr_fops;
3206 inode->i_rdev = rdev;
3207+ inode->i_mdev = rdev;
3208 } else if (S_ISBLK(mode)) {
3209 inode->i_fop = &def_blk_fops;
3210 inode->i_rdev = rdev;
3211+ inode->i_mdev = rdev;
3212 } else if (S_ISFIFO(mode))
09be7631 3213 inode->i_fop = &pipefifo_fops;
2380c486 3214 else if (S_ISSOCK(mode))
09a55596 3215@@ -2014,6 +2023,7 @@ void inode_init_owner(struct inode *inod
76514441
AM
3216 } else
3217 inode->i_gid = current_fsgid();
3218 inode->i_mode = mode;
8ce283e1 3219+ i_tag_write(inode, dx_current_fstag(inode->i_sb));
76514441
AM
3220 }
3221 EXPORT_SYMBOL(inode_init_owner);
763640ca 3222
09a55596
AM
3223diff -NurpP --minimal linux-4.9.135/fs/ioctl.c linux-4.9.135-vs2.3.9.8/fs/ioctl.c
3224--- linux-4.9.135/fs/ioctl.c 2016-12-11 19:17:54.000000000 +0000
3225+++ linux-4.9.135-vs2.3.9.8/fs/ioctl.c 2018-10-20 04:58:13.000000000 +0000
ab30d09f 3226@@ -15,6 +15,9 @@
ec22aa5c
AM
3227 #include <linux/writeback.h>
3228 #include <linux/buffer_head.h>
3229 #include <linux/falloc.h>
d337f35e
JR
3230+#include <linux/proc_fs.h>
3231+#include <linux/vserver/inode.h>
3232+#include <linux/vs_tag.h>
cc23e853 3233 #include "internal.h"
d337f35e 3234
d337f35e 3235 #include <asm/ioctls.h>
09a55596
AM
3236diff -NurpP --minimal linux-4.9.135/fs/jfs/file.c linux-4.9.135-vs2.3.9.8/fs/jfs/file.c
3237--- linux-4.9.135/fs/jfs/file.c 2016-12-11 19:17:54.000000000 +0000
3238+++ linux-4.9.135-vs2.3.9.8/fs/jfs/file.c 2018-10-20 04:58:13.000000000 +0000
cc23e853
AM
3239@@ -113,7 +113,8 @@ int jfs_setattr(struct dentry *dentry, s
3240 return rc;
3241 }
537831f9
AM
3242 if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
3243- (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
3244+ (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)) ||
3245+ (iattr->ia_valid & ATTR_TAG && !tag_eq(iattr->ia_tag, inode->i_tag))) {
78865d5b
AM
3246 rc = dquot_transfer(inode, iattr);
3247 if (rc)
3248 return rc;
bb20add7 3249@@ -146,6 +147,7 @@ const struct inode_operations jfs_file_i
a168f21d 3250 .get_acl = jfs_get_acl,
bb20add7 3251 .set_acl = jfs_set_acl,
d337f35e
JR
3252 #endif
3253+ .sync_flags = jfs_sync_flags,
3254 };
3255
3256 const struct file_operations jfs_file_operations = {
09a55596
AM
3257diff -NurpP --minimal linux-4.9.135/fs/jfs/ioctl.c linux-4.9.135-vs2.3.9.8/fs/jfs/ioctl.c
3258--- linux-4.9.135/fs/jfs/ioctl.c 2016-12-11 19:17:54.000000000 +0000
3259+++ linux-4.9.135-vs2.3.9.8/fs/jfs/ioctl.c 2018-10-20 04:58:13.000000000 +0000
537831f9 3260@@ -12,6 +12,7 @@
d337f35e 3261 #include <linux/time.h>
2380c486 3262 #include <linux/sched.h>
537831f9 3263 #include <linux/blkdev.h>
d337f35e
JR
3264+#include <linux/mount.h>
3265 #include <asm/current.h>
3266 #include <asm/uaccess.h>
3267
537831f9 3268@@ -56,6 +57,16 @@ static long jfs_map_ext2(unsigned long f
d4263eb0
JR
3269 }
3270
3271
3272+int jfs_sync_flags(struct inode *inode, int flags, int vflags)
3273+{
3274+ inode->i_flags = flags;
3275+ inode->i_vflags = vflags;
3276+ jfs_get_inode_flags(JFS_IP(inode));
3277+ inode->i_ctime = CURRENT_TIME_SEC;
3278+ mark_inode_dirty(inode);
3279+ return 0;
3280+}
3281+
3282 long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
3283 {
b00e13aa 3284 struct inode *inode = file_inode(filp);
537831f9 3285@@ -89,6 +100,11 @@ long jfs_ioctl(struct file *filp, unsign
2380c486
JR
3286 if (!S_ISDIR(inode->i_mode))
3287 flags &= ~JFS_DIRSYNC_FL;
d337f35e 3288
2380c486
JR
3289+ if (IS_BARRIER(inode)) {
3290+ vxwprintk_task(1, "messing with the barrier.");
3291+ return -EACCES;
3292+ }
3293+
3294 /* Is it quota file? Do not allow user to mess with it */
3295 if (IS_NOQUOTA(inode)) {
3296 err = -EPERM;
537831f9 3297@@ -106,8 +122,8 @@ long jfs_ioctl(struct file *filp, unsign
d337f35e
JR
3298 * the relevant capability.
3299 */
3300 if ((oldflags & JFS_IMMUTABLE_FL) ||
3301- ((flags ^ oldflags) &
3302- (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
3303+ ((flags ^ oldflags) & (JFS_APPEND_FL |
2380c486
JR
3304+ JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL))) {
3305 if (!capable(CAP_LINUX_IMMUTABLE)) {
cc23e853 3306 inode_unlock(inode);
2380c486 3307 err = -EPERM;
537831f9 3308@@ -115,7 +131,7 @@ long jfs_ioctl(struct file *filp, unsign
d4263eb0
JR
3309 }
3310 }
3311
3312- flags = flags & JFS_FL_USER_MODIFIABLE;
3313+ flags &= JFS_FL_USER_MODIFIABLE;
3314 flags |= oldflags & ~JFS_FL_USER_MODIFIABLE;
3315 jfs_inode->mode2 = flags;
3316
09a55596
AM
3317diff -NurpP --minimal linux-4.9.135/fs/jfs/jfs_dinode.h linux-4.9.135-vs2.3.9.8/fs/jfs/jfs_dinode.h
3318--- linux-4.9.135/fs/jfs/jfs_dinode.h 2016-12-11 19:17:54.000000000 +0000
3319+++ linux-4.9.135-vs2.3.9.8/fs/jfs/jfs_dinode.h 2018-10-20 04:58:13.000000000 +0000
2380c486
JR
3320@@ -161,9 +161,13 @@ struct dinode {
3321
d337f35e
JR
3322 #define JFS_APPEND_FL 0x01000000 /* writes to file may only append */
3323 #define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */
2380c486 3324+#define JFS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
d337f35e
JR
3325
3326-#define JFS_FL_USER_VISIBLE 0x03F80000
2380c486 3327-#define JFS_FL_USER_MODIFIABLE 0x03F80000
d337f35e 3328+#define JFS_BARRIER_FL 0x04000000 /* Barrier for chroot() */
2380c486 3329+#define JFS_COW_FL 0x20000000 /* Copy on Write marker */
d337f35e 3330+
2380c486
JR
3331+#define JFS_FL_USER_VISIBLE 0x07F80000
3332+#define JFS_FL_USER_MODIFIABLE 0x07F80000
3333 #define JFS_FL_INHERIT 0x03C80000
d337f35e
JR
3334
3335 /* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */
09a55596
AM
3336diff -NurpP --minimal linux-4.9.135/fs/jfs/jfs_filsys.h linux-4.9.135-vs2.3.9.8/fs/jfs/jfs_filsys.h
3337--- linux-4.9.135/fs/jfs/jfs_filsys.h 2016-12-11 19:17:54.000000000 +0000
3338+++ linux-4.9.135-vs2.3.9.8/fs/jfs/jfs_filsys.h 2018-10-20 04:58:13.000000000 +0000
537831f9 3339@@ -266,6 +266,7 @@
ec22aa5c
AM
3340 #define JFS_NAME_MAX 255
3341 #define JFS_PATH_MAX BPSIZE
bd427b06 3342
ec22aa5c 3343+#define JFS_TAGGED 0x00800000 /* Context Tagging */
bd427b06 3344
ec22aa5c
AM
3345 /*
3346 * file system state (superblock state)
09a55596
AM
3347diff -NurpP --minimal linux-4.9.135/fs/jfs/jfs_imap.c linux-4.9.135-vs2.3.9.8/fs/jfs/jfs_imap.c
3348--- linux-4.9.135/fs/jfs/jfs_imap.c 2016-12-11 19:17:54.000000000 +0000
3349+++ linux-4.9.135-vs2.3.9.8/fs/jfs/jfs_imap.c 2018-10-20 04:58:13.000000000 +0000
78865d5b 3350@@ -46,6 +46,7 @@
ec22aa5c
AM
3351 #include <linux/pagemap.h>
3352 #include <linux/quotaops.h>
78865d5b 3353 #include <linux/slab.h>
ec22aa5c 3354+#include <linux/vs_tag.h>
bd427b06 3355
ec22aa5c
AM
3356 #include "jfs_incore.h"
3357 #include "jfs_inode.h"
cc23e853 3358@@ -3046,6 +3047,8 @@ static int copy_from_dinode(struct dinod
ec22aa5c
AM
3359 {
3360 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3361 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
a4a22af8
AM
3362+ kuid_t kuid;
3363+ kgid_t kgid;
bd427b06 3364
ec22aa5c
AM
3365 jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
3366 jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
cc23e853 3367@@ -3066,14 +3069,18 @@ static int copy_from_dinode(struct dinod
d337f35e 3368 }
f6c5ef8b 3369 set_nlink(ip, le32_to_cpu(dip->di_nlink));
bd427b06 3370
537831f9 3371- jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
a4a22af8
AM
3372+ kuid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
3373+ kgid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
3374+ ip->i_tag = INOTAG_KTAG(DX_TAG(ip), kuid, kgid, GLOBAL_ROOT_TAG);
ec22aa5c 3375+
a4a22af8 3376+ jfs_ip->saved_uid = INOTAG_KUID(DX_TAG(ip), kuid, kgid);
537831f9 3377 if (!uid_valid(sbi->uid))
ec22aa5c
AM
3378 ip->i_uid = jfs_ip->saved_uid;
3379 else {
3380 ip->i_uid = sbi->uid;
bd427b06
AM
3381 }
3382
537831f9 3383- jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
a4a22af8 3384+ jfs_ip->saved_gid = INOTAG_KGID(DX_TAG(ip), kuid, kgid);
537831f9 3385 if (!gid_valid(sbi->gid))
d337f35e
JR
3386 ip->i_gid = jfs_ip->saved_gid;
3387 else {
cc23e853 3388@@ -3138,16 +3145,14 @@ static void copy_to_dinode(struct dinode
d337f35e
JR
3389 dip->di_size = cpu_to_le64(ip->i_size);
3390 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
3391 dip->di_nlink = cpu_to_le32(ip->i_nlink);
537831f9
AM
3392- if (!uid_valid(sbi->uid))
3393- dip->di_uid = cpu_to_le32(i_uid_read(ip));
d337f35e 3394- else
537831f9
AM
3395- dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns,
3396- jfs_ip->saved_uid));
3397- if (!gid_valid(sbi->gid))
3398- dip->di_gid = cpu_to_le32(i_gid_read(ip));
d337f35e 3399- else
537831f9
AM
3400- dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3401- jfs_ip->saved_gid));
3402+ dip->di_uid = cpu_to_le32(from_kuid(&init_user_ns,
a4a22af8 3403+ TAGINO_KUID(DX_TAG(ip),
537831f9
AM
3404+ !uid_valid(sbi->uid) ? ip->i_uid : jfs_ip->saved_uid,
3405+ ip->i_tag)));
a4a22af8
AM
3406+ dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3407+ TAGINO_KGID(DX_TAG(ip),
537831f9
AM
3408+ !gid_valid(sbi->gid) ? ip->i_gid : jfs_ip->saved_gid,
3409+ ip->i_tag)));
2380c486 3410 jfs_get_inode_flags(jfs_ip);
d337f35e
JR
3411 /*
3412 * mode2 is only needed for storing the higher order bits.
09a55596
AM
3413diff -NurpP --minimal linux-4.9.135/fs/jfs/jfs_inode.c linux-4.9.135-vs2.3.9.8/fs/jfs/jfs_inode.c
3414--- linux-4.9.135/fs/jfs/jfs_inode.c 2016-12-11 19:17:54.000000000 +0000
3415+++ linux-4.9.135-vs2.3.9.8/fs/jfs/jfs_inode.c 2018-10-20 04:58:14.000000000 +0000
e22b5178
AM
3416@@ -18,6 +18,7 @@
3417
3418 #include <linux/fs.h>
3419 #include <linux/quotaops.h>
3420+#include <linux/vs_tag.h>
3421 #include "jfs_incore.h"
3422 #include "jfs_inode.h"
3423 #include "jfs_filsys.h"
cc23e853 3424@@ -33,6 +34,9 @@ void jfs_set_inode_flags(struct inode *i
d337f35e
JR
3425
3426 if (flags & JFS_IMMUTABLE_FL)
bb20add7 3427 new_fl |= S_IMMUTABLE;
2380c486 3428+ if (flags & JFS_IXUNLINK_FL)
cc23e853 3429+ new_fl |= S_IXUNLINK;
d337f35e 3430+
d337f35e 3431 if (flags & JFS_APPEND_FL)
bb20add7 3432 new_fl |= S_APPEND;
d337f35e 3433 if (flags & JFS_NOATIME_FL)
cc23e853 3434@@ -41,18 +45,35 @@ void jfs_set_inode_flags(struct inode *i
bb20add7 3435 new_fl |= S_DIRSYNC;
cc23e853
AM
3436 if (flags & JFS_SYNC_FL)
3437 new_fl |= S_SYNC;
bb20add7 3438- inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND | S_NOATIME |
cc23e853
AM
3439- S_DIRSYNC | S_SYNC);
3440+
3441+ inode_set_flags(inode, new_fl, S_IMMUTABLE | S_IXUNLINK |
3442+ S_APPEND | S_NOATIME | S_DIRSYNC | S_SYNC);
2380c486 3443+
bb20add7 3444+ new_fl = 0;
2380c486 3445+ if (flags & JFS_BARRIER_FL)
bb20add7 3446+ new_fl |= V_BARRIER;
2380c486 3447+ if (flags & JFS_COW_FL)
bb20add7
AM
3448+ new_fl |= V_COW;
3449+
3450+ set_mask_bits(&inode->i_vflags,
3451+ V_BARRIER | V_COW, new_fl);
2380c486
JR
3452 }
3453
3454 void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip)
3455 {
3456 unsigned int flags = jfs_ip->vfs_inode.i_flags;
3457+ unsigned int vflags = jfs_ip->vfs_inode.i_vflags;
3458+
3459+ jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_IXUNLINK_FL |
3460+ JFS_APPEND_FL | JFS_NOATIME_FL |
3461+ JFS_DIRSYNC_FL | JFS_SYNC_FL |
3462+ JFS_BARRIER_FL | JFS_COW_FL);
3463
3464- jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL |
3465- JFS_DIRSYNC_FL | JFS_SYNC_FL);
3466 if (flags & S_IMMUTABLE)
3467 jfs_ip->mode2 |= JFS_IMMUTABLE_FL;
3468+ if (flags & S_IXUNLINK)
3469+ jfs_ip->mode2 |= JFS_IXUNLINK_FL;
3470+
3471 if (flags & S_APPEND)
3472 jfs_ip->mode2 |= JFS_APPEND_FL;
3473 if (flags & S_NOATIME)
cc23e853 3474@@ -61,6 +82,11 @@ void jfs_get_inode_flags(struct jfs_inod
2380c486
JR
3475 jfs_ip->mode2 |= JFS_DIRSYNC_FL;
3476 if (flags & S_SYNC)
3477 jfs_ip->mode2 |= JFS_SYNC_FL;
3478+
3479+ if (vflags & V_BARRIER)
3480+ jfs_ip->mode2 |= JFS_BARRIER_FL;
3481+ if (vflags & V_COW)
3482+ jfs_ip->mode2 |= JFS_COW_FL;
d337f35e
JR
3483 }
3484
3485 /*
09a55596
AM
3486diff -NurpP --minimal linux-4.9.135/fs/jfs/jfs_inode.h linux-4.9.135-vs2.3.9.8/fs/jfs/jfs_inode.h
3487--- linux-4.9.135/fs/jfs/jfs_inode.h 2016-12-11 19:17:54.000000000 +0000
3488+++ linux-4.9.135-vs2.3.9.8/fs/jfs/jfs_inode.h 2018-10-20 04:58:14.000000000 +0000
2380c486
JR
3489@@ -39,6 +39,7 @@ extern struct dentry *jfs_fh_to_dentry(s
3490 extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
3491 int fh_len, int fh_type);
d337f35e 3492 extern void jfs_set_inode_flags(struct inode *);
d4263eb0 3493+extern int jfs_sync_flags(struct inode *, int, int);
d337f35e 3494 extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
78865d5b 3495 extern int jfs_setattr(struct dentry *, struct iattr *);
d337f35e 3496
09a55596
AM
3497diff -NurpP --minimal linux-4.9.135/fs/jfs/namei.c linux-4.9.135-vs2.3.9.8/fs/jfs/namei.c
3498--- linux-4.9.135/fs/jfs/namei.c 2018-10-20 10:39:17.000000000 +0000
3499+++ linux-4.9.135-vs2.3.9.8/fs/jfs/namei.c 2018-10-20 04:58:14.000000000 +0000
d33d7b00 3500@@ -22,6 +22,7 @@
d337f35e
JR
3501 #include <linux/ctype.h>
3502 #include <linux/quotaops.h>
2380c486 3503 #include <linux/exportfs.h>
d337f35e
JR
3504+#include <linux/vs_tag.h>
3505 #include "jfs_incore.h"
3506 #include "jfs_superblock.h"
3507 #include "jfs_inode.h"
09a55596 3508@@ -1480,6 +1481,7 @@ static struct dentry *jfs_lookup(struct
a168f21d 3509 jfs_err("jfs_lookup: iget failed on inum %d", (uint)inum);
d337f35e
JR
3510 }
3511
3512+ dx_propagate_tag(nd, ip);
d33d7b00
AM
3513 return d_splice_alias(ip, dentry);
3514 }
d337f35e 3515
09a55596 3516@@ -1542,6 +1544,7 @@ const struct inode_operations jfs_dir_in
a168f21d 3517 .get_acl = jfs_get_acl,
bb20add7 3518 .set_acl = jfs_set_acl,
d337f35e
JR
3519 #endif
3520+ .sync_flags = jfs_sync_flags,
3521 };
3522
3523 const struct file_operations jfs_dir_operations = {
09a55596
AM
3524diff -NurpP --minimal linux-4.9.135/fs/jfs/super.c linux-4.9.135-vs2.3.9.8/fs/jfs/super.c
3525--- linux-4.9.135/fs/jfs/super.c 2018-10-20 10:39:17.000000000 +0000
3526+++ linux-4.9.135-vs2.3.9.8/fs/jfs/super.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 3527@@ -206,7 +206,8 @@ enum {
d337f35e
JR
3528 Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
3529 Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
537831f9
AM
3530 Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask,
3531- Opt_discard, Opt_nodiscard, Opt_discard_minblk
3532+ Opt_discard, Opt_nodiscard, Opt_discard_minblk,
d337f35e
JR
3533+ Opt_tag, Opt_notag, Opt_tagid
3534 };
3535
ec22aa5c 3536 static const match_table_t tokens = {
cc23e853 3537@@ -216,6 +217,10 @@ static const match_table_t tokens = {
d337f35e
JR
3538 {Opt_resize, "resize=%u"},
3539 {Opt_resize_nosize, "resize"},
3540 {Opt_errors, "errors=%s"},
3541+ {Opt_tag, "tag"},
3542+ {Opt_notag, "notag"},
3543+ {Opt_tagid, "tagid=%u"},
3544+ {Opt_tag, "tagxid"},
3545 {Opt_ignore, "noquota"},
3546 {Opt_ignore, "quota"},
3547 {Opt_usrquota, "usrquota"},
cc23e853 3548@@ -405,7 +410,20 @@ static int parse_options(char *options,
bb20add7 3549 pr_err("JFS: discard option not supported on device\n");
d337f35e
JR
3550 break;
3551 }
537831f9 3552-
d337f35e
JR
3553+#ifndef CONFIG_TAGGING_NONE
3554+ case Opt_tag:
3555+ *flag |= JFS_TAGGED;
3556+ break;
3557+ case Opt_notag:
3558+ *flag &= ~JFS_TAGGED; /* clear only the tagging bit, keep all other mount flags */
3559+ break;
3560+#endif
3561+#ifdef CONFIG_PROPAGATE
3562+ case Opt_tagid:
3563+ /* use args[0] */
3564+ *flag |= JFS_TAGGED;
3565+ break;
3566+#endif
3567 default:
bb20add7
AM
3568 printk("jfs: Unrecognized mount option \"%s\" or missing value\n",
3569 p);
cc23e853 3570@@ -437,6 +455,12 @@ static int jfs_remount(struct super_bloc
bb20add7 3571 if (!parse_options(data, sb, &newLVSize, &flag))
d337f35e 3572 return -EINVAL;
ab30d09f 3573
d337f35e
JR
3574+ if ((flag & JFS_TAGGED) && !(sb->s_flags & MS_TAGGED)) {
3575+ printk(KERN_ERR "JFS: %s: tagging not permitted on remount.\n",
3576+ sb->s_id);
3577+ return -EINVAL;
3578+ }
3579+
3580 if (newLVSize) {
3581 if (sb->s_flags & MS_RDONLY) {
bb20add7
AM
3582 pr_err("JFS: resize requires volume to be mounted read-write\n");
3583@@ -517,6 +541,9 @@ static int jfs_fill_super(struct super_b
d337f35e
JR
3584 #ifdef CONFIG_JFS_POSIX_ACL
3585 sb->s_flags |= MS_POSIXACL;
3586 #endif
3587+ /* map mount option tagxid */
3588+ if (sbi->flag & JFS_TAGGED)
3589+ sb->s_flags |= MS_TAGGED;
3590
3591 if (newLVSize) {
537831f9 3592 pr_err("resize option for remount only\n");
09a55596
AM
3593diff -NurpP --minimal linux-4.9.135/fs/libfs.c linux-4.9.135-vs2.3.9.8/fs/libfs.c
3594--- linux-4.9.135/fs/libfs.c 2018-10-20 10:39:17.000000000 +0000
3595+++ linux-4.9.135-vs2.3.9.8/fs/libfs.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 3596@@ -180,7 +180,8 @@ static inline unsigned char dt_type(stru
d337f35e
JR
3597 * both impossible due to the lock on directory.
3598 */
3599
c2e5f7c8 3600-int dcache_readdir(struct file *file, struct dir_context *ctx)
cc23e853 3601+static inline int do_dcache_readdir_filter(struct file *file,
c2e5f7c8 3602+ struct dir_context *ctx, int (*filter)(struct dentry *dentry))
d337f35e 3603 {
cc23e853
AM
3604 struct dentry *dentry = file->f_path.dentry;
3605 struct dentry *cursor = file->private_data;
3606@@ -194,9 +195,10 @@ int dcache_readdir(struct file *file, st
d12544d8
JR
3607 p = &cursor->d_child;
3608
3609 while ((p = scan_positives(cursor, p, 1, &next)) != anchor) {
cc23e853
AM
3610- if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
3611+ if (!filter || filter(next))
3612+ if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
3613 d_inode(next)->i_ino, dt_type(d_inode(next))))
3614- break;
3615+ break;
cc23e853 3616 ctx->pos++;
d12544d8
JR
3617 }
3618 spin_lock(&dentry->d_lock);
cc23e853
AM
3619@@ -205,8 +207,22 @@ int dcache_readdir(struct file *file, st
3620 move_cursor(cursor, p);
d337f35e
JR
3621 return 0;
3622 }
c2e5f7c8
JR
3623+
3624 EXPORT_SYMBOL(dcache_readdir);
d337f35e 3625
c2e5f7c8 3626+int dcache_readdir(struct file *filp, struct dir_context *ctx)
d337f35e 3627+{
c2e5f7c8 3628+ return do_dcache_readdir_filter(filp, ctx, NULL);
d337f35e
JR
3629+}
3630+
c2e5f7c8
JR
3631+EXPORT_SYMBOL(dcache_readdir_filter);
3632+
3633+int dcache_readdir_filter(struct file *filp, struct dir_context *ctx,
d337f35e
JR
3634+ int (*filter)(struct dentry *))
3635+{
c2e5f7c8 3636+ return do_dcache_readdir_filter(filp, ctx, filter);
d337f35e 3637+}
d337f35e
JR
3638+
3639 ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
3640 {
3641 return -EISDIR;
09a55596
AM
3642diff -NurpP --minimal linux-4.9.135/fs/locks.c linux-4.9.135-vs2.3.9.8/fs/locks.c
3643--- linux-4.9.135/fs/locks.c 2016-12-11 19:17:54.000000000 +0000
3644+++ linux-4.9.135-vs2.3.9.8/fs/locks.c 2018-10-20 04:58:14.000000000 +0000
cc23e853
AM
3645@@ -127,6 +127,8 @@
3646 #include <linux/pid_namespace.h>
c2e5f7c8
JR
3647 #include <linux/hashtable.h>
3648 #include <linux/percpu.h>
d337f35e
JR
3649+#include <linux/vs_base.h>
3650+#include <linux/vs_limit.h>
3651
bb20add7
AM
3652 #define CREATE_TRACE_POINTS
3653 #include <trace/events/filelock.h>
cc23e853 3654@@ -292,11 +294,15 @@ static void locks_init_lock_heads(struct
d337f35e 3655 /* Allocate an empty lock structure. */
ab30d09f 3656 struct file_lock *locks_alloc_lock(void)
d337f35e 3657 {
a168f21d 3658- struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
a0a3e0cf 3659+ struct file_lock *fl;
a168f21d
AM
3660
3661- if (fl)
3662- locks_init_lock_heads(fl);
a168f21d 3663+ fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
cc23e853 3664
a168f21d
AM
3665+ if (fl) {
3666+ locks_init_lock_heads(fl);
cc23e853 3667+ vx_locks_inc(fl);
a168f21d
AM
3668+ fl->fl_xid = -1;
3669+ }
3670 return fl;
3671 }
3672 EXPORT_SYMBOL_GPL(locks_alloc_lock);
cc23e853 3673@@ -348,6 +354,7 @@ void locks_init_lock(struct file_lock *f
a168f21d
AM
3674 {
3675 memset(fl, 0, sizeof(struct file_lock));
3676 locks_init_lock_heads(fl);
3677+ fl->fl_xid = -1;
3678 }
3679
3680 EXPORT_SYMBOL(locks_init_lock);
cc23e853 3681@@ -365,6 +372,7 @@ void locks_copy_conflock(struct file_loc
bb20add7
AM
3682 new->fl_start = fl->fl_start;
3683 new->fl_end = fl->fl_end;
d337f35e
JR
3684 new->fl_lmops = fl->fl_lmops;
3685+ new->fl_xid = fl->fl_xid;
bb20add7 3686 new->fl_ops = NULL;
d337f35e 3687
bb20add7 3688 if (fl->fl_lmops) {
cc23e853 3689@@ -426,7 +434,10 @@ flock_make_lock(struct file *filp, unsig
d337f35e
JR
3690 fl->fl_flags = FL_FLOCK;
3691 fl->fl_type = type;
3692 fl->fl_end = OFFSET_MAX;
cc23e853 3693-
d337f35e
JR
3694+
3695+ vxd_assert(filp->f_xid == vx_current_xid(),
3696+ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3697+ fl->fl_xid = filp->f_xid;
bb20add7
AM
3698 return fl;
3699 }
cc23e853
AM
3700
3701@@ -548,6 +559,7 @@ static int lease_init(struct file *filp,
d337f35e 3702
bb20add7 3703 fl->fl_owner = filp;
d337f35e
JR
3704 fl->fl_pid = current->tgid;
3705+ fl->fl_xid = vx_current_xid();
3706
3707 fl->fl_file = filp;
3708 fl->fl_flags = FL_LEASE;
cc23e853 3709@@ -567,6 +579,10 @@ static struct file_lock *lease_alloc(str
d337f35e 3710 if (fl == NULL)
2380c486 3711 return ERR_PTR(error);
d337f35e
JR
3712
3713+ fl->fl_xid = vx_current_xid();
3714+ if (filp)
3715+ vxd_assert(filp->f_xid == fl->fl_xid,
3716+ "f_xid(%d) == fl_xid(%d)", filp->f_xid, fl->fl_xid);
d337f35e
JR
3717 error = lease_init(filp, type, fl);
3718 if (error) {
3719 locks_free_lock(fl);
cc23e853
AM
3720@@ -956,6 +972,7 @@ static int flock_lock_inode(struct inode
3721 goto out;
ab30d09f 3722 }
2380c486
JR
3723
3724+ new_fl->fl_xid = -1;
3725 find_conflict:
cc23e853
AM
3726 list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
3727 if (!flock_locks_conflict(request, fl))
3728@@ -984,7 +1001,7 @@ out:
d337f35e
JR
3729 }
3730
cc23e853
AM
3731 static int posix_lock_inode(struct inode *inode, struct file_lock *request,
3732- struct file_lock *conflock)
3733+ struct file_lock *conflock, vxid_t xid)
d337f35e 3734 {
cc23e853 3735 struct file_lock *fl, *tmp;
d337f35e 3736 struct file_lock *new_fl = NULL;
cc23e853
AM
3737@@ -1000,6 +1017,9 @@ static int posix_lock_inode(struct inode
3738 if (!ctx)
3739 return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
d337f35e 3740
cc23e853
AM
3741+ if (xid)
3742+ vxd_assert(xid == vx_current_xid(),
3743+ "xid(%d) == current(%d)", xid, vx_current_xid());
d337f35e
JR
3744 /*
3745 * We may need two file_lock structures for this operation,
3746 * so we get them in advance to avoid races.
cc23e853 3747@@ -1010,7 +1030,11 @@ static int posix_lock_inode(struct inode
d337f35e
JR
3748 (request->fl_type != F_UNLCK ||
3749 request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
3750 new_fl = locks_alloc_lock();
3751+ if (new_fl) /* locks_alloc_lock() can return NULL */
cc23e853 3752+ new_fl->fl_xid = xid;
d337f35e
JR
3753 new_fl2 = locks_alloc_lock();
3754+ if (new_fl2) /* locks_alloc_lock() can return NULL */
cc23e853 3755+ new_fl2->fl_xid = xid;
d337f35e
JR
3756 }
3757
cc23e853
AM
3758 percpu_down_read_preempt_disable(&file_rwsem);
3759@@ -1216,7 +1240,7 @@ static int posix_lock_inode(struct inode
2380c486 3760 int posix_lock_file(struct file *filp, struct file_lock *fl,
d337f35e
JR
3761 struct file_lock *conflock)
3762 {
cc23e853
AM
3763- return posix_lock_inode(locks_inode(filp), fl, conflock);
3764+ return posix_lock_inode(locks_inode(filp), fl, conflock, filp->f_xid);
d337f35e 3765 }
2380c486 3766 EXPORT_SYMBOL(posix_lock_file);
d337f35e 3767
cc23e853
AM
3768@@ -1232,7 +1256,7 @@ static int posix_lock_inode_wait(struct
3769 int error;
3770 might_sleep ();
3771 for (;;) {
3772- error = posix_lock_inode(inode, fl, NULL);
3773+ error = posix_lock_inode(inode, fl, NULL, 0);
3774 if (error != FILE_LOCK_DEFERRED)
3775 break;
3776 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
3777@@ -1308,10 +1332,13 @@ int locks_mandatory_area(struct inode *i
3778 fl.fl_end = end;
3779
3780 for (;;) {
3781+ vxid_t f_xid = 0;
3782+
ca5d134c 3783 if (filp) {
bb20add7 3784 fl.fl_owner = filp;
ca5d134c 3785 fl.fl_flags &= ~FL_SLEEP;
cc23e853
AM
3786- error = posix_lock_inode(inode, &fl, NULL);
3787+ f_xid = filp->f_xid;
3788+ error = posix_lock_inode(inode, &fl, NULL, f_xid);
ca5d134c
JR
3789 if (!error)
3790 break;
3791 }
cc23e853 3792@@ -1319,7 +1346,7 @@ int locks_mandatory_area(struct inode *i
ca5d134c
JR
3793 if (sleep)
3794 fl.fl_flags |= FL_SLEEP;
3795 fl.fl_owner = current->files;
cc23e853
AM
3796- error = posix_lock_inode(inode, &fl, NULL);
3797+ error = posix_lock_inode(inode, &fl, NULL, f_xid);
2380c486 3798 if (error != FILE_LOCK_DEFERRED)
d337f35e 3799 break;
2380c486 3800 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
cc23e853 3801@@ -2374,6 +2401,16 @@ int fcntl_setlk64(unsigned int fd, struc
d337f35e
JR
3802 if (file_lock == NULL)
3803 return -ENOLCK;
3804
3805+ vxd_assert(filp->f_xid == vx_current_xid(),
3806+ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3807+ file_lock->fl_xid = filp->f_xid;
cc23e853 3808+ // vx_locks_inc(file_lock);
d337f35e 3809+
d337f35e
JR
3810+ vxd_assert(filp->f_xid == vx_current_xid(),
3811+ "f_xid(%d) == current(%d)", filp->f_xid, vx_current_xid());
3812+ file_lock->fl_xid = filp->f_xid;
cc23e853 3813+ // vx_locks_inc(file_lock);
d337f35e
JR
3814+
3815 /*
3816 * This might block, so we do it before checking the inode.
3817 */
cc23e853 3818@@ -2710,8 +2747,11 @@ static int locks_show(struct seq_file *f
2380c486 3819
c2e5f7c8 3820 lock_get_status(f, fl, iter->li_pos, "");
2380c486
JR
3821
3822- list_for_each_entry(bfl, &fl->fl_block, fl_block)
3823+ list_for_each_entry(bfl, &fl->fl_block, fl_block) {
3824+ if (!vx_check(bfl->fl_xid, VS_WATCH_P | VS_IDENT))
d337f35e 3825+ continue; /* skip waiters that fail the context check */
bb20add7 3826 lock_get_status(f, bfl, iter->li_pos, " ->");
2380c486 3827+ }
d337f35e 3828
2380c486 3829 return 0;
ab30d09f 3830 }
09a55596
AM
3831diff -NurpP --minimal linux-4.9.135/fs/mount.h linux-4.9.135-vs2.3.9.8/fs/mount.h
3832--- linux-4.9.135/fs/mount.h 2018-10-20 10:39:17.000000000 +0000
3833+++ linux-4.9.135-vs2.3.9.8/fs/mount.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 3834@@ -69,6 +69,7 @@ struct mount {
bb20add7 3835 struct hlist_head mnt_pins;
cc23e853
AM
3836 struct fs_pin mnt_umount;
3837 struct dentry *mnt_ex_mountpoint;
61333608 3838+ vtag_t mnt_tag; /* tagging used for vfsmount */
db55b927
AM
3839 };
3840
92598135 3841 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
09a55596
AM
3842diff -NurpP --minimal linux-4.9.135/fs/namei.c linux-4.9.135-vs2.3.9.8/fs/namei.c
3843--- linux-4.9.135/fs/namei.c 2018-10-20 10:39:17.000000000 +0000
3844+++ linux-4.9.135-vs2.3.9.8/fs/namei.c 2018-10-20 04:58:14.000000000 +0000
cc23e853
AM
3845@@ -37,9 +37,19 @@
3846 #include <linux/hash.h>
3847 #include <linux/bitops.h>
3848 #include <linux/init_task.h>
d337f35e 3849+#include <linux/proc_fs.h>
09be7631 3850+#include <linux/magic.h>
d337f35e
JR
3851+#include <linux/vserver/inode.h>
3852+#include <linux/vs_base.h>
3853+#include <linux/vs_tag.h>
3854+#include <linux/vs_cowbl.h>
2380c486
JR
3855+#include <linux/vs_device.h>
3856+#include <linux/vs_context.h>
3857+#include <linux/pid_namespace.h>
d337f35e
JR
3858 #include <asm/uaccess.h>
3859
2bf5ad28 3860 #include "internal.h"
09be7631
JR
3861+#include "proc/internal.h"
3862 #include "mount.h"
3863
3864 /* [Feb-1997 T. Schoebel-Theuer]
09a55596 3865@@ -286,6 +296,93 @@ static int check_acl(struct inode *inode
a168f21d
AM
3866 return -EAGAIN;
3867 }
d337f35e 3868
7e46296a 3869+static inline int dx_barrier(const struct inode *inode)
d337f35e 3870+{
2380c486
JR
3871+ if (IS_BARRIER(inode) && !vx_check(0, VS_ADMIN | VS_WATCH)) {
3872+ vxwprintk_task(1, "did hit the barrier.");
d337f35e
JR
3873+ return 1;
3874+ }
3875+ return 0;
3876+}
3877+
7e46296a 3878+static int __dx_permission(const struct inode *inode, int mask)
d337f35e
JR
3879+{
3880+ if (dx_barrier(inode))
3881+ return -EACCES;
d337f35e 3882+
2380c486
JR
3883+ if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) {
3884+ /* devpts is xid tagged */
3885+ if (S_ISDIR(inode->i_mode) ||
61333608 3886+ vx_check((vxid_t)i_tag_read(inode), VS_IDENT | VS_WATCH_P))
2380c486 3887+ return 0;
ba86f833 3888+
adc1caaa 3889+ /* just pretend we didn't find anything */
ba86f833 3890+ return -ENOENT;
2380c486
JR
3891+ }
3892+ else if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
3893+ struct proc_dir_entry *de = PDE(inode);
3894+
bb20add7
AM
3895+ if (de && !vx_hide_check(0, de->vx_flags)) {
3896+ vxdprintk(VXD_CBIT(misc, 9),
3897+ VS_Q("%*s") " hidden by _dx_permission",
3898+ de->namelen, de->name);
2380c486 3899+ goto out;
bb20add7 3900+ }
2380c486
JR
3901+
3902+ if ((mask & (MAY_WRITE | MAY_APPEND))) {
3903+ struct pid *pid;
3904+ struct task_struct *tsk;
3905+
3906+ if (vx_check(0, VS_ADMIN | VS_WATCH_P) ||
3907+ vx_flags(VXF_STATE_SETUP, 0))
3908+ return 0;
3909+
3910+ pid = PROC_I(inode)->pid;
3911+ if (!pid)
3912+ goto out;
3913+
c6ceaf95 3914+ rcu_read_lock();
2380c486
JR
3915+ tsk = pid_task(pid, PIDTYPE_PID);
3916+ vxdprintk(VXD_CBIT(tag, 0), "accessing %p[#%u]",
3917+ tsk, (tsk ? vx_task_xid(tsk) : 0));
c6ceaf95
AM
3918+ if (tsk &&
3919+ vx_check(vx_task_xid(tsk), VS_IDENT | VS_WATCH_P)) {
3920+ rcu_read_unlock();
2380c486 3921+ return 0;
c6ceaf95
AM
3922+ }
3923+ rcu_read_unlock();
2380c486
JR
3924+ }
3925+ else {
3926+ /* FIXME: Should we block some entries here? */
3927+ return 0;
3928+ }
3929+ }
3930+ else {
3931+ if (dx_notagcheck(inode->i_sb) ||
61333608 3932+ dx_check((vxid_t)i_tag_read(inode),
537831f9 3933+ DX_HOSTID | DX_ADMIN | DX_WATCH | DX_IDENT))
2380c486
JR
3934+ return 0;
3935+ }
3936+
3937+out:
d337f35e
JR
3938+ return -EACCES;
3939+}
3940+
7e46296a 3941+int dx_permission(const struct inode *inode, int mask)
2380c486
JR
3942+{
3943+ int ret = __dx_permission(inode, mask);
3944+ if (unlikely(ret)) {
ba86f833
AM
3945+#ifndef CONFIG_VSERVER_WARN_DEVPTS
3946+ if (inode->i_sb->s_magic != DEVPTS_SUPER_MAGIC)
3947+#endif
3948+ vxwprintk_task(1,
3949+ "denied [0x%x] access to inode %s:%p[#%d,%lu]",
8ce283e1
AM
3950+ mask, inode->i_sb->s_id, inode,
3951+ i_tag_read(inode), inode->i_ino);
2380c486
JR
3952+ }
3953+ return ret;
3954+}
3955+
7e46296a 3956 /*
f6c5ef8b 3957 * This does the basic permission checking
7e46296a 3958 */
09a55596 3959@@ -410,7 +507,7 @@ int __inode_permission(struct inode *ino
d337f35e
JR
3960 /*
3961 * Nobody gets write access to an immutable file.
3962 */
3963- if (IS_IMMUTABLE(inode))
3964+ if (IS_IMMUTABLE(inode) && !IS_COW(inode))
cc23e853
AM
3965 return -EPERM;
3966
3967 /*
09a55596 3968@@ -422,6 +519,10 @@ int __inode_permission(struct inode *ino
d337f35e
JR
3969 return -EACCES;
3970 }
3971
2380c486
JR
3972+ retval = dx_permission(inode, mask);
3973+ if (retval)
d337f35e 3974+ return retval;
2380c486 3975+
a168f21d
AM
3976 retval = do_inode_permission(inode, mask);
3977 if (retval)
3978 return retval;
09a55596 3979@@ -2783,7 +2884,7 @@ static int may_delete(struct inode *dir,
d337f35e 3980 return -EPERM;
c2e5f7c8
JR
3981
3982 if (check_sticky(dir, inode) || IS_APPEND(inode) ||
cc23e853
AM
3983- IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(inode))
3984+ IS_IXORUNLINK(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(inode))
d337f35e
JR
3985 return -EPERM;
3986 if (isdir) {
bb20add7 3987 if (!d_is_dir(victim))
09a55596 3988@@ -2871,19 +2972,25 @@ int vfs_create(struct inode *dir, struct
92598135 3989 bool want_excl)
a168f21d
AM
3990 {
3991 int error = may_create(dir, dentry);
a168f21d
AM
3992- if (error)
3993+ if (error) {
3994+ vxdprintk(VXD_CBIT(misc, 3), "may_create failed with %d", error);
537831f9 3995 return error;
a168f21d
AM
3996+ }
3997
3998 if (!dir->i_op->create)
3999 return -EACCES; /* shouldn't it be ENOSYS? */
4000 mode &= S_IALLUGO;
4001 mode |= S_IFREG;
4002 error = security_inode_create(dir, dentry, mode);
4003- if (error)
4004+ if (error) {
4005+ vxdprintk(VXD_CBIT(misc, 3), "security_inode_create failed with %d", error);
537831f9 4006 return error;
a168f21d 4007+ }
92598135 4008 error = dir->i_op->create(dir, dentry, mode, want_excl);
a168f21d
AM
4009 if (!error)
4010 fsnotify_create(dir, dentry);
4011+ else
4012+ vxdprintk(VXD_CBIT(misc, 3), "i_op->create failed with %d", error);
4013 return error;
4014 }
bb20add7 4015 EXPORT_SYMBOL(vfs_create);
09a55596 4016@@ -2921,6 +3028,15 @@ static int may_open(struct path *path, i
ec22aa5c 4017 break;
2380c486 4018 }
d337f35e
JR
4019
4020+#ifdef CONFIG_VSERVER_COWBL
763640ca
JR
4021+ if (IS_COW(inode) &&
4022+ ((flag & O_ACCMODE) != O_RDONLY)) {
d337f35e
JR
4023+ if (IS_COW_LINK(inode))
4024+ return -EMLINK;
2380c486 4025+ inode->i_flags &= ~(S_IXUNLINK|S_IMMUTABLE);
d337f35e
JR
4026+ mark_inode_dirty(inode);
4027+ }
4028+#endif
cc23e853 4029 error = inode_permission(inode, MAY_OPEN | acc_mode);
d337f35e
JR
4030 if (error)
4031 return error;
09a55596 4032@@ -3373,6 +3489,16 @@ finish_open:
7b17263b 4033 }
92598135 4034 finish_open_created:
7b17263b
AM
4035 error = may_open(&nd->path, acc_mode, open_flag);
4036+#ifdef CONFIG_VSERVER_COWBL
4037+ if (error == -EMLINK) {
4038+ struct dentry *dentry;
cc23e853 4039+ dentry = cow_break_link(nd->name->name);
7b17263b
AM
4040+ if (IS_ERR(dentry))
4041+ error = PTR_ERR(dentry);
4042+ else
4043+ dput(dentry);
4044+ }
4045+#endif
4046 if (error)
92598135 4047 goto out;
cc23e853 4048 BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
09a55596 4049@@ -3476,6 +3602,9 @@ static struct file *path_openat(struct n
92598135 4050 int opened = 0;
7b17263b
AM
4051 int error;
4052
cc23e853 4053+#ifdef CONFIG_VSERVER_COWBL
7b17263b 4054+restart:
cc23e853 4055+#endif
92598135 4056 file = get_empty_filp();
b00e13aa
AM
4057 if (IS_ERR(file))
4058 return file;
09a55596 4059@@ -3509,6 +3638,12 @@ static struct file *path_openat(struct n
cc23e853 4060 }
7b17263b 4061 }
cc23e853 4062 terminate_walk(nd);
7b17263b 4063+#ifdef CONFIG_VSERVER_COWBL
e915af4e 4064+ if (error == -EMLINK) {
cc23e853 4065+ // path_cleanup(nd);
7b17263b
AM
4066+ goto restart;
4067+ }
4068+#endif
cc23e853
AM
4069 out2:
4070 if (!(opened & FILE_OPENED)) {
4071 BUG_ON(!error);
09a55596 4072@@ -3629,6 +3764,11 @@ static struct dentry *filename_create(in
a168f21d
AM
4073 goto fail;
4074 }
cc23e853
AM
4075 putname(name);
4076+ vxdprintk(VXD_CBIT(misc, 3), "filename_create path.dentry = %p (%.*s), dentry = %p (%.*s), d_inode = %p",
a168f21d
AM
4077+ path->dentry, path->dentry->d_name.len,
4078+ path->dentry->d_name.name, dentry,
4079+ dentry->d_name.len, dentry->d_name.name,
4080+ path->dentry->d_inode);
4081 return dentry;
92598135 4082 fail:
a168f21d 4083 dput(dentry);
09a55596 4084@@ -3747,6 +3887,7 @@ retry:
cc23e853
AM
4085 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
4086 break;
4087 }
4088+
4089 out:
4090 done_path_create(&path, dentry);
4091 if (retry_estale(error, lookup_flags)) {
09a55596 4092@@ -4168,7 +4309,7 @@ int vfs_link(struct dentry *old_dentry,
d337f35e
JR
4093 /*
4094 * A link to an append-only or immutable file cannot be created.
4095 */
4096- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
4097+ if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
4098 return -EPERM;
cc23e853
AM
4099 /*
4100 * Updating the link count will likely cause i_uid and i_gid to
09a55596 4101@@ -4698,6 +4839,326 @@ const char *vfs_get_link(struct dentry *
d337f35e 4102 }
cc23e853 4103 EXPORT_SYMBOL(vfs_get_link);
d337f35e
JR
4104
4105+
4106+#ifdef CONFIG_VSERVER_COWBL
4107+
2380c486 4108+static inline
cc23e853
AM
4109+void dump_path(const char *name, struct path *path)
4110+{
4111+ vxdprintk(VXD_CBIT(misc, 3),
4112+ "%s: path=%p mnt=%p dentry=%p", name, path,
4113+ path ? path->mnt : NULL,
4114+ path ? path->dentry : NULL);
4115+
4116+ if (path && path->mnt)
4117+ vxdprintk(VXD_CBIT(misc, 3),
4118+ "%s: path mnt_sb=%p[#%d,#%d] mnt_root=%p[#%d]", name,
4119+ path->mnt->mnt_sb,
4120+ path->mnt->mnt_sb ? path->mnt->mnt_sb->s_count : -1,
4121+ path->mnt->mnt_sb ? atomic_read(&path->mnt->mnt_sb->s_active) : -1,
4122+ path->mnt->mnt_root,
4123+ path->mnt->mnt_root ? path->mnt->mnt_root->d_lockref.count : -1);
4124+
4125+ if (path && path->dentry)
4126+ vxdprintk(VXD_CBIT(misc, 3),
4127+ "%s: path dentry=%p[#%d]", name,
4128+ path->dentry,
4129+ path->dentry ? path->dentry->d_lockref.count : -1);
4130+}
4131+
4132+static inline
2380c486
JR
4133+long do_cow_splice(struct file *in, struct file *out, size_t len)
4134+{
4135+ loff_t ppos = 0;
09be7631 4136+ loff_t opos = 0;
2380c486 4137+
09be7631 4138+ return do_splice_direct(in, &ppos, out, &opos, len, 0);
2380c486
JR
4139+}
4140+
d337f35e
JR
4141+struct dentry *cow_break_link(const char *pathname)
4142+{
b00e13aa 4143+ int ret, mode, pathlen, redo = 0, drop = 1;
cc23e853 4144+ struct path old_path = {}, par_path = {}, dir_path = {}, *new_path = NULL;
a168f21d 4145+ struct dentry *dir, *old_dentry, *new_dentry = NULL;
d337f35e
JR
4146+ struct file *old_file;
4147+ struct file *new_file;
cc23e853
AM
4148+ struct qstr new_qstr;
4149+ int new_type;
d337f35e
JR
4150+ char *to, *path, pad='\251';
4151+ loff_t size;
cc23e853
AM
4152+ struct filename *filename = getname_kernel(pathname);
4153+ struct filename *to_filename;
d337f35e 4154+
ba86f833
AM
4155+ vxdprintk(VXD_CBIT(misc, 1),
4156+ "cow_break_link(" VS_Q("%s") ")", pathname);
e915af4e 4157+
d337f35e 4158+ path = kmalloc(PATH_MAX, GFP_KERNEL);
2380c486 4159+ ret = -ENOMEM;
cc23e853 4160+ if (!path || IS_ERR(filename))
2380c486 4161+ goto out;
d337f35e 4162+
cc23e853
AM
4163+ /* old_path will have refs to dentry and mnt */
4164+ ret = filename_lookup(AT_FDCWD, filename, LOOKUP_FOLLOW, &old_path, NULL);
a168f21d 4165+ vxdprintk(VXD_CBIT(misc, 2),
e915af4e 4166+ "do_path_lookup(old): %d", ret);
2380c486
JR
4167+ if (ret < 0)
4168+ goto out_free_path;
d337f35e 4169+
cc23e853
AM
4170+ dump_path("cow (old)", &old_path);
4171+
e915af4e 4172+ /* no explicit reference for old_dentry here */
cc23e853 4173+ old_dentry = old_path.dentry;
2380c486 4174+
e915af4e 4175+ mode = old_dentry->d_inode->i_mode;
cc23e853 4176+ to = d_path(&old_path, path, PATH_MAX-2);
d337f35e 4177+ pathlen = strlen(to);
ba86f833 4178+ vxdprintk(VXD_CBIT(misc, 2),
a168f21d
AM
4179+ "old path " VS_Q("%s") " [%p:" VS_Q("%.*s") ":%d]", to,
4180+ old_dentry,
4181+ old_dentry->d_name.len, old_dentry->d_name.name,
4182+ old_dentry->d_name.len);
d337f35e 4183+
2380c486 4184+ to[pathlen + 1] = 0;
d337f35e 4185+retry:
a168f21d 4186+ new_dentry = NULL;
d337f35e 4187+ to[pathlen] = pad--;
a168f21d 4188+ ret = -ELOOP;
d337f35e
JR
4189+ if (pad <= '\240')
4190+ goto out_rel_old;
4191+
ba86f833 4192+ vxdprintk(VXD_CBIT(misc, 1), "temp copy " VS_Q("%s"), to);
e915af4e 4193+
cc23e853
AM
4194+ /* dir_path will have refs to dentry and mnt */
4195+ to_filename = getname_kernel(to);
4196+ to_filename = filename_parentat(AT_FDCWD, to_filename,
4197+ LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE, &par_path, &new_qstr, &new_type);
4198+ vxdprintk(VXD_CBIT(misc, 2), "filename_parentat(new): %p", to_filename);
4199+ dump_path("cow (par)", &par_path);
4200+ if (IS_ERR(to_filename))
2380c486
JR
4201+ goto retry;
4202+
cc23e853
AM
4203+ vxdprintk(VXD_CBIT(misc, 2), "to_filename refcnt=%d", to_filename->refcnt);
4204+
e915af4e
AM
4205+ /* this puppy downs the dir inode mutex if successful.
4206+ dir_path will hold refs to dentry and mnt and
b00e13aa 4207+ we'll have write access to the mnt */
cc23e853 4208+ new_dentry = filename_create(AT_FDCWD, to_filename, &dir_path, 0);
a168f21d 4209+ if (!new_dentry || IS_ERR(new_dentry)) {
cc23e853 4210+ path_put(&par_path);
a168f21d 4211+ vxdprintk(VXD_CBIT(misc, 2),
cc23e853 4212+ "filename_create(new) failed with %ld",
a168f21d 4213+ PTR_ERR(new_dentry));
d337f35e
JR
4214+ goto retry;
4215+ }
2380c486 4216+ vxdprintk(VXD_CBIT(misc, 2),
cc23e853 4217+ "filename_create(new): %p [" VS_Q("%.*s") ":%d]",
a168f21d
AM
4218+ new_dentry,
4219+ new_dentry->d_name.len, new_dentry->d_name.name,
4220+ new_dentry->d_name.len);
4221+
cc23e853
AM
4222+ dump_path("cow (dir)", &dir_path);
4223+
e915af4e
AM
4224+ /* take a reference on new_dentry */
4225+ dget(new_dentry);
4226+
4227+ /* dentry/mnt refs handed over to new_path */
4228+ new_path = &dir_path;
4229+
4230+ /* dentry for old/new dir */
cc23e853 4231+ dir = par_path.dentry;
d337f35e 4232+
e915af4e
AM
4233+ /* give up reference on dir */
4234+ dput(new_path->dentry);
4235+
4236+ /* new_dentry already has a reference */
4237+ new_path->dentry = new_dentry;
4238+
4239+ ret = vfs_create(dir->d_inode, new_dentry, mode, 1);
d337f35e
JR
4240+ vxdprintk(VXD_CBIT(misc, 2),
4241+ "vfs_create(new): %d", ret);
4242+ if (ret == -EEXIST) {
cc23e853
AM
4243+ path_put(&par_path);
4244+ inode_unlock(dir->d_inode);
e915af4e
AM
4245+ mnt_drop_write(new_path->mnt);
4246+ path_put(new_path);
4247+ new_dentry = NULL;
d337f35e
JR
4248+ goto retry;
4249+ }
2380c486
JR
4250+ else if (ret < 0)
4251+ goto out_unlock_new;
4252+
cc23e853 4253+ /* the old file went away */
2380c486 4254+ ret = -ENOENT;
a168f21d 4255+ if ((redo = d_unhashed(old_dentry)))
2380c486
JR
4256+ goto out_unlock_new;
4257+
e915af4e 4258+ /* doesn't change refs for old_path */
cc23e853 4259+ old_file = dentry_open(&old_path, O_RDONLY, current_cred());
d337f35e
JR
4260+ vxdprintk(VXD_CBIT(misc, 2),
4261+ "dentry_open(old): %p", old_file);
a168f21d
AM
4262+ if (IS_ERR(old_file)) {
4263+ ret = PTR_ERR(old_file);
2380c486
JR
4264+ goto out_unlock_new;
4265+ }
d337f35e 4266+
e915af4e
AM
4267+ /* doesn't change refs for new_path */
4268+ new_file = dentry_open(new_path, O_WRONLY, current_cred());
d337f35e
JR
4269+ vxdprintk(VXD_CBIT(misc, 2),
4270+ "dentry_open(new): %p", new_file);
a168f21d
AM
4271+ if (IS_ERR(new_file)) {
4272+ ret = PTR_ERR(new_file);
d337f35e 4273+ goto out_fput_old;
a168f21d 4274+ }
d337f35e 4275+
cc23e853
AM
4276+ /* unlock the inode from filename_create() */
4277+ inode_unlock(dir->d_inode);
b00e13aa
AM
4278+
4279+ /* drop write access to mnt */
4280+ mnt_drop_write(new_path->mnt);
4281+
4282+ drop = 0;
4283+
cc23e853 4284+ size = i_size_read(old_file->f_path.dentry->d_inode);
2380c486
JR
4285+ ret = do_cow_splice(old_file, new_file, size);
4286+ vxdprintk(VXD_CBIT(misc, 2), "do_splice_direct: %d", ret);
4287+ if (ret < 0) {
d337f35e 4288+ goto out_fput_both;
2380c486
JR
4289+ } else if (ret < size) {
4290+ ret = -ENOSPC;
4291+ goto out_fput_both;
4292+ } else {
a168f21d
AM
4293+ struct inode *old_inode = old_dentry->d_inode;
4294+ struct inode *new_inode = new_dentry->d_inode;
2380c486
JR
4295+ struct iattr attr = {
4296+ .ia_uid = old_inode->i_uid,
4297+ .ia_gid = old_inode->i_gid,
4298+ .ia_valid = ATTR_UID | ATTR_GID
4299+ };
4300+
93de0823
AM
4301+ setattr_copy(new_inode, &attr);
4302+ mark_inode_dirty(new_inode);
2380c486 4303+ }
d337f35e 4304+
e915af4e 4305+ /* lock rename mutex */
a168f21d 4306+ mutex_lock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
2380c486
JR
4307+
4308+ /* drop out late */
4309+ ret = -ENOENT;
a168f21d 4310+ if ((redo = d_unhashed(old_dentry)))
2380c486
JR
4311+ goto out_unlock;
4312+
4313+ vxdprintk(VXD_CBIT(misc, 2),
ba86f833 4314+ "vfs_rename: [" VS_Q("%*s") ":%d] -> [" VS_Q("%*s") ":%d]",
a168f21d
AM
4315+ new_dentry->d_name.len, new_dentry->d_name.name,
4316+ new_dentry->d_name.len,
4317+ old_dentry->d_name.len, old_dentry->d_name.name,
4318+ old_dentry->d_name.len);
cc23e853 4319+ ret = vfs_rename(par_path.dentry->d_inode, new_dentry,
eafa5b1d 4320+ old_dentry->d_parent->d_inode, old_dentry, NULL, 0);
d337f35e 4321+ vxdprintk(VXD_CBIT(misc, 2), "vfs_rename: %d", ret);
2380c486
JR
4322+
4323+out_unlock:
a168f21d 4324+ mutex_unlock(&old_dentry->d_inode->i_sb->s_vfs_rename_mutex);
d337f35e
JR
4325+
4326+out_fput_both:
4327+ vxdprintk(VXD_CBIT(misc, 3),
2380c486 4328+ "fput(new_file=%p[#%ld])", new_file,
4a036bed 4329+ atomic_long_read(&new_file->f_count));
d337f35e
JR
4330+ fput(new_file);
4331+
4332+out_fput_old:
4333+ vxdprintk(VXD_CBIT(misc, 3),
2380c486 4334+ "fput(old_file=%p[#%ld])", old_file,
4a036bed 4335+ atomic_long_read(&old_file->f_count));
d337f35e
JR
4336+ fput(old_file);
4337+
2380c486 4338+out_unlock_new:
cc23e853
AM
4339+ /* drop references from par_path */
4340+ path_put(&par_path);
e915af4e 4341+
b00e13aa 4342+ if (drop) {
cc23e853
AM
4343+ /* unlock the inode from filename_create() */
4344+ inode_unlock(dir->d_inode);
b00e13aa
AM
4345+
4346+ /* drop write access to mnt */
4347+ mnt_drop_write(new_path->mnt);
4348+ }
e915af4e 4349+
2380c486
JR
4350+ if (!ret)
4351+ goto out_redo;
4352+
4353+ /* error path cleanup */
c2e5f7c8 4354+ vfs_unlink(dir->d_inode, new_dentry, NULL);
2380c486
JR
4355+
4356+out_redo:
4357+ if (!redo)
4358+ goto out_rel_both;
e915af4e
AM
4359+
4360+ /* lookup dentry once again
cc23e853
AM
4361+ old_path will be freed as old_path in out_rel_old */
4362+ ret = filename_lookup(AT_FDCWD, filename, LOOKUP_FOLLOW, &old_path, NULL);
2380c486
JR
4363+ if (ret)
4364+ goto out_rel_both;
d337f35e 4365+
e915af4e 4366+ /* drop reference on new_dentry */
a168f21d 4367+ dput(new_dentry);
cc23e853 4368+ new_dentry = old_path.dentry;
e915af4e 4369+ dget(new_dentry);
2380c486 4370+ vxdprintk(VXD_CBIT(misc, 2),
763640ca 4371+ "do_path_lookup(redo): %p [" VS_Q("%.*s") ":%d]",
a168f21d
AM
4372+ new_dentry,
4373+ new_dentry->d_name.len, new_dentry->d_name.name,
4374+ new_dentry->d_name.len);
2380c486
JR
4375+
4376+out_rel_both:
cc23e853 4377+ dump_path("put (new)", new_path);
e915af4e
AM
4378+ if (new_path)
4379+ path_put(new_path);
d337f35e 4380+out_rel_old:
cc23e853
AM
4381+ dump_path("put (old)", &old_path);
4382+ path_put(&old_path);
2380c486 4383+out_free_path:
d337f35e 4384+ kfree(path);
2380c486 4385+out:
a168f21d
AM
4386+ if (ret) {
4387+ dput(new_dentry);
4388+ new_dentry = ERR_PTR(ret);
4389+ }
cc23e853
AM
4390+ // if (!IS_ERR(filename))
4391+ // putname(filename);
a168f21d 4392+ vxdprintk(VXD_CBIT(misc, 3),
e915af4e 4393+ "cow_break_link returning with %p", new_dentry);
a168f21d 4394+ return new_dentry;
d337f35e
JR
4395+}
4396+
4397+#endif
1e8b8f9b
AM
4398+
4399+int vx_info_mnt_namespace(struct mnt_namespace *ns, char *buffer)
4400+{
4401+ struct path path;
4402+ struct vfsmount *vmnt;
4403+ char *pstr, *root;
4404+ int length = 0;
4405+
4406+ pstr = kmalloc(PATH_MAX, GFP_KERNEL);
4407+ if (!pstr)
4408+ return 0;
4409+
4410+ vmnt = &ns->root->mnt;
4411+ path.mnt = vmnt;
4412+ path.dentry = vmnt->mnt_root;
4413+ root = d_path(&path, pstr, PATH_MAX - 2);
4414+ length = sprintf(buffer + length,
4415+ "Namespace:\t%p [#%u]\n"
4416+ "RootPath:\t%s\n",
4417+ ns, atomic_read(&ns->count),
4418+ root);
4419+ kfree(pstr);
4420+ return length;
4421+}
bb20add7 4422+
265de2f7 4423+EXPORT_SYMBOL(vx_info_mnt_namespace);
d337f35e
JR
4424+
4425 /* get the link contents into pagecache */
cc23e853
AM
4426 const char *page_get_link(struct dentry *dentry, struct inode *inode,
4427 struct delayed_call *callback)
09a55596
AM
4428diff -NurpP --minimal linux-4.9.135/fs/namespace.c linux-4.9.135-vs2.3.9.8/fs/namespace.c
4429--- linux-4.9.135/fs/namespace.c 2018-10-20 10:39:17.000000000 +0000
4430+++ linux-4.9.135-vs2.3.9.8/fs/namespace.c 2018-10-20 05:55:43.000000000 +0000
978063ce 4431@@ -24,6 +24,11 @@
09be7631 4432 #include <linux/magic.h>
52afa9bd 4433 #include <linux/bootmem.h>
bb20add7 4434 #include <linux/task_work.h>
d337f35e 4435+#include <linux/vs_base.h>
d337f35e
JR
4436+#include <linux/vs_context.h>
4437+#include <linux/vs_tag.h>
2380c486
JR
4438+#include <linux/vserver/space.h>
4439+#include <linux/vserver/global.h>
d337f35e 4440 #include "pnode.h"
db55b927
AM
4441 #include "internal.h"
4442
09a55596 4443@@ -980,6 +985,10 @@ vfs_kern_mount(struct file_system_type *
be261992
AM
4444 if (!type)
4445 return ERR_PTR(-ENODEV);
4446
4447+ if ((type->fs_flags & FS_BINARY_MOUNTDATA) &&
4448+ !vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT))
4449+ return ERR_PTR(-EPERM);
4450+
4451 mnt = alloc_vfsmnt(name);
4452 if (!mnt)
4453 return ERR_PTR(-ENOMEM);
09a55596 4454@@ -1071,6 +1080,7 @@ static struct mount *clone_mnt(struct mo
92598135
AM
4455 mnt->mnt.mnt_root = dget(root);
4456 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
4457 mnt->mnt_parent = mnt;
c2e5f7c8
JR
4458+ mnt->mnt_tag = old->mnt_tag;
4459 lock_mount_hash();
92598135 4460 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
c2e5f7c8 4461 unlock_mount_hash();
09a55596 4462@@ -1660,7 +1670,8 @@ out_unlock:
c2e5f7c8
JR
4463 */
4464 static inline bool may_mount(void)
4465 {
4466- return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
4467+ return vx_ns_capable(current->nsproxy->mnt_ns->user_ns,
4468+ CAP_SYS_ADMIN, VXC_SECURE_MOUNT);
4469 }
4470
cc23e853 4471 static inline bool may_mandlock(void)
09a55596 4472@@ -2169,6 +2180,7 @@ static int do_change_type(struct path *p
763640ca
JR
4473 if (err)
4474 goto out_unlock;
4475 }
4476+ // mnt->mnt_flags = mnt_flags;
4477
c2e5f7c8 4478 lock_mount_hash();
763640ca 4479 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
09a55596 4480@@ -2197,12 +2209,14 @@ static bool has_locked_children(struct m
ec22aa5c 4481 * do loopback mount.
d337f35e 4482 */
537831f9 4483 static int do_loopback(struct path *path, const char *old_name,
2380c486 4484- int recurse)
61333608 4485+ vtag_t tag, unsigned long flags, int mnt_flags)
d337f35e 4486 {
ec22aa5c 4487 struct path old_path;
09be7631
JR
4488 struct mount *mnt = NULL, *old, *parent;
4489 struct mountpoint *mp;
d337f35e 4490+ int recurse = flags & MS_REC;
b00e13aa 4491 int err;
2380c486 4492+
d337f35e 4493 if (!old_name || !*old_name)
b00e13aa
AM
4494 return -EINVAL;
4495 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
09a55596 4496@@ -2282,7 +2296,7 @@ static int change_mount_flags(struct vfs
ec22aa5c 4497 * on it - tough luck.
d337f35e 4498 */
ec22aa5c 4499 static int do_remount(struct path *path, int flags, int mnt_flags,
d337f35e 4500- void *data)
61333608 4501+ void *data, vxid_t xid)
d337f35e
JR
4502 {
4503 int err;
ec22aa5c 4504 struct super_block *sb = path->mnt->mnt_sb;
09a55596 4505@@ -2767,6 +2781,7 @@ long do_mount(const char *dev_name, cons
ec22aa5c 4506 struct path path;
d337f35e
JR
4507 int retval = 0;
4508 int mnt_flags = 0;
61333608 4509+ vtag_t tag = 0;
d337f35e
JR
4510
4511 /* Discard magic */
4512 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
09a55596 4513@@ -2794,6 +2809,12 @@ long do_mount(const char *dev_name, cons
ec22aa5c
AM
4514 if (!(flags & MS_NOATIME))
4515 mnt_flags |= MNT_RELATIME;
d337f35e 4516
2380c486
JR
4517+ if (dx_parse_tag(data_page, &tag, 1, &mnt_flags, &flags)) {
4518+ /* FIXME: bind and re-mounts get the tag flag? */
d337f35e
JR
4519+ if (flags & (MS_BIND|MS_REMOUNT))
4520+ flags |= MS_TAGID;
4521+ }
d337f35e
JR
4522+
4523 /* Separate the per-mountpoint flags */
d337f35e
JR
4524 if (flags & MS_NOSUID)
4525 mnt_flags |= MNT_NOSUID;
09a55596 4526@@ -2818,15 +2839,18 @@ long do_mount(const char *dev_name, cons
bb20add7
AM
4527 mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
4528 }
d337f35e 4529
b00e13aa 4530+ if (!vx_capable(CAP_SYS_ADMIN, VXC_DEV_MOUNT))
d337f35e 4531+ mnt_flags |= MNT_NODEV;
cc23e853 4532+
c146dd73 4533 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
ec22aa5c 4534 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
cc23e853 4535 MS_STRICTATIME | MS_NOREMOTELOCK | MS_SUBMOUNT);
d337f35e
JR
4536
4537 if (flags & MS_REMOUNT)
ec22aa5c 4538 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
d337f35e
JR
4539- data_page);
4540+ data_page, tag);
4541 else if (flags & MS_BIND)
ec22aa5c
AM
4542- retval = do_loopback(&path, dev_name, flags & MS_REC);
4543+ retval = do_loopback(&path, dev_name, tag, flags, mnt_flags);
d337f35e 4544 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
ec22aa5c 4545 retval = do_change_type(&path, flags);
d337f35e 4546 else if (flags & MS_MOVE)
09a55596 4547@@ -2967,6 +2991,7 @@ struct mnt_namespace *copy_mnt_ns(unsign
c2e5f7c8 4548 p = next_mnt(p, old);
d337f35e 4549 }
09be7631 4550 namespace_unlock();
2380c486
JR
4551+ atomic_inc(&vs_global_mnt_ns);
4552
4553 if (rootmnt)
4554 mntput(rootmnt);
09a55596 4555@@ -3142,9 +3167,10 @@ SYSCALL_DEFINE2(pivot_root, const char _
db55b927
AM
4556 new_mnt = real_mount(new.mnt);
4557 root_mnt = real_mount(root.mnt);
09be7631
JR
4558 old_mnt = real_mount(old.mnt);
4559- if (IS_MNT_SHARED(old_mnt) ||
4560+ if ((IS_MNT_SHARED(old_mnt) ||
db55b927
AM
4561 IS_MNT_SHARED(new_mnt->mnt_parent) ||
4562- IS_MNT_SHARED(root_mnt->mnt_parent))
4563+ IS_MNT_SHARED(root_mnt->mnt_parent)) &&
50e68740 4564+ !vx_flags(VXF_STATE_SETUP, 0))
763640ca 4565 goto out4;
db55b927 4566 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
763640ca 4567 goto out4;
09a55596 4568@@ -3282,6 +3308,7 @@ void put_mnt_ns(struct mnt_namespace *ns
c2e5f7c8
JR
4569 if (!atomic_dec_and_test(&ns->count))
4570 return;
4571 drop_collected_mounts(&ns->root->mnt);
2380c486 4572+ atomic_dec(&vs_global_mnt_ns);
b00e13aa 4573 free_mnt_ns(ns);
2380c486 4574 }
db55b927 4575
09a55596
AM
4576diff -NurpP --minimal linux-4.9.135/fs/nfs/client.c linux-4.9.135-vs2.3.9.8/fs/nfs/client.c
4577--- linux-4.9.135/fs/nfs/client.c 2016-12-11 19:17:54.000000000 +0000
4578+++ linux-4.9.135-vs2.3.9.8/fs/nfs/client.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 4579@@ -586,6 +586,9 @@ int nfs_init_server_rpcclient(struct nfs
2380c486
JR
4580 if (server->flags & NFS_MOUNT_SOFT)
4581 server->client->cl_softrtry = 1;
d337f35e
JR
4582
4583+ server->client->cl_tag = 0;
4584+ if (server->flags & NFS_MOUNT_TAGGED)
4585+ server->client->cl_tag = 1;
4586 return 0;
4587 }
92598135 4588 EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient);
cc23e853 4589@@ -762,6 +765,10 @@ static void nfs_server_set_fsinfo(struct
d337f35e
JR
4590 server->acdirmin = server->acdirmax = 0;
4591 }
4592
4593+ /* FIXME: needs fsinfo
4594+ if (server->flags & NFS_MOUNT_TAGGED)
4595+ sb->s_flags |= MS_TAGGED; */
4596+
4597 server->maxfilesize = fsinfo->maxfilesize;
4598
ab30d09f 4599 server->time_delta = fsinfo->time_delta;
09a55596
AM
4600diff -NurpP --minimal linux-4.9.135/fs/nfs/dir.c linux-4.9.135-vs2.3.9.8/fs/nfs/dir.c
4601--- linux-4.9.135/fs/nfs/dir.c 2018-10-20 10:39:17.000000000 +0000
4602+++ linux-4.9.135-vs2.3.9.8/fs/nfs/dir.c 2018-10-20 04:58:14.000000000 +0000
c2e5f7c8 4603@@ -37,6 +37,7 @@
2380c486 4604 #include <linux/sched.h>
ab30d09f 4605 #include <linux/kmemleak.h>
d33d7b00 4606 #include <linux/xattr.h>
d337f35e
JR
4607+#include <linux/vs_tag.h>
4608
d337f35e 4609 #include "delegation.h"
ab30d09f 4610 #include "iostat.h"
cc23e853 4611@@ -1420,6 +1421,7 @@ struct dentry *nfs_lookup(struct inode *
42bc425c
AM
4612 /* Success: notify readdir to use READDIRPLUS */
4613 nfs_advise_use_readdirplus(dir);
d337f35e
JR
4614
4615+ dx_propagate_tag(nd, inode);
4616 no_entry:
cc23e853 4617 res = d_splice_alias(inode, dentry);
d337f35e 4618 if (res != NULL) {
09a55596
AM
4619diff -NurpP --minimal linux-4.9.135/fs/nfs/inode.c linux-4.9.135-vs2.3.9.8/fs/nfs/inode.c
4620--- linux-4.9.135/fs/nfs/inode.c 2018-10-20 10:39:17.000000000 +0000
4621+++ linux-4.9.135-vs2.3.9.8/fs/nfs/inode.c 2018-10-20 04:58:14.000000000 +0000
c2e5f7c8
JR
4622@@ -38,6 +38,7 @@
4623 #include <linux/slab.h>
d33d7b00 4624 #include <linux/compat.h>
db55b927 4625 #include <linux/freezer.h>
d337f35e
JR
4626+#include <linux/vs_tag.h>
4627
d337f35e 4628 #include <asm/uaccess.h>
1e8b8f9b 4629
cc23e853 4630@@ -383,6 +384,8 @@ nfs_fhget(struct super_block *sb, struct
ec22aa5c
AM
4631 if (inode->i_state & I_NEW) {
4632 struct nfs_inode *nfsi = NFS_I(inode);
4633 unsigned long now = jiffies;
a4a22af8
AM
4634+ kuid_t kuid;
4635+ kgid_t kgid;
ec22aa5c
AM
4636
4637 /* We set i_ino for the few things that still rely on it,
4638 * such as stat(2) */
cc23e853 4639@@ -427,8 +430,8 @@ nfs_fhget(struct super_block *sb, struct
f6c5ef8b 4640 inode->i_version = 0;
ec22aa5c 4641 inode->i_size = 0;
f6c5ef8b 4642 clear_nlink(inode);
b00e13aa
AM
4643- inode->i_uid = make_kuid(&init_user_ns, -2);
4644- inode->i_gid = make_kgid(&init_user_ns, -2);
a4a22af8
AM
4645+ kuid = make_kuid(&init_user_ns, -2);
4646+ kgid = make_kgid(&init_user_ns, -2);
ec22aa5c
AM
4647 inode->i_blocks = 0;
4648 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
42bc425c 4649 nfsi->write_io = 0;
cc23e853 4650@@ -463,11 +466,11 @@ nfs_fhget(struct super_block *sb, struct
7e46296a 4651 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
bb20add7 4652 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
ec22aa5c
AM
4653 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
4654- inode->i_uid = fattr->uid;
a4a22af8 4655+ kuid = fattr->uid;
7e46296a 4656 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
bb20add7 4657 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
ec22aa5c
AM
4658 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
4659- inode->i_gid = fattr->gid;
a4a22af8 4660+ kgid = fattr->gid;
7e46296a 4661 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
bb20add7 4662 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
42bc425c 4663 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
cc23e853 4664@@ -478,6 +481,10 @@ nfs_fhget(struct super_block *sb, struct
ec22aa5c
AM
4665 */
4666 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
4667 }
a4a22af8
AM
4668+ inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
4669+ inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
4670+ inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, GLOBAL_ROOT_TAG);
ec22aa5c 4671+ /* maybe fattr->xid someday */
c2e5f7c8
JR
4672
4673 nfs_setsecurity(inode, fattr, label);
4674
cc23e853 4675@@ -619,6 +626,8 @@ void nfs_setattr_update_inode(struct ino
d337f35e
JR
4676 inode->i_uid = attr->ia_uid;
4677 if ((attr->ia_valid & ATTR_GID) != 0)
4678 inode->i_gid = attr->ia_gid;
4679+ if ((attr->ia_valid & ATTR_TAG) && IS_TAGGED(inode))
4680+ inode->i_tag = attr->ia_tag;
bb20add7
AM
4681 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
4682 | NFS_INO_INVALID_ACL);
cc23e853
AM
4683 }
4684@@ -1272,7 +1281,9 @@ static int nfs_check_inode_attributes(st
d337f35e
JR
4685 struct nfs_inode *nfsi = NFS_I(inode);
4686 loff_t cur_size, new_isize;
2380c486 4687 unsigned long invalid = 0;
a4a22af8 4688-
b00e13aa
AM
4689+ kuid_t kuid;
4690+ kgid_t kgid;
4691+ ktag_t ktag;
d337f35e 4692
42bc425c 4693 if (nfs_have_delegated_attributes(inode))
a4a22af8 4694 return 0;
cc23e853
AM
4695@@ -1301,13 +1312,18 @@ static int nfs_check_inode_attributes(st
4696 }
ec22aa5c 4697 }
d337f35e 4698
a4a22af8
AM
4699+ kuid = INOTAG_KUID(DX_TAG(inode), fattr->uid, fattr->gid);
4700+ kgid = INOTAG_KGID(DX_TAG(inode), fattr->uid, fattr->gid);
4701+ ktag = INOTAG_KTAG(DX_TAG(inode), fattr->uid, fattr->gid, GLOBAL_ROOT_TAG);
d337f35e
JR
4702+
4703 /* Have any file permissions changed? */
ec22aa5c 4704 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
9474138d 4705 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
b00e13aa
AM
4706- if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
4707+ if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, kuid))
ec22aa5c 4708 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
b00e13aa
AM
4709- if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
4710+ if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, kgid))
ec22aa5c
AM
4711 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
4712+ /* maybe check for tag too? */
d337f35e
JR
4713
4714 /* Has the link count changed? */
ec22aa5c 4715 if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
cc23e853 4716@@ -1666,6 +1682,9 @@ static int nfs_update_inode(struct inode
7e46296a 4717 unsigned long save_cache_validity;
cc23e853
AM
4718 bool have_writers = nfs_file_has_buffered_writers(nfsi);
4719 bool cache_revalidated = true;
a4a22af8
AM
4720+ kuid_t kuid;
4721+ kgid_t kgid;
4722+ ktag_t ktag;
d337f35e 4723
bb20add7 4724 dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n",
2380c486 4725 __func__, inode->i_sb->s_id, inode->i_ino,
cc23e853
AM
4726@@ -1785,6 +1804,9 @@ static int nfs_update_inode(struct inode
4727 cache_revalidated = false;
4728 }
d337f35e 4729
a4a22af8
AM
4730+ kuid = TAGINO_KUID(DX_TAG(inode), inode->i_uid, inode->i_tag);
4731+ kgid = TAGINO_KGID(DX_TAG(inode), inode->i_gid, inode->i_tag);
4732+ ktag = TAGINO_KTAG(DX_TAG(inode), inode->i_tag);
ec22aa5c
AM
4733
4734 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
4735 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
cc23e853
AM
4736@@ -1839,6 +1861,10 @@ static int nfs_update_inode(struct inode
4737 cache_revalidated = false;
4738 }
ec22aa5c 4739
a4a22af8
AM
4740+ inode->i_uid = INOTAG_KUID(DX_TAG(inode), kuid, kgid);
4741+ inode->i_gid = INOTAG_KGID(DX_TAG(inode), kuid, kgid);
4742+ inode->i_tag = INOTAG_KTAG(DX_TAG(inode), kuid, kgid, ktag);
ec22aa5c
AM
4743+
4744 if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
4745 if (inode->i_nlink != fattr->nlink) {
4746 invalid |= NFS_INO_INVALID_ATTR;
09a55596
AM
4747diff -NurpP --minimal linux-4.9.135/fs/nfs/nfs3xdr.c linux-4.9.135-vs2.3.9.8/fs/nfs/nfs3xdr.c
4748--- linux-4.9.135/fs/nfs/nfs3xdr.c 2016-12-11 19:17:54.000000000 +0000
4749+++ linux-4.9.135-vs2.3.9.8/fs/nfs/nfs3xdr.c 2018-10-20 04:58:14.000000000 +0000
78865d5b 4750@@ -20,6 +20,7 @@
d337f35e
JR
4751 #include <linux/nfs3.h>
4752 #include <linux/nfs_fs.h>
4753 #include <linux/nfsacl.h>
4754+#include <linux/vs_tag.h>
4755 #include "internal.h"
4756
4757 #define NFSDBG_FACILITY NFSDBG_XDR
b00e13aa 4758@@ -558,7 +559,8 @@ static __be32 *xdr_decode_nfstime3(__be3
d33d7b00
AM
4759 * set_mtime mtime;
4760 * };
4761 */
4762-static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
4763+static void encode_sattr3(struct xdr_stream *xdr,
4764+ const struct iattr *attr, int tag)
d337f35e 4765 {
d33d7b00
AM
4766 u32 nbytes;
4767 __be32 *p;
b00e13aa 4768@@ -590,15 +592,19 @@ static void encode_sattr3(struct xdr_str
d33d7b00 4769 } else
d337f35e 4770 *p++ = xdr_zero;
d33d7b00 4771
d337f35e
JR
4772- if (attr->ia_valid & ATTR_UID) {
4773+ if (attr->ia_valid & ATTR_UID ||
4774+ (tag && (attr->ia_valid & ATTR_TAG))) {
4775 *p++ = xdr_one;
b00e13aa 4776- *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
a4a22af8
AM
4777+ *p++ = cpu_to_be32(from_kuid(&init_user_ns,
4778+ TAGINO_KUID(tag, attr->ia_uid, attr->ia_tag)));
d33d7b00 4779 } else
d337f35e 4780 *p++ = xdr_zero;
d33d7b00 4781
d337f35e
JR
4782- if (attr->ia_valid & ATTR_GID) {
4783+ if (attr->ia_valid & ATTR_GID ||
4784+ (tag && (attr->ia_valid & ATTR_TAG))) {
4785 *p++ = xdr_one;
b00e13aa 4786- *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
a4a22af8
AM
4787+ *p++ = cpu_to_be32(from_kgid(&init_user_ns,
4788+ TAGINO_KGID(tag, attr->ia_gid, attr->ia_tag)));
d33d7b00 4789 } else
d337f35e 4790 *p++ = xdr_zero;
d33d7b00 4791
b00e13aa 4792@@ -887,7 +893,7 @@ static void nfs3_xdr_enc_setattr3args(st
d33d7b00 4793 const struct nfs3_sattrargs *args)
d337f35e 4794 {
d33d7b00
AM
4795 encode_nfs_fh3(xdr, args->fh);
4796- encode_sattr3(xdr, args->sattr);
4797+ encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag);
4798 encode_sattrguard3(xdr, args);
4799 }
d337f35e 4800
b00e13aa 4801@@ -1037,13 +1043,13 @@ static void nfs3_xdr_enc_write3args(stru
d33d7b00
AM
4802 * };
4803 */
4804 static void encode_createhow3(struct xdr_stream *xdr,
4805- const struct nfs3_createargs *args)
4806+ const struct nfs3_createargs *args, int tag)
d337f35e 4807 {
d33d7b00
AM
4808 encode_uint32(xdr, args->createmode);
4809 switch (args->createmode) {
4810 case NFS3_CREATE_UNCHECKED:
4811 case NFS3_CREATE_GUARDED:
4812- encode_sattr3(xdr, args->sattr);
4813+ encode_sattr3(xdr, args->sattr, tag);
4814 break;
4815 case NFS3_CREATE_EXCLUSIVE:
4816 encode_createverf3(xdr, args->verifier);
b00e13aa 4817@@ -1058,7 +1064,7 @@ static void nfs3_xdr_enc_create3args(str
d33d7b00
AM
4818 const struct nfs3_createargs *args)
4819 {
4820 encode_diropargs3(xdr, args->fh, args->name, args->len);
4821- encode_createhow3(xdr, args);
4822+ encode_createhow3(xdr, args, req->rq_task->tk_client->cl_tag);
4823 }
4824
4825 /*
b00e13aa 4826@@ -1074,7 +1080,7 @@ static void nfs3_xdr_enc_mkdir3args(stru
d33d7b00
AM
4827 const struct nfs3_mkdirargs *args)
4828 {
4829 encode_diropargs3(xdr, args->fh, args->name, args->len);
4830- encode_sattr3(xdr, args->sattr);
4831+ encode_sattr3(xdr, args->sattr, req->rq_task->tk_client->cl_tag);
d337f35e 4832 }
d33d7b00
AM
4833
4834 /*
b00e13aa 4835@@ -1091,9 +1097,9 @@ static void nfs3_xdr_enc_mkdir3args(stru
d33d7b00
AM
4836 * };
4837 */
4838 static void encode_symlinkdata3(struct xdr_stream *xdr,
4839- const struct nfs3_symlinkargs *args)
4840+ const struct nfs3_symlinkargs *args, int tag)
4841 {
4842- encode_sattr3(xdr, args->sattr);
4843+ encode_sattr3(xdr, args->sattr, tag);
4844 encode_nfspath3(xdr, args->pages, args->pathlen);
4845 }
4846
b00e13aa 4847@@ -1102,7 +1108,7 @@ static void nfs3_xdr_enc_symlink3args(st
d33d7b00
AM
4848 const struct nfs3_symlinkargs *args)
4849 {
4850 encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
4851- encode_symlinkdata3(xdr, args);
4852+ encode_symlinkdata3(xdr, args, req->rq_task->tk_client->cl_tag);
cc23e853 4853 xdr->buf->flags |= XDRBUF_WRITE;
d33d7b00
AM
4854 }
4855
cc23e853 4856@@ -1131,24 +1137,24 @@ static void nfs3_xdr_enc_symlink3args(st
d33d7b00
AM
4857 * };
4858 */
4859 static void encode_devicedata3(struct xdr_stream *xdr,
4860- const struct nfs3_mknodargs *args)
4861+ const struct nfs3_mknodargs *args, int tag)
4862 {
4863- encode_sattr3(xdr, args->sattr);
4864+ encode_sattr3(xdr, args->sattr, tag);
4865 encode_specdata3(xdr, args->rdev);
4866 }
4867
4868 static void encode_mknoddata3(struct xdr_stream *xdr,
4869- const struct nfs3_mknodargs *args)
4870+ const struct nfs3_mknodargs *args, int tag)
4871 {
4872 encode_ftype3(xdr, args->type);
4873 switch (args->type) {
4874 case NF3CHR:
4875 case NF3BLK:
4876- encode_devicedata3(xdr, args);
4877+ encode_devicedata3(xdr, args, tag);
4878 break;
4879 case NF3SOCK:
4880 case NF3FIFO:
4881- encode_sattr3(xdr, args->sattr);
4882+ encode_sattr3(xdr, args->sattr, tag);
4883 break;
4884 case NF3REG:
4885 case NF3DIR:
cc23e853 4886@@ -1163,7 +1169,7 @@ static void nfs3_xdr_enc_mknod3args(stru
d33d7b00 4887 const struct nfs3_mknodargs *args)
d337f35e 4888 {
d33d7b00
AM
4889 encode_diropargs3(xdr, args->fh, args->name, args->len);
4890- encode_mknoddata3(xdr, args);
4891+ encode_mknoddata3(xdr, args, req->rq_task->tk_client->cl_tag);
4892 }
4893
4894 /*
09a55596
AM
4895diff -NurpP --minimal linux-4.9.135/fs/nfs/super.c linux-4.9.135-vs2.3.9.8/fs/nfs/super.c
4896--- linux-4.9.135/fs/nfs/super.c 2018-10-20 10:39:17.000000000 +0000
4897+++ linux-4.9.135-vs2.3.9.8/fs/nfs/super.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 4898@@ -54,6 +54,7 @@
b00e13aa 4899 #include <linux/parser.h>
1e8b8f9b
AM
4900 #include <linux/nsproxy.h>
4901 #include <linux/rcupdate.h>
d337f35e
JR
4902+#include <linux/vs_tag.h>
4903
d337f35e 4904 #include <asm/uaccess.h>
1e8b8f9b 4905
cc23e853 4906@@ -102,6 +103,7 @@ enum {
1e8b8f9b 4907 Opt_mountport,
ab30d09f 4908 Opt_mountvers,
ab30d09f
AM
4909 Opt_minorversion,
4910+ Opt_tagid,
4911
4912 /* Mount options that take string arguments */
1e8b8f9b 4913 Opt_nfsvers,
cc23e853 4914@@ -114,6 +116,9 @@ enum {
537831f9
AM
4915 /* Special mount options */
4916 Opt_userspace, Opt_deprecated, Opt_sloppy,
4917
4918+ /* Linux-VServer tagging options */
4919+ Opt_tag, Opt_notag,
4920+
4921 Opt_err
4922 };
4923
cc23e853 4924@@ -183,6 +188,10 @@ static const match_table_t nfs_mount_opt
537831f9
AM
4925 { Opt_fscache_uniq, "fsc=%s" },
4926 { Opt_local_lock, "local_lock=%s" },
ab30d09f
AM
4927
4928+ { Opt_tag, "tag" },
4929+ { Opt_notag, "notag" },
4930+ { Opt_tagid, "tagid=%u" },
4931+
537831f9
AM
4932 /* The following needs to be listed after all other options */
4933 { Opt_nfsvers, "v%s" },
ab30d09f 4934
cc23e853 4935@@ -644,6 +653,7 @@ static void nfs_show_mount_options(struc
2380c486 4936 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
ec22aa5c
AM
4937 { NFS_MOUNT_UNSHARED, ",nosharecache", "" },
4938 { NFS_MOUNT_NORESVPORT, ",noresvport", "" },
d337f35e
JR
4939+ { NFS_MOUNT_TAGGED, ",tag", "" },
4940 { 0, NULL, NULL }
4941 };
4942 const struct proc_nfs_info *nfs_infop;
cc23e853 4943@@ -1341,6 +1351,14 @@ static int nfs_parse_mount_options(char
537831f9 4944 case Opt_nomigration:
cc23e853 4945 mnt->options &= ~NFS_OPTION_MIGRATION;
ab30d09f
AM
4946 break;
4947+#ifndef CONFIG_TAGGING_NONE
4948+ case Opt_tag:
4949+ mnt->flags |= NFS_MOUNT_TAGGED;
4950+ break;
4951+ case Opt_notag:
4952+ mnt->flags &= ~NFS_MOUNT_TAGGED;
4953+ break;
4954+#endif
4955
4956 /*
4957 * options that take numeric values
cc23e853 4958@@ -1427,6 +1445,12 @@ static int nfs_parse_mount_options(char
ab30d09f
AM
4959 goto out_invalid_value;
4960 mnt->minorversion = option;
4961 break;
4962+#ifdef CONFIG_PROPAGATE
4963+ case Opt_tagid:
4964+ /* use args[0] */
4965+ nfs_data.flags |= NFS_MOUNT_TAGGED;
4966+ break;
4967+#endif
4968
4969 /*
4970 * options that take text values
09a55596
AM
4971diff -NurpP --minimal linux-4.9.135/fs/nfsd/auth.c linux-4.9.135-vs2.3.9.8/fs/nfsd/auth.c
4972--- linux-4.9.135/fs/nfsd/auth.c 2018-10-20 10:39:18.000000000 +0000
4973+++ linux-4.9.135-vs2.3.9.8/fs/nfsd/auth.c 2018-10-20 04:58:14.000000000 +0000
bb20add7
AM
4974@@ -1,6 +1,7 @@
4975 /* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
2bf5ad28
AM
4976
4977 #include <linux/sched.h>
d337f35e 4978+#include <linux/vs_tag.h>
2bf5ad28 4979 #include "nfsd.h"
2380c486 4980 #include "auth.h"
d337f35e 4981
bb20add7 4982@@ -35,6 +36,9 @@ int nfsd_setuser(struct svc_rqst *rqstp,
d337f35e 4983
ec22aa5c
AM
4984 new->fsuid = rqstp->rq_cred.cr_uid;
4985 new->fsgid = rqstp->rq_cred.cr_gid;
4986+ /* FIXME: this desperately needs a tag :)
61333608 4987+ new->xid = (vxid_t)INOTAG_TAG(DX_TAG_NFSD, cred.cr_uid, cred.cr_gid, 0);
ec22aa5c 4988+ */
d337f35e 4989
ec22aa5c
AM
4990 rqgi = rqstp->rq_cred.cr_group_info;
4991
09a55596
AM
4992diff -NurpP --minimal linux-4.9.135/fs/nfsd/nfs3xdr.c linux-4.9.135-vs2.3.9.8/fs/nfsd/nfs3xdr.c
4993--- linux-4.9.135/fs/nfsd/nfs3xdr.c 2018-10-20 10:39:18.000000000 +0000
4994+++ linux-4.9.135-vs2.3.9.8/fs/nfsd/nfs3xdr.c 2018-10-20 04:58:14.000000000 +0000
b00e13aa 4995@@ -8,6 +8,7 @@
2bf5ad28
AM
4996
4997 #include <linux/namei.h>
b00e13aa 4998 #include <linux/sunrpc/svc_xprt.h>
d337f35e 4999+#include <linux/vs_tag.h>
2bf5ad28 5000 #include "xdr3.h"
2380c486 5001 #include "auth.h"
b00e13aa
AM
5002 #include "netns.h"
5003@@ -98,6 +99,8 @@ static __be32 *
d337f35e
JR
5004 decode_sattr3(__be32 *p, struct iattr *iap)
5005 {
5006 u32 tmp;
a4a22af8
AM
5007+ kuid_t kuid = GLOBAL_ROOT_UID;
5008+ kgid_t kgid = GLOBAL_ROOT_GID;
d337f35e
JR
5009
5010 iap->ia_valid = 0;
5011
b00e13aa
AM
5012@@ -106,15 +109,18 @@ decode_sattr3(__be32 *p, struct iattr *i
5013 iap->ia_mode = ntohl(*p++);
d337f35e
JR
5014 }
5015 if (*p++) {
b00e13aa 5016- iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++));
a4a22af8 5017+ kuid = make_kuid(&init_user_ns, ntohl(*p++));
b00e13aa
AM
5018 if (uid_valid(iap->ia_uid))
5019 iap->ia_valid |= ATTR_UID;
d337f35e
JR
5020 }
5021 if (*p++) {
b00e13aa 5022- iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++));
a4a22af8 5023+ kgid = make_kgid(&init_user_ns, ntohl(*p++));
b00e13aa
AM
5024 if (gid_valid(iap->ia_gid))
5025 iap->ia_valid |= ATTR_GID;
d337f35e 5026 }
a4a22af8
AM
5027+ iap->ia_uid = INOTAG_KUID(DX_TAG_NFSD, kuid, kgid);
5028+ iap->ia_gid = INOTAG_KGID(DX_TAG_NFSD, kuid, kgid);
5029+ iap->ia_tag = INOTAG_KTAG(DX_TAG_NFSD, kuid, kgid, GLOBAL_ROOT_TAG);
d337f35e
JR
5030 if (*p++) {
5031 u64 newsize;
5032
bb20add7 5033@@ -167,8 +173,12 @@ encode_fattr3(struct svc_rqst *rqstp, __
d337f35e 5034 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
bb20add7 5035 *p++ = htonl((u32) (stat->mode & S_IALLUGO));
d337f35e 5036 *p++ = htonl((u32) stat->nlink);
b00e13aa
AM
5037- *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
5038- *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
5039+ *p++ = htonl((u32) from_kuid(&init_user_ns,
a4a22af8 5040+ TAGINO_KUID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
2380c486 5041+ stat->uid, stat->tag)));
b00e13aa 5042+ *p++ = htonl((u32) from_kgid(&init_user_ns,
a4a22af8 5043+ TAGINO_KGID(0 /* FIXME: DX_TAG(dentry->d_inode) */,
2380c486 5044+ stat->gid, stat->tag)));
d337f35e
JR
5045 if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
5046 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
5047 } else {
09a55596
AM
5048diff -NurpP --minimal linux-4.9.135/fs/nfsd/nfs4xdr.c linux-4.9.135-vs2.3.9.8/fs/nfsd/nfs4xdr.c
5049--- linux-4.9.135/fs/nfsd/nfs4xdr.c 2018-10-20 10:39:18.000000000 +0000
5050+++ linux-4.9.135-vs2.3.9.8/fs/nfsd/nfs4xdr.c 2018-10-20 05:55:43.000000000 +0000
cc23e853 5051@@ -40,6 +40,7 @@
d33d7b00 5052 #include <linux/utsname.h>
a168f21d 5053 #include <linux/pagemap.h>
2380c486 5054 #include <linux/sunrpc/svcauth_gss.h>
d337f35e
JR
5055+#include <linux/vs_tag.h>
5056
d33d7b00
AM
5057 #include "idmap.h"
5058 #include "acl.h"
09a55596 5059@@ -2679,12 +2680,16 @@ out_acl:
bb20add7 5060 *p++ = cpu_to_be32(stat.nlink);
d337f35e
JR
5061 }
5062 if (bmval1 & FATTR4_WORD1_OWNER) {
bb20add7
AM
5063- status = nfsd4_encode_user(xdr, rqstp, stat.uid);
5064+ status = nfsd4_encode_user(xdr, rqstp,
a4a22af8 5065+ TAGINO_KUID(DX_TAG(dentry->d_inode),
bb20add7 5066+ stat.uid, stat.tag));
d337f35e
JR
5067 if (status)
5068 goto out;
5069 }
5070 if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
bb20add7
AM
5071- status = nfsd4_encode_group(xdr, rqstp, stat.gid);
5072+ status = nfsd4_encode_group(xdr, rqstp,
a4a22af8 5073+ TAGINO_KGID(DX_TAG(dentry->d_inode),
bb20add7 5074+ stat.gid, stat.tag));
d337f35e 5075 if (status)
f15949f2
JR
5076 goto out;
5077 }
09a55596
AM
5078diff -NurpP --minimal linux-4.9.135/fs/nfsd/nfsxdr.c linux-4.9.135-vs2.3.9.8/fs/nfsd/nfsxdr.c
5079--- linux-4.9.135/fs/nfsd/nfsxdr.c 2018-10-20 10:39:18.000000000 +0000
5080+++ linux-4.9.135-vs2.3.9.8/fs/nfsd/nfsxdr.c 2018-10-20 04:58:14.000000000 +0000
b00e13aa
AM
5081@@ -7,6 +7,7 @@
5082 #include "vfs.h"
2bf5ad28 5083 #include "xdr.h"
2380c486 5084 #include "auth.h"
2bf5ad28 5085+#include <linux/vs_tag.h>
d337f35e
JR
5086
5087 #define NFSDDBG_FACILITY NFSDDBG_XDR
2bf5ad28 5088
b00e13aa 5089@@ -89,6 +90,8 @@ static __be32 *
d337f35e
JR
5090 decode_sattr(__be32 *p, struct iattr *iap)
5091 {
5092 u32 tmp, tmp1;
a4a22af8
AM
5093+ kuid_t kuid = GLOBAL_ROOT_UID;
5094+ kgid_t kgid = GLOBAL_ROOT_GID;
d337f35e
JR
5095
5096 iap->ia_valid = 0;
5097
b00e13aa
AM
5098@@ -101,15 +104,18 @@ decode_sattr(__be32 *p, struct iattr *ia
5099 iap->ia_mode = tmp;
d337f35e
JR
5100 }
5101 if ((tmp = ntohl(*p++)) != (u32)-1) {
b00e13aa 5102- iap->ia_uid = make_kuid(&init_user_ns, tmp);
a4a22af8 5103+ kuid = make_kuid(&init_user_ns, tmp);
b00e13aa
AM
5104 if (uid_valid(iap->ia_uid))
5105 iap->ia_valid |= ATTR_UID;
d337f35e
JR
5106 }
5107 if ((tmp = ntohl(*p++)) != (u32)-1) {
b00e13aa 5108- iap->ia_gid = make_kgid(&init_user_ns, tmp);
a4a22af8 5109+ kgid = make_kgid(&init_user_ns, tmp);
b00e13aa
AM
5110 if (gid_valid(iap->ia_gid))
5111 iap->ia_valid |= ATTR_GID;
d337f35e 5112 }
a4a22af8
AM
5113+ iap->ia_uid = INOTAG_KUID(DX_TAG_NFSD, kuid, kgid);
5114+ iap->ia_gid = INOTAG_KGID(DX_TAG_NFSD, kuid, kgid);
5115+ iap->ia_tag = INOTAG_KTAG(DX_TAG_NFSD, kuid, kgid, GLOBAL_ROOT_TAG);
d337f35e
JR
5116 if ((tmp = ntohl(*p++)) != (u32)-1) {
5117 iap->ia_valid |= ATTR_SIZE;
5118 iap->ia_size = tmp;
b00e13aa 5119@@ -154,8 +160,10 @@ encode_fattr(struct svc_rqst *rqstp, __b
d337f35e
JR
5120 *p++ = htonl(nfs_ftypes[type >> 12]);
5121 *p++ = htonl((u32) stat->mode);
5122 *p++ = htonl((u32) stat->nlink);
b00e13aa
AM
5123- *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
5124- *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
5125+ *p++ = htonl((u32) from_kuid(&init_user_ns,
a4a22af8 5126+ TAGINO_KUID(DX_TAG(dentry->d_inode), stat->uid, stat->tag)));
b00e13aa 5127+ *p++ = htonl((u32) from_kgid(&init_user_ns,
a4a22af8 5128+ TAGINO_KGID(DX_TAG(dentry->d_inode), stat->gid, stat->tag)));
d337f35e
JR
5129
5130 if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
5131 *p++ = htonl(NFS_MAXPATHLEN);
09a55596
AM
5132diff -NurpP --minimal linux-4.9.135/fs/ocfs2/dlmglue.c linux-4.9.135-vs2.3.9.8/fs/ocfs2/dlmglue.c
5133--- linux-4.9.135/fs/ocfs2/dlmglue.c 2018-10-20 10:39:18.000000000 +0000
5134+++ linux-4.9.135-vs2.3.9.8/fs/ocfs2/dlmglue.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 5135@@ -2120,6 +2120,7 @@ static void __ocfs2_stuff_meta_lvb(struc
d337f35e 5136 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
b00e13aa
AM
5137 lvb->lvb_iuid = cpu_to_be32(i_uid_read(inode));
5138 lvb->lvb_igid = cpu_to_be32(i_gid_read(inode));
a4a22af8 5139+ lvb->lvb_itag = cpu_to_be16(i_tag_read(inode));
d337f35e
JR
5140 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
5141 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
5142 lvb->lvb_iatime_packed =
cc23e853 5143@@ -2170,6 +2171,7 @@ static void ocfs2_refresh_inode_from_lvb
d337f35e 5144
b00e13aa
AM
5145 i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
5146 i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
5147+ i_tag_write(inode, be16_to_cpu(lvb->lvb_itag));
d337f35e 5148 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
f6c5ef8b 5149 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
d337f35e 5150 ocfs2_unpack_timespec(&inode->i_atime,
09a55596
AM
5151diff -NurpP --minimal linux-4.9.135/fs/ocfs2/dlmglue.h linux-4.9.135-vs2.3.9.8/fs/ocfs2/dlmglue.h
5152--- linux-4.9.135/fs/ocfs2/dlmglue.h 2018-10-20 10:39:18.000000000 +0000
5153+++ linux-4.9.135-vs2.3.9.8/fs/ocfs2/dlmglue.h 2018-10-20 04:58:14.000000000 +0000
2380c486
JR
5154@@ -46,7 +46,8 @@ struct ocfs2_meta_lvb {
5155 __be16 lvb_inlink;
5156 __be32 lvb_iattr;
5157 __be32 lvb_igeneration;
5158- __be32 lvb_reserved2;
d337f35e 5159+ __be16 lvb_itag;
2380c486
JR
5160+ __be16 lvb_reserved2;
5161 };
5162
ec22aa5c 5163 #define OCFS2_QINFO_LVB_VERSION 1
09a55596
AM
5164diff -NurpP --minimal linux-4.9.135/fs/ocfs2/file.c linux-4.9.135-vs2.3.9.8/fs/ocfs2/file.c
5165--- linux-4.9.135/fs/ocfs2/file.c 2018-10-20 10:39:18.000000000 +0000
5166+++ linux-4.9.135-vs2.3.9.8/fs/ocfs2/file.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 5167@@ -1151,7 +1151,7 @@ int ocfs2_setattr(struct dentry *dentry,
763640ca 5168 attr->ia_valid &= ~ATTR_SIZE;
d337f35e
JR
5169
5170 #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \
5171- | ATTR_GID | ATTR_UID | ATTR_MODE)
5172+ | ATTR_GID | ATTR_UID | ATTR_TAG | ATTR_MODE)
763640ca 5173 if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
d337f35e 5174 return 0;
763640ca 5175
09a55596
AM
5176diff -NurpP --minimal linux-4.9.135/fs/ocfs2/inode.c linux-4.9.135-vs2.3.9.8/fs/ocfs2/inode.c
5177--- linux-4.9.135/fs/ocfs2/inode.c 2016-12-11 19:17:54.000000000 +0000
5178+++ linux-4.9.135-vs2.3.9.8/fs/ocfs2/inode.c 2018-10-20 04:58:14.000000000 +0000
78865d5b 5179@@ -28,6 +28,7 @@
d337f35e
JR
5180 #include <linux/highmem.h>
5181 #include <linux/pagemap.h>
ec22aa5c 5182 #include <linux/quotaops.h>
d337f35e
JR
5183+#include <linux/vs_tag.h>
5184
5185 #include <asm/byteorder.h>
5186
cc23e853 5187@@ -87,11 +88,13 @@ void ocfs2_set_inode_flags(struct inode
2380c486
JR
5188 {
5189 unsigned int flags = OCFS2_I(inode)->ip_attr;
5190
5191- inode->i_flags &= ~(S_IMMUTABLE |
5192+ inode->i_flags &= ~(S_IMMUTABLE | S_IXUNLINK |
5193 S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
d337f35e
JR
5194
5195 if (flags & OCFS2_IMMUTABLE_FL)
5196 inode->i_flags |= S_IMMUTABLE;
2380c486
JR
5197+ if (flags & OCFS2_IXUNLINK_FL)
5198+ inode->i_flags |= S_IXUNLINK;
d337f35e
JR
5199
5200 if (flags & OCFS2_SYNC_FL)
5201 inode->i_flags |= S_SYNC;
cc23e853 5202@@ -101,25 +104,44 @@ void ocfs2_set_inode_flags(struct inode
2380c486
JR
5203 inode->i_flags |= S_NOATIME;
5204 if (flags & OCFS2_DIRSYNC_FL)
d337f35e 5205 inode->i_flags |= S_DIRSYNC;
2380c486
JR
5206+
5207+ inode->i_vflags &= ~(V_BARRIER | V_COW);
5208+
5209+ if (flags & OCFS2_BARRIER_FL)
5210+ inode->i_vflags |= V_BARRIER;
5211+ if (flags & OCFS2_COW_FL)
5212+ inode->i_vflags |= V_COW;
d337f35e
JR
5213 }
5214
2380c486
JR
5215 /* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */
5216 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
5217 {
5218 unsigned int flags = oi->vfs_inode.i_flags;
5219+ unsigned int vflags = oi->vfs_inode.i_vflags;
5220+
5221+ oi->ip_attr &= ~(OCFS2_SYNC_FL | OCFS2_APPEND_FL |
5222+ OCFS2_IMMUTABLE_FL | OCFS2_IXUNLINK_FL |
5223+ OCFS2_NOATIME_FL | OCFS2_DIRSYNC_FL |
5224+ OCFS2_BARRIER_FL | OCFS2_COW_FL);
5225+
5226+ if (flags & S_IMMUTABLE)
5227+ oi->ip_attr |= OCFS2_IMMUTABLE_FL;
5228+ if (flags & S_IXUNLINK)
5229+ oi->ip_attr |= OCFS2_IXUNLINK_FL;
5230
5231- oi->ip_attr &= ~(OCFS2_SYNC_FL|OCFS2_APPEND_FL|
5232- OCFS2_IMMUTABLE_FL|OCFS2_NOATIME_FL|OCFS2_DIRSYNC_FL);
5233 if (flags & S_SYNC)
5234 oi->ip_attr |= OCFS2_SYNC_FL;
5235 if (flags & S_APPEND)
5236 oi->ip_attr |= OCFS2_APPEND_FL;
5237- if (flags & S_IMMUTABLE)
5238- oi->ip_attr |= OCFS2_IMMUTABLE_FL;
5239 if (flags & S_NOATIME)
5240 oi->ip_attr |= OCFS2_NOATIME_FL;
5241 if (flags & S_DIRSYNC)
5242 oi->ip_attr |= OCFS2_DIRSYNC_FL;
5243+
5244+ if (vflags & V_BARRIER)
5245+ oi->ip_attr |= OCFS2_BARRIER_FL;
5246+ if (vflags & V_COW)
5247+ oi->ip_attr |= OCFS2_COW_FL;
2380c486
JR
5248 }
5249
ec22aa5c 5250 struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
cc23e853 5251@@ -278,6 +300,8 @@ void ocfs2_populate_inode(struct inode *
d337f35e
JR
5252 struct super_block *sb;
5253 struct ocfs2_super *osb;
ec22aa5c 5254 int use_plocks = 1;
d337f35e
JR
5255+ uid_t uid;
5256+ gid_t gid;
5257
763640ca
JR
5258 sb = inode->i_sb;
5259 osb = OCFS2_SB(sb);
cc23e853 5260@@ -306,8 +330,12 @@ void ocfs2_populate_inode(struct inode *
d337f35e
JR
5261 inode->i_generation = le32_to_cpu(fe->i_generation);
5262 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
5263 inode->i_mode = le16_to_cpu(fe->i_mode);
b00e13aa
AM
5264- i_uid_write(inode, le32_to_cpu(fe->i_uid));
5265- i_gid_write(inode, le32_to_cpu(fe->i_gid));
d337f35e
JR
5266+ uid = le32_to_cpu(fe->i_uid);
5267+ gid = le32_to_cpu(fe->i_gid);
b00e13aa
AM
5268+ i_uid_write(inode, INOTAG_UID(DX_TAG(inode), uid, gid));
5269+ i_gid_write(inode, INOTAG_GID(DX_TAG(inode), uid, gid));
5270+ i_tag_write(inode, INOTAG_TAG(DX_TAG(inode), uid, gid,
5271+ /* le16_to_cpu(raw_inode->i_raw_tag) */ 0));
d337f35e
JR
5272
5273 /* Fast symlinks will have i_size but no allocated clusters. */
42bc425c 5274 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) {
09a55596
AM
5275diff -NurpP --minimal linux-4.9.135/fs/ocfs2/inode.h linux-4.9.135-vs2.3.9.8/fs/ocfs2/inode.h
5276--- linux-4.9.135/fs/ocfs2/inode.h 2016-12-11 19:17:54.000000000 +0000
5277+++ linux-4.9.135-vs2.3.9.8/fs/ocfs2/inode.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 5278@@ -155,6 +155,7 @@ int ocfs2_mark_inode_dirty(handle_t *han
d337f35e
JR
5279
5280 void ocfs2_set_inode_flags(struct inode *inode);
2380c486 5281 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
d4263eb0 5282+int ocfs2_sync_flags(struct inode *inode, int, int);
d337f35e 5283
2380c486
JR
5284 static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
5285 {
09a55596
AM
5286diff -NurpP --minimal linux-4.9.135/fs/ocfs2/ioctl.c linux-4.9.135-vs2.3.9.8/fs/ocfs2/ioctl.c
5287--- linux-4.9.135/fs/ocfs2/ioctl.c 2016-12-11 19:17:54.000000000 +0000
5288+++ linux-4.9.135-vs2.3.9.8/fs/ocfs2/ioctl.c 2018-10-20 04:58:14.000000000 +0000
1e8b8f9b 5289@@ -76,7 +76,41 @@ static int ocfs2_get_inode_attr(struct i
d337f35e
JR
5290 return status;
5291 }
5292
5293-static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
d4263eb0
JR
5294+int ocfs2_sync_flags(struct inode *inode, int flags, int vflags)
5295+{
5296+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5297+ struct buffer_head *bh = NULL;
5298+ handle_t *handle = NULL;
5299+ int status;
5300+
5301+ status = ocfs2_inode_lock(inode, &bh, 1);
5302+ if (status < 0) {
5303+ mlog_errno(status);
5304+ return status;
5305+ }
5306+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
5307+ if (IS_ERR(handle)) {
5308+ status = PTR_ERR(handle);
5309+ mlog_errno(status);
5310+ goto bail_unlock;
5311+ }
5312+
5313+ inode->i_flags = flags;
5314+ inode->i_vflags = vflags;
5315+ ocfs2_get_inode_flags(OCFS2_I(inode));
5316+
5317+ status = ocfs2_mark_inode_dirty(handle, inode, bh);
5318+ if (status < 0)
5319+ mlog_errno(status);
5320+
5321+ ocfs2_commit_trans(osb, handle);
5322+bail_unlock:
5323+ ocfs2_inode_unlock(inode, 1);
5324+ brelse(bh);
5325+ return status;
5326+}
5327+
d337f35e
JR
5328+int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
5329 unsigned mask)
5330 {
5331 struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
09be7631
JR
5332@@ -116,6 +150,11 @@ static int ocfs2_set_inode_attr(struct i
5333 goto bail_unlock;
5334 }
2380c486
JR
5335
5336+ if (IS_BARRIER(inode)) {
5337+ vxwprintk_task(1, "messing with the barrier.");
5338+ goto bail_unlock;
5339+ }
5340+
5341 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
5342 if (IS_ERR(handle)) {
5343 status = PTR_ERR(handle);
cc23e853 5344@@ -839,6 +878,7 @@ bail:
d4263eb0
JR
5345 return status;
5346 }
d337f35e 5347
d337f35e 5348+
d4263eb0
JR
5349 long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
5350 {
b00e13aa 5351 struct inode *inode = file_inode(filp);
09a55596
AM
5352diff -NurpP --minimal linux-4.9.135/fs/ocfs2/namei.c linux-4.9.135-vs2.3.9.8/fs/ocfs2/namei.c
5353--- linux-4.9.135/fs/ocfs2/namei.c 2016-12-11 19:17:54.000000000 +0000
5354+++ linux-4.9.135-vs2.3.9.8/fs/ocfs2/namei.c 2018-10-20 04:58:14.000000000 +0000
ec22aa5c 5355@@ -41,6 +41,7 @@
d337f35e
JR
5356 #include <linux/slab.h>
5357 #include <linux/highmem.h>
ec22aa5c 5358 #include <linux/quotaops.h>
d337f35e
JR
5359+#include <linux/vs_tag.h>
5360
d337f35e 5361 #include <cluster/masklog.h>
763640ca 5362
cc23e853 5363@@ -516,6 +517,7 @@ static int __ocfs2_mknod_locked(struct i
93de0823 5364 struct ocfs2_extent_list *fel;
ec22aa5c 5365 u16 feat;
265de2f7 5366 struct ocfs2_inode_info *oi = OCFS2_I(inode);
a4a22af8 5367+ ktag_t ktag;
d337f35e 5368
7e46296a
AM
5369 *new_fe_bh = NULL;
5370
cc23e853 5371@@ -553,8 +555,13 @@ static int __ocfs2_mknod_locked(struct i
76514441 5372 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
d337f35e 5373 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
2380c486 5374 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
b00e13aa
AM
5375- fe->i_uid = cpu_to_le32(i_uid_read(inode));
5376- fe->i_gid = cpu_to_le32(i_gid_read(inode));
d337f35e 5377+
a4a22af8
AM
5378+ ktag = make_ktag(&init_user_ns, dx_current_fstag(osb->sb));
5379+ fe->i_uid = cpu_to_le32(from_kuid(&init_user_ns,
5380+ TAGINO_KUID(DX_TAG(inode), inode->i_uid, ktag)));
5381+ fe->i_gid = cpu_to_le32(from_kgid(&init_user_ns,
5382+ TAGINO_KGID(DX_TAG(inode), inode->i_gid, ktag)));
5383+ inode->i_tag = ktag; /* is this correct? */
ec22aa5c
AM
5384 fe->i_mode = cpu_to_le16(inode->i_mode);
5385 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
d337f35e 5386 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
09a55596
AM
5387diff -NurpP --minimal linux-4.9.135/fs/ocfs2/ocfs2.h linux-4.9.135-vs2.3.9.8/fs/ocfs2/ocfs2.h
5388--- linux-4.9.135/fs/ocfs2/ocfs2.h 2018-10-20 10:39:18.000000000 +0000
5389+++ linux-4.9.135-vs2.3.9.8/fs/ocfs2/ocfs2.h 2018-10-20 04:58:14.000000000 +0000
cc23e853
AM
5390@@ -289,6 +289,7 @@ enum ocfs2_mount_options
5391 OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
5392 OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
5393 OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
5394+ OCFS2_MOUNT_TAGGED = 1 << 18, /* use tagging */
d33d7b00
AM
5395 };
5396
bb20add7 5397 #define OCFS2_OSB_SOFT_RO 0x0001
09a55596
AM
5398diff -NurpP --minimal linux-4.9.135/fs/ocfs2/ocfs2_fs.h linux-4.9.135-vs2.3.9.8/fs/ocfs2/ocfs2_fs.h
5399--- linux-4.9.135/fs/ocfs2/ocfs2_fs.h 2016-12-11 19:17:54.000000000 +0000
5400+++ linux-4.9.135-vs2.3.9.8/fs/ocfs2/ocfs2_fs.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 5401@@ -275,6 +275,11 @@
93de0823
AM
5402 #define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
5403 #define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
2380c486 5404
93de0823
AM
5405+#define OCFS2_IXUNLINK_FL FS_IXUNLINK_FL /* Immutable invert on unlink */
5406+
5407+#define OCFS2_BARRIER_FL FS_BARRIER_FL /* Barrier for chroot() */
5408+#define OCFS2_COW_FL FS_COW_FL /* Copy on Write marker */
5409+
5410 #define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
5411 #define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
5412
09a55596
AM
5413diff -NurpP --minimal linux-4.9.135/fs/ocfs2/super.c linux-4.9.135-vs2.3.9.8/fs/ocfs2/super.c
5414--- linux-4.9.135/fs/ocfs2/super.c 2018-10-20 10:39:18.000000000 +0000
5415+++ linux-4.9.135-vs2.3.9.8/fs/ocfs2/super.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 5416@@ -188,6 +188,7 @@ enum {
76514441 5417 Opt_dir_resv_level,
cc23e853
AM
5418 Opt_journal_async_commit,
5419 Opt_err_cont,
d337f35e
JR
5420+ Opt_tag, Opt_notag, Opt_tagid,
5421 Opt_err,
5422 };
5423
cc23e853 5424@@ -221,6 +222,9 @@ static const match_table_t tokens = {
76514441 5425 {Opt_dir_resv_level, "dir_resv_level=%u"},
cc23e853
AM
5426 {Opt_journal_async_commit, "journal_async_commit"},
5427 {Opt_err_cont, "errors=continue"},
d337f35e 5428+ {Opt_tag, "tag"},
d337f35e
JR
5429+ {Opt_notag, "notag"},
5430+ {Opt_tagid, "tagid=%u"},
5431 {Opt_err, NULL}
5432 };
5433
09a55596 5434@@ -672,6 +676,13 @@ static int ocfs2_remount(struct super_bl
d337f35e
JR
5435 goto out;
5436 }
5437
d4263eb0
JR
5438+ if ((osb->s_mount_opt & OCFS2_MOUNT_TAGGED) !=
5439+ (parsed_options.mount_opt & OCFS2_MOUNT_TAGGED)) {
d337f35e
JR
5440+ ret = -EINVAL;
5441+ mlog(ML_ERROR, "Cannot change tagging on remount\n");
5442+ goto out;
5443+ }
5444+
ab30d09f
AM
5445 /* We're going to/from readonly mode. */
5446 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
5447 /* Disable quota accounting before remounting RO */
09a55596 5448@@ -1161,6 +1172,9 @@ static int ocfs2_fill_super(struct super
d337f35e
JR
5449
5450 ocfs2_complete_mount_recovery(osb);
5451
5452+ if (osb->s_mount_opt & OCFS2_MOUNT_TAGGED)
5453+ sb->s_flags |= MS_TAGGED;
5454+
2380c486
JR
5455 if (ocfs2_mount_local(osb))
5456 snprintf(nodestr, sizeof(nodestr), "local");
5457 else
09a55596 5458@@ -1480,6 +1494,20 @@ static int ocfs2_parse_options(struct su
cc23e853
AM
5459 case Opt_journal_async_commit:
5460 mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
d337f35e
JR
5461 break;
5462+#ifndef CONFIG_TAGGING_NONE
5463+ case Opt_tag:
2380c486 5464+ mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
d337f35e
JR
5465+ break;
5466+ case Opt_notag:
2380c486 5467+ mopt->mount_opt &= ~OCFS2_MOUNT_TAGGED;
d337f35e
JR
5468+ break;
5469+#endif
5470+#ifdef CONFIG_PROPAGATE
5471+ case Opt_tagid:
5472+ /* use args[0] */
2380c486 5473+ mopt->mount_opt |= OCFS2_MOUNT_TAGGED;
d337f35e
JR
5474+ break;
5475+#endif
5476 default:
5477 mlog(ML_ERROR,
5478 "Unrecognized mount option \"%s\" "
09a55596
AM
5479diff -NurpP --minimal linux-4.9.135/fs/open.c linux-4.9.135-vs2.3.9.8/fs/open.c
5480--- linux-4.9.135/fs/open.c 2018-10-20 10:39:18.000000000 +0000
5481+++ linux-4.9.135-vs2.3.9.8/fs/open.c 2018-10-20 04:58:14.000000000 +0000
b00e13aa 5482@@ -31,6 +31,11 @@
2bf5ad28 5483 #include <linux/ima.h>
93de0823 5484 #include <linux/dnotify.h>
b00e13aa 5485 #include <linux/compat.h>
d337f35e
JR
5486+#include <linux/vs_base.h>
5487+#include <linux/vs_limit.h>
d337f35e
JR
5488+#include <linux/vs_tag.h>
5489+#include <linux/vs_cowbl.h>
78865d5b 5490+#include <linux/vserver/dlimit.h>
d337f35e 5491
2bf5ad28
AM
5492 #include "internal.h"
5493
cc23e853
AM
5494@@ -65,12 +70,17 @@ int do_truncate(struct dentry *dentry, l
5495 return ret;
5496 }
5497
5498-long vfs_truncate(const struct path *path, loff_t length)
5499+long vfs_truncate(struct path *path, loff_t length)
5500 {
b00e13aa 5501 struct inode *inode;
cc23e853 5502 struct dentry *upperdentry;
b00e13aa
AM
5503 long error;
5504
76514441 5505+#ifdef CONFIG_VSERVER_COWBL
b00e13aa 5506+ error = cow_check_and_break(path);
d337f35e 5507+ if (error)
b00e13aa 5508+ goto out;
76514441 5509+#endif
b00e13aa 5510 inode = path->dentry->d_inode;
d337f35e 5511
a168f21d 5512 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
cc23e853 5513@@ -565,6 +575,13 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, cons
b00e13aa
AM
5514 unsigned int lookup_flags = LOOKUP_FOLLOW;
5515 retry:
5516 error = user_path_at(dfd, filename, lookup_flags, &path);
a168f21d 5517+#ifdef CONFIG_VSERVER_COWBL
b00e13aa 5518+ if (!error) {
a168f21d 5519+ error = cow_check_and_break(&path);
b00e13aa
AM
5520+ if (error)
5521+ path_put(&path);
5522+ }
a168f21d 5523+#endif
b00e13aa 5524 if (!error) {
a168f21d
AM
5525 error = chmod_common(&path, mode);
5526 path_put(&path);
cc23e853 5527@@ -599,13 +616,15 @@ retry_deleg:
42bc425c
AM
5528 if (!uid_valid(uid))
5529 return -EINVAL;
d337f35e 5530 newattrs.ia_valid |= ATTR_UID;
42bc425c 5531- newattrs.ia_uid = uid;
8ce283e1
AM
5532+ newattrs.ia_uid = make_kuid(&init_user_ns,
5533+ dx_map_uid(user));
d337f35e
JR
5534 }
5535 if (group != (gid_t) -1) {
42bc425c
AM
5536 if (!gid_valid(gid))
5537 return -EINVAL;
d337f35e 5538 newattrs.ia_valid |= ATTR_GID;
42bc425c 5539- newattrs.ia_gid = gid;
8ce283e1
AM
5540+ newattrs.ia_gid = make_kgid(&init_user_ns,
5541+ dx_map_gid(group));
d337f35e
JR
5542 }
5543 if (!S_ISDIR(inode->i_mode))
2380c486 5544 newattrs.ia_valid |=
cc23e853 5545@@ -643,6 +662,10 @@ retry:
2380c486 5546 error = mnt_want_write(path.mnt);
d337f35e 5547 if (error)
2380c486 5548 goto out_release;
d337f35e 5549+#ifdef CONFIG_VSERVER_COWBL
2380c486 5550+ error = cow_check_and_break(&path);
d337f35e 5551+ if (!error)
d337f35e 5552+#endif
2bf5ad28 5553 error = chown_common(&path, user, group);
2380c486
JR
5554 mnt_drop_write(path.mnt);
5555 out_release:
09a55596
AM
5556diff -NurpP --minimal linux-4.9.135/fs/proc/array.c linux-4.9.135-vs2.3.9.8/fs/proc/array.c
5557--- linux-4.9.135/fs/proc/array.c 2018-10-20 10:39:18.000000000 +0000
5558+++ linux-4.9.135-vs2.3.9.8/fs/proc/array.c 2018-10-20 04:58:14.000000000 +0000
5559@@ -86,6 +86,8 @@
cc23e853 5560 #include <linux/string_helpers.h>
42bc425c 5561 #include <linux/user_namespace.h>
cc23e853 5562 #include <linux/fs_struct.h>
d337f35e
JR
5563+#include <linux/vs_context.h>
5564+#include <linux/vs_network.h>
5565
d337f35e 5566 #include <asm/pgtable.h>
2380c486 5567 #include <asm/processor.h>
09a55596 5568@@ -170,6 +172,9 @@ static inline void task_state(struct seq
2380c486
JR
5569 ppid = pid_alive(p) ?
5570 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
cc23e853 5571
2380c486
JR
5572+ if (unlikely(vx_current_initpid(p->pid)))
5573+ ppid = 0;
5574+
cc23e853
AM
5575 tracer = ptrace_parent(p);
5576 if (tracer)
5577 tpid = task_pid_nr_ns(tracer, ns);
09a55596 5578@@ -307,8 +312,8 @@ static inline void task_sig(struct seq_f
bb20add7 5579 render_sigset_t(m, "SigCgt:\t", &caught);
2380c486
JR
5580 }
5581
bb20add7 5582-static void render_cap_t(struct seq_file *m, const char *header,
2380c486 5583- kernel_cap_t *a)
bb20add7 5584+void render_cap_t(struct seq_file *m, const char *header,
2380c486 5585+ struct vx_info *vxi, kernel_cap_t *a)
d337f35e 5586 {
2380c486
JR
5587 unsigned __capi;
5588
09a55596 5589@@ -335,11 +340,12 @@ static inline void task_cap(struct seq_f
cc23e853 5590 cap_ambient = cred->cap_ambient;
bb20add7 5591 rcu_read_unlock();
2380c486 5592
ec22aa5c
AM
5593- render_cap_t(m, "CapInh:\t", &cap_inheritable);
5594- render_cap_t(m, "CapPrm:\t", &cap_permitted);
5595- render_cap_t(m, "CapEff:\t", &cap_effective);
5596- render_cap_t(m, "CapBnd:\t", &cap_bset);
cc23e853 5597- render_cap_t(m, "CapAmb:\t", &cap_ambient);
ec22aa5c
AM
5598+ /* FIXME: maybe move the p->vx_info masking to __task_cred() ? */
5599+ render_cap_t(m, "CapInh:\t", p->vx_info, &cap_inheritable);
5600+ render_cap_t(m, "CapPrm:\t", p->vx_info, &cap_permitted);
5601+ render_cap_t(m, "CapEff:\t", p->vx_info, &cap_effective);
5602+ render_cap_t(m, "CapBnd:\t", p->vx_info, &cap_bset);
cc23e853 5603+ render_cap_t(m, "CapAmb:\t", p->vx_info, &cap_ambient);
d337f35e
JR
5604 }
5605
b00e13aa 5606 static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
09a55596 5607@@ -390,6 +396,43 @@ static void task_cpus_allowed(struct seq
cc23e853 5608 cpumask_pr_args(&task->cpus_allowed));
2380c486
JR
5609 }
5610
5611+int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
5612+ struct pid *pid, struct task_struct *task)
5613+{
5614+ seq_printf(m, "Proxy:\t%p(%c)\n"
5615+ "Count:\t%u\n"
5616+ "uts:\t%p(%c)\n"
5617+ "ipc:\t%p(%c)\n"
5618+ "mnt:\t%p(%c)\n"
5619+ "pid:\t%p(%c)\n"
5620+ "net:\t%p(%c)\n",
5621+ task->nsproxy,
5622+ (task->nsproxy == init_task.nsproxy ? 'I' : '-'),
5623+ atomic_read(&task->nsproxy->count),
5624+ task->nsproxy->uts_ns,
5625+ (task->nsproxy->uts_ns == init_task.nsproxy->uts_ns ? 'I' : '-'),
5626+ task->nsproxy->ipc_ns,
5627+ (task->nsproxy->ipc_ns == init_task.nsproxy->ipc_ns ? 'I' : '-'),
5628+ task->nsproxy->mnt_ns,
5629+ (task->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns ? 'I' : '-'),
c2e5f7c8
JR
5630+ task->nsproxy->pid_ns_for_children,
5631+ (task->nsproxy->pid_ns_for_children ==
5632+ init_task.nsproxy->pid_ns_for_children ? 'I' : '-'),
2380c486
JR
5633+ task->nsproxy->net_ns,
5634+ (task->nsproxy->net_ns == init_task.nsproxy->net_ns ? 'I' : '-'));
5635+ return 0;
5636+}
d337f35e 5637+
2380c486
JR
5638+void task_vs_id(struct seq_file *m, struct task_struct *task)
5639+{
d337f35e 5640+ if (task_vx_flags(task, VXF_HIDE_VINFO, 0))
2380c486
JR
5641+ return;
5642+
bb20add7
AM
5643+ seq_printf(m, "VxID:\t%d\n", vx_task_xid(task));
5644+ seq_printf(m, "NxID:\t%d\n", nx_task_nid(task));
2380c486
JR
5645+}
5646+
5647+
5648 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
5649 struct pid *pid, struct task_struct *task)
5650 {
09a55596 5651@@ -407,6 +450,7 @@ int proc_pid_status(struct seq_file *m,
b00e13aa 5652 task_seccomp(m, task);
2bf5ad28 5653 task_cpus_allowed(m, task);
2380c486
JR
5654 cpuset_task_status_allowed(m, task);
5655+ task_vs_id(m, task);
152aeb71
JR
5656 task_context_switch_counts(m, task);
5657 return 0;
5658 }
09a55596 5659@@ -522,6 +566,17 @@ static int do_task_stat(struct seq_file
d337f35e 5660 /* convert nsec -> ticks */
bb20add7 5661 start_time = nsec_to_clock_t(task->real_start_time);
d337f35e
JR
5662
5663+ /* fixup start time for virt uptime */
5664+ if (vx_flags(VXF_VIRT_UPTIME, 0)) {
5665+ unsigned long long bias =
5666+ current->vx_info->cvirt.bias_clock;
5667+
5668+ if (start_time > bias)
5669+ start_time -= bias;
5670+ else
5671+ start_time = 0;
5672+ }
5673+
1e8b8f9b 5674 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
cc23e853
AM
5675 seq_put_decimal_ll(m, " ", ppid);
5676 seq_put_decimal_ll(m, " ", pgid);
09a55596
AM
5677diff -NurpP --minimal linux-4.9.135/fs/proc/base.c linux-4.9.135-vs2.3.9.8/fs/proc/base.c
5678--- linux-4.9.135/fs/proc/base.c 2018-10-20 10:39:18.000000000 +0000
5679+++ linux-4.9.135-vs2.3.9.8/fs/proc/base.c 2018-10-20 05:55:43.000000000 +0000
09be7631 5680@@ -87,6 +87,8 @@
78865d5b 5681 #include <linux/slab.h>
db55b927 5682 #include <linux/flex_array.h>
09be7631 5683 #include <linux/posix-timers.h>
d337f35e
JR
5684+#include <linux/vs_context.h>
5685+#include <linux/vs_network.h>
763640ca
JR
5686 #ifdef CONFIG_HARDWALL
5687 #include <asm/hardwall.h>
5688 #endif
09a55596 5689@@ -1079,10 +1081,15 @@ static int __set_oom_adj(struct file *fi
cc23e853
AM
5690 mutex_lock(&oom_adj_mutex);
5691 if (legacy) {
5692 if (oom_adj < task->signal->oom_score_adj &&
5693- !capable(CAP_SYS_RESOURCE)) {
5694+ !vx_capable(CAP_SYS_RESOURCE, VXC_OOM_ADJUST)) {
5695 err = -EACCES;
5696 goto err_unlock;
5697 }
7e46296a 5698+
cc23e853
AM
5699+ /* prevent guest processes from circumventing the oom killer */
5700+ if (vx_current_xid() && (oom_adj == OOM_DISABLE))
5701+ oom_adj = OOM_ADJUST_MIN;
5702+
5703 /*
5704 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
5705 * /proc/pid/oom_score_adj instead.
09a55596 5706@@ -1712,6 +1719,8 @@ struct inode *proc_pid_make_inode(struct
ec22aa5c
AM
5707 inode->i_gid = cred->egid;
5708 rcu_read_unlock();
d337f35e
JR
5709 }
5710+ /* procfs is xid tagged */
61333608 5711+ i_tag_write(inode, (vtag_t)vx_task_xid(task));
d337f35e
JR
5712 security_task_to_inode(task, inode);
5713
5714 out:
09a55596 5715@@ -1757,6 +1766,8 @@ int pid_getattr(struct vfsmount *mnt, st
d33d7b00
AM
5716
5717 /* dentry stuff */
5718
bb20add7 5719+// static unsigned name_to_int(struct dentry *dentry);
d33d7b00
AM
5720+
5721 /*
5722 * Exceptional case: normally we are not allowed to unhash a busy
5723 * directory. In this case, however, we can do it - no aliasing problems
09a55596 5724@@ -1785,6 +1796,19 @@ int pid_revalidate(struct dentry *dentry
d33d7b00
AM
5725 task = get_proc_task(inode);
5726
5727 if (task) {
bb20add7
AM
5728+ unsigned pid = name_to_int(&dentry->d_name);
5729+
5730+ if (pid != ~0U && pid != vx_map_pid(task->pid) &&
5731+ pid != __task_pid_nr_ns(task, PIDTYPE_PID,
5732+ task_active_pid_ns(task))) {
5733+ vxdprintk(VXD_CBIT(misc, 10),
5734+ VS_Q("%*s") " dropped by pid_revalidate(%d!=%d)",
5735+ dentry->d_name.len, dentry->d_name.name,
5736+ pid, vx_map_pid(task->pid));
d33d7b00 5737+ put_task_struct(task);
bb20add7
AM
5738+ d_drop(dentry);
5739+ return 0;
d33d7b00
AM
5740+ }
5741 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
5742 task_dumpable(task)) {
5743 rcu_read_lock();
09a55596 5744@@ -2449,6 +2473,13 @@ static struct dentry *proc_pident_lookup
d337f35e
JR
5745 if (!task)
5746 goto out_no_task;
5747
2380c486 5748+ /* TODO: maybe we can come up with a generic approach? */
d337f35e
JR
5749+ if (task_vx_flags(task, VXF_HIDE_VINFO, 0) &&
5750+ (dentry->d_name.len == 5) &&
5751+ (!memcmp(dentry->d_name.name, "vinfo", 5) ||
5752+ !memcmp(dentry->d_name.name, "ninfo", 5)))
5753+ goto out;
5754+
5755 /*
5756 * Yes, it does not scale. And it should not. Don't add
5757 * new entries into /proc/<tgid>/ without very good reasons.
09a55596 5758@@ -2888,6 +2919,11 @@ static int proc_pid_personality(struct s
2380c486
JR
5759 static const struct file_operations proc_task_operations;
5760 static const struct inode_operations proc_task_inode_operations;
d337f35e 5761
bb20add7
AM
5762+extern int proc_pid_vx_info(struct seq_file *,
5763+ struct pid_namespace *, struct pid *, struct task_struct *);
5764+extern int proc_pid_nx_info(struct seq_file *,
5765+ struct pid_namespace *, struct pid *, struct task_struct *);
d337f35e 5766+
2380c486 5767 static const struct pid_entry tgid_base_stuff[] = {
ec22aa5c
AM
5768 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
5769 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
09a55596 5770@@ -2952,6 +2988,8 @@ static const struct pid_entry tgid_base_
2380c486 5771 #ifdef CONFIG_CGROUPS
bb20add7 5772 ONE("cgroup", S_IRUGO, proc_cgroup_show),
d337f35e 5773 #endif
bb20add7
AM
5774+ ONE("vinfo", S_IRUGO, proc_pid_vx_info),
5775+ ONE("ninfo", S_IRUGO, proc_pid_nx_info),
5776 ONE("oom_score", S_IRUGO, proc_oom_score),
537831f9 5777 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
93de0823 5778 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
09a55596 5779@@ -3167,7 +3205,7 @@ retry:
2380c486
JR
5780 iter.task = NULL;
5781 pid = find_ge_pid(iter.tgid, ns);
5782 if (pid) {
5783- iter.tgid = pid_nr_ns(pid, ns);
5784+ iter.tgid = pid_unmapped_nr_ns(pid, ns);
5785 iter.task = pid_task(pid, PIDTYPE_PID);
5786 /* What we to know is if the pid we have find is the
5787 * pid of a thread_group_leader. Testing for task
09a55596 5788@@ -3227,8 +3265,10 @@ int proc_pid_readdir(struct file *file,
c2e5f7c8
JR
5789 if (!has_pid_permissions(ns, iter.task, 2))
5790 continue;
db55b927 5791
c2e5f7c8
JR
5792- len = snprintf(name, sizeof(name), "%d", iter.tgid);
5793+ len = snprintf(name, sizeof(name), "%d", vx_map_tgid(iter.tgid));
5794 ctx->pos = iter.tgid + TGID_OFFSET;
2380c486 5795+ if (!vx_proc_task_visible(iter.task))
d337f35e 5796+ continue;
c2e5f7c8
JR
5797 if (!proc_fill_cache(file, ctx, name, len,
5798 proc_pid_instantiate, iter.task, NULL)) {
2380c486 5799 put_task_struct(iter.task);
09a55596 5800@@ -3365,6 +3405,7 @@ static const struct pid_entry tid_base_s
09be7631 5801 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
bb20add7 5802 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
09be7631
JR
5803 #endif
5804+ ONE("nsproxy", S_IRUGO, proc_pid_nsproxy),
5805 };
5806
c2e5f7c8 5807 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
09a55596 5808@@ -3431,6 +3472,8 @@ static struct dentry *proc_task_lookup(s
bb20add7 5809 tid = name_to_int(&dentry->d_name);
d337f35e
JR
5810 if (tid == ~0U)
5811 goto out;
5812+ if (vx_current_initpid(tid))
5813+ goto out;
5814
2380c486 5815 ns = dentry->d_sb->s_fs_info;
d337f35e 5816 rcu_read_lock();
09a55596
AM
5817diff -NurpP --minimal linux-4.9.135/fs/proc/generic.c linux-4.9.135-vs2.3.9.8/fs/proc/generic.c
5818--- linux-4.9.135/fs/proc/generic.c 2018-10-20 10:39:18.000000000 +0000
5819+++ linux-4.9.135-vs2.3.9.8/fs/proc/generic.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 5820@@ -22,6 +22,7 @@
d337f35e
JR
5821 #include <linux/bitops.h>
5822 #include <linux/spinlock.h>
2380c486 5823 #include <linux/completion.h>
d337f35e
JR
5824+#include <linux/vserver/inode.h>
5825 #include <asm/uaccess.h>
5826
5827 #include "internal.h"
cc23e853
AM
5828@@ -66,8 +67,16 @@ static struct proc_dir_entry *pde_subdir
5829 node = node->rb_left;
5830 else if (result > 0)
5831 node = node->rb_right;
5832- else
5833+ else {
5834+ if (!vx_hide_check(0, de->vx_flags)) {
5835+ vxdprintk(VXD_CBIT(misc, 9),
5836+ VS_Q("%*s")
5837+ " hidden in pde_subdir_find()",
5838+ de->namelen, de->name);
5839+ return 0;
5840+ }
5841 return de;
bb20add7 5842+ }
cc23e853
AM
5843 }
5844 return NULL;
5845 }
5846@@ -241,6 +250,8 @@ struct dentry *proc_lookup_de(struct pro
5847 return ERR_PTR(-ENOMEM);
5848 d_set_d_op(dentry, &simple_dentry_operations);
5849 d_add(dentry, inode);
ba86f833 5850+ /* generic proc entries belong to the host */
537831f9 5851+ i_tag_write(inode, 0);
cc23e853 5852 return NULL;
2380c486 5853 }
cc23e853
AM
5854 read_unlock(&proc_subdir_lock);
5855@@ -287,6 +298,12 @@ int proc_readdir_de(struct proc_dir_entr
c2e5f7c8
JR
5856 do {
5857 struct proc_dir_entry *next;
5858 pde_get(de);
bb20add7
AM
5859+ if (!vx_hide_check(0, de->vx_flags)) {
5860+ vxdprintk(VXD_CBIT(misc, 9),
5861+ VS_Q("%*s") " hidden in proc_readdir_de()",
5862+ de->namelen, de->name);
c2e5f7c8 5863+ goto skip;
bb20add7 5864+ }
cc23e853 5865 read_unlock(&proc_subdir_lock);
c2e5f7c8
JR
5866 if (!dir_emit(ctx, de->name, de->namelen,
5867 de->low_ino, de->mode >> 12)) {
cc23e853 5868@@ -294,6 +311,7 @@ int proc_readdir_de(struct proc_dir_entr
c2e5f7c8
JR
5869 return 0;
5870 }
cc23e853 5871 read_lock(&proc_subdir_lock);
c2e5f7c8
JR
5872+ skip:
5873 ctx->pos++;
cc23e853 5874 next = pde_subdir_next(de);
c2e5f7c8 5875 pde_put(de);
cc23e853 5876@@ -387,6 +405,7 @@ static struct proc_dir_entry *__proc_cre
537831f9 5877 ent->mode = mode;
d337f35e 5878 ent->nlink = nlink;
cc23e853 5879 ent->subdir = RB_ROOT;
d337f35e 5880+ ent->vx_flags = IATTR_PROC_DEFAULT;
537831f9 5881 atomic_set(&ent->count, 1);
2380c486 5882 spin_lock_init(&ent->pde_unload_lock);
2380c486 5883 INIT_LIST_HEAD(&ent->pde_openers);
cc23e853 5884@@ -413,7 +432,8 @@ struct proc_dir_entry *proc_symlink(cons
d337f35e
JR
5885 kfree(ent->data);
5886 kfree(ent);
5887 ent = NULL;
5888- }
5889+ } else
5890+ ent->vx_flags = IATTR_PROC_SYMLINK;
5891 } else {
5892 kfree(ent);
5893 ent = NULL;
09a55596
AM
5894diff -NurpP --minimal linux-4.9.135/fs/proc/inode.c linux-4.9.135-vs2.3.9.8/fs/proc/inode.c
5895--- linux-4.9.135/fs/proc/inode.c 2018-10-20 10:39:18.000000000 +0000
5896+++ linux-4.9.135-vs2.3.9.8/fs/proc/inode.c 2018-10-20 05:55:43.000000000 +0000
5897@@ -433,6 +433,8 @@ struct inode *proc_get_inode(struct supe
d337f35e
JR
5898 inode->i_uid = de->uid;
5899 inode->i_gid = de->gid;
5900 }
5901+ if (de->vx_flags)
5902+ PROC_I(inode)->vx_flags = de->vx_flags;
5903 if (de->size)
5904 inode->i_size = de->size;
5905 if (de->nlink)
09a55596
AM
5906diff -NurpP --minimal linux-4.9.135/fs/proc/internal.h linux-4.9.135-vs2.3.9.8/fs/proc/internal.h
5907--- linux-4.9.135/fs/proc/internal.h 2018-10-20 10:39:18.000000000 +0000
5908+++ linux-4.9.135-vs2.3.9.8/fs/proc/internal.h 2018-10-20 05:55:43.000000000 +0000
09be7631
JR
5909@@ -14,6 +14,7 @@
5910 #include <linux/spinlock.h>
5911 #include <linux/atomic.h>
b00e13aa 5912 #include <linux/binfmts.h>
d337f35e
JR
5913+#include <linux/vs_pid.h>
5914
09be7631
JR
5915 struct ctl_table_header;
5916 struct mempolicy;
cc23e853 5917@@ -34,6 +35,7 @@ struct proc_dir_entry {
09be7631
JR
5918 nlink_t nlink;
5919 kuid_t uid;
5920 kgid_t gid;
5921+ int vx_flags;
5922 loff_t size;
5923 const struct inode_operations *proc_iops;
5924 const struct file_operations *proc_fops;
cc23e853 5925@@ -51,15 +53,22 @@ struct proc_dir_entry {
09be7631
JR
5926 char name[];
5927 };
5928
5929+struct vx_info;
5930+struct nx_info;
2380c486 5931+
09be7631
JR
5932 union proc_op {
5933 int (*proc_get_link)(struct dentry *, struct path *);
09be7631
JR
5934 int (*proc_show)(struct seq_file *m,
5935 struct pid_namespace *ns, struct pid *pid,
5936 struct task_struct *task);
5937+ int (*proc_vs_read)(char *page);
5938+ int (*proc_vxi_read)(struct vx_info *vxi, char *page);
5939+ int (*proc_nxi_read)(struct nx_info *nxi, char *page);
5940 };
2380c486 5941
09be7631
JR
5942 struct proc_inode {
5943 struct pid *pid;
5944+ int vx_flags;
cc23e853 5945 unsigned int fd;
09be7631
JR
5946 union proc_op op;
5947 struct proc_dir_entry *pde;
09a55596 5948@@ -93,11 +102,16 @@ static inline struct pid *proc_pid(struc
d337f35e
JR
5949 return PROC_I(inode)->pid;
5950 }
5951
5952-static inline struct task_struct *get_proc_task(struct inode *inode)
5953+static inline struct task_struct *get_proc_task_real(struct inode *inode)
5954 {
5955 return get_pid_task(proc_pid(inode), PIDTYPE_PID);
5956 }
5957
5958+static inline struct task_struct *get_proc_task(struct inode *inode)
5959+{
5960+ return vx_get_proc_task(inode, proc_pid(inode));
5961+}
5962+
09be7631 5963 static inline int task_dumpable(struct task_struct *task)
d337f35e 5964 {
09be7631 5965 int dumpable = 0;
09a55596 5966@@ -156,6 +170,8 @@ extern int proc_pid_status(struct seq_fi
09be7631
JR
5967 struct pid *, struct task_struct *);
5968 extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
5969 struct pid *, struct task_struct *);
5970+extern int proc_pid_nsproxy(struct seq_file *m, struct pid_namespace *ns,
5971+ struct pid *pid, struct task_struct *task);
5972
5973 /*
5974 * base.c
09a55596
AM
5975diff -NurpP --minimal linux-4.9.135/fs/proc/loadavg.c linux-4.9.135-vs2.3.9.8/fs/proc/loadavg.c
5976--- linux-4.9.135/fs/proc/loadavg.c 2016-12-11 19:17:54.000000000 +0000
5977+++ linux-4.9.135-vs2.3.9.8/fs/proc/loadavg.c 2018-10-20 04:58:14.000000000 +0000
ec22aa5c 5978@@ -12,15 +12,27 @@
1bc743c0 5979
ec22aa5c 5980 static int loadavg_proc_show(struct seq_file *m, void *v)
1bc743c0
JR
5981 {
5982+ unsigned long running;
5983+ unsigned int threads;
ec22aa5c 5984 unsigned long avnrun[3];
1bc743c0 5985
ec22aa5c 5986 get_avenrun(avnrun, FIXED_1/200, 0);
bd427b06 5987
ec22aa5c 5988+ if (vx_flags(VXF_VIRT_LOAD, 0)) {
eab5a9a6 5989+ struct vx_info *vxi = current_vx_info();
ec22aa5c
AM
5990+
5991+ running = atomic_read(&vxi->cvirt.nr_running);
5992+ threads = atomic_read(&vxi->cvirt.nr_threads);
5993+ } else {
5994+ running = nr_running();
5995+ threads = nr_threads;
5996+ }
5997+
5998 seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
5999 LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
6000 LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
6001 LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
1bc743c0
JR
6002- nr_running(), nr_threads,
6003+ running, threads,
6004 task_active_pid_ns(current)->last_pid);
ec22aa5c 6005 return 0;
1bc743c0 6006 }
09a55596
AM
6007diff -NurpP --minimal linux-4.9.135/fs/proc/meminfo.c linux-4.9.135-vs2.3.9.8/fs/proc/meminfo.c
6008--- linux-4.9.135/fs/proc/meminfo.c 2016-12-11 19:17:54.000000000 +0000
6009+++ linux-4.9.135-vs2.3.9.8/fs/proc/meminfo.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 6010@@ -55,7 +55,8 @@ static int meminfo_proc_show(struct seq_
c2e5f7c8
JR
6011 si_swapinfo(&i);
6012 committed = percpu_counter_read_positive(&vm_committed_as);
e3afe727 6013
cc23e853 6014- cached = global_node_page_state(NR_FILE_PAGES) -
e3afe727 6015+ cached = vx_flags(VXF_VIRT_MEM, 0) ?
cc23e853 6016+ vx_vsi_cached(&i) : global_node_page_state(NR_FILE_PAGES) -
b00e13aa 6017 total_swapcache_pages() - i.bufferram;
e3afe727 6018 if (cached < 0)
d337f35e 6019 cached = 0;
09a55596
AM
6020diff -NurpP --minimal linux-4.9.135/fs/proc/root.c linux-4.9.135-vs2.3.9.8/fs/proc/root.c
6021--- linux-4.9.135/fs/proc/root.c 2016-12-11 19:17:54.000000000 +0000
6022+++ linux-4.9.135-vs2.3.9.8/fs/proc/root.c 2018-10-20 04:58:14.000000000 +0000
b00e13aa 6023@@ -20,9 +20,14 @@
2380c486
JR
6024 #include <linux/mount.h>
6025 #include <linux/pid_namespace.h>
db55b927 6026 #include <linux/parser.h>
2380c486 6027+#include <linux/vserver/inode.h>
d337f35e 6028
2380c486 6029 #include "internal.h"
d337f35e 6030
d337f35e
JR
6031+struct proc_dir_entry *proc_virtual;
6032+
6033+extern void proc_vx_init(void);
2380c486 6034+
cc23e853
AM
6035 enum {
6036 Opt_gid, Opt_hidepid, Opt_err,
6037 };
6038@@ -145,6 +150,7 @@ void __init proc_root_init(void)
bb20add7 6039 proc_tty_init();
2380c486
JR
6040 proc_mkdir("bus", NULL);
6041 proc_sys_init();
d337f35e
JR
6042+ proc_vx_init();
6043 }
6044
6045 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
cc23e853 6046@@ -206,6 +212,7 @@ struct proc_dir_entry proc_root = {
2380c486
JR
6047 .proc_iops = &proc_root_inode_operations,
6048 .proc_fops = &proc_root_operations,
6049 .parent = &proc_root,
6050+ .vx_flags = IATTR_ADMIN | IATTR_WATCH,
cc23e853 6051 .subdir = RB_ROOT,
a168f21d 6052 .name = "/proc",
2380c486 6053 };
09a55596
AM
6054diff -NurpP --minimal linux-4.9.135/fs/proc/self.c linux-4.9.135-vs2.3.9.8/fs/proc/self.c
6055--- linux-4.9.135/fs/proc/self.c 2016-12-11 19:17:54.000000000 +0000
6056+++ linux-4.9.135-vs2.3.9.8/fs/proc/self.c 2018-10-20 04:58:14.000000000 +0000
cc23e853
AM
6057@@ -1,6 +1,7 @@
6058 #include <linux/sched.h>
09be7631
JR
6059 #include <linux/slab.h>
6060 #include <linux/pid_namespace.h>
b00e13aa 6061+#include <linux/vserver/inode.h>
09be7631 6062 #include "internal.h"
b00e13aa
AM
6063
6064 /*
c2e5f7c8 6065@@ -54,6 +55,8 @@ int proc_setup_self(struct super_block *
09be7631
JR
6066 self = d_alloc_name(s->s_root, "self");
6067 if (self) {
6068 struct inode *inode = new_inode_pseudo(s);
6069+
6070+ // self->vx_flags = IATTR_PROC_SYMLINK;
6071 if (inode) {
6072 inode->i_ino = self_inum;
cc23e853 6073 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
09a55596
AM
6074diff -NurpP --minimal linux-4.9.135/fs/proc/stat.c linux-4.9.135-vs2.3.9.8/fs/proc/stat.c
6075--- linux-4.9.135/fs/proc/stat.c 2018-10-20 10:39:18.000000000 +0000
6076+++ linux-4.9.135-vs2.3.9.8/fs/proc/stat.c 2018-10-20 11:46:17.000000000 +0000
537831f9 6077@@ -9,8 +9,10 @@
1e8b8f9b
AM
6078 #include <linux/slab.h>
6079 #include <linux/time.h>
6080 #include <linux/irqnr.h>
6081+#include <linux/vserver/cvirt.h>
265de2f7 6082 #include <linux/cputime.h>
1e8b8f9b 6083 #include <linux/tick.h>
537831f9
AM
6084+#include <linux/cpuset.h>
6085
6086 #ifndef arch_irq_stat_cpu
6087 #define arch_irq_stat_cpu(cpu) 0
cc23e853 6088@@ -86,13 +88,24 @@ static int show_stat(struct seq_file *p,
537831f9
AM
6089 u64 sum_softirq = 0;
6090 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
cc23e853 6091 struct timespec64 boottime;
537831f9
AM
6092+ cpumask_var_t cpus_allowed;
6093+ bool virt_cpu = vx_flags(VXF_VIRT_CPU, 0);
6094
6095 user = nice = system = idle = iowait =
1e8b8f9b
AM
6096 irq = softirq = steal = 0;
6097 guest = guest_nice = 0;
cc23e853
AM
6098 getboottime64(&boottime);
6099
1e8b8f9b 6100+ if (vx_flags(VXF_VIRT_UPTIME, 0))
369dbd59 6101+ vx_vsi_boottime64(&boottime);
537831f9
AM
6102+
6103+ if (virt_cpu)
6104+ cpuset_cpus_allowed(current, cpus_allowed);
1e8b8f9b 6105+
1e8b8f9b 6106 for_each_possible_cpu(i) {
537831f9
AM
6107+ if (virt_cpu && !cpumask_test_cpu(i, cpus_allowed))
6108+ continue;
6109+
6110 user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
6111 nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
6112 system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
cc23e853 6113@@ -128,6 +141,9 @@ static int show_stat(struct seq_file *p,
537831f9
AM
6114 seq_putc(p, '\n');
6115
6116 for_each_online_cpu(i) {
6117+ if (virt_cpu && !cpumask_test_cpu(i, cpus_allowed))
6118+ continue;
6119+
6120 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
6121 user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
6122 nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
09a55596
AM
6123diff -NurpP --minimal linux-4.9.135/fs/proc/uptime.c linux-4.9.135-vs2.3.9.8/fs/proc/uptime.c
6124--- linux-4.9.135/fs/proc/uptime.c 2018-10-20 10:39:18.000000000 +0000
6125+++ linux-4.9.135-vs2.3.9.8/fs/proc/uptime.c 2018-10-20 11:47:26.000000000 +0000
f6c5ef8b 6126@@ -5,6 +5,7 @@
ec22aa5c
AM
6127 #include <linux/seq_file.h>
6128 #include <linux/time.h>
f6c5ef8b 6129 #include <linux/kernel_stat.h>
ec22aa5c 6130+#include <linux/vserver/cvirt.h>
ec22aa5c
AM
6131
6132 static int uptime_proc_show(struct seq_file *m, void *v)
627cd95b 6133 {
09a55596
AM
6134@@ -21,6 +22,10 @@ static int uptime_proc_show(struct seq_f
6135 get_monotonic_boottime(&uptime);
f6c5ef8b
AM
6136 idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
6137 idle.tv_nsec = rem;
ec22aa5c
AM
6138+
6139+ if (vx_flags(VXF_VIRT_UPTIME, 0))
6140+ vx_vsi_uptime(&uptime, &idle);
6141+
6142 seq_printf(m, "%lu.%02lu %lu.%02lu\n",
6143 (unsigned long) uptime.tv_sec,
6144 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
09a55596
AM
6145diff -NurpP --minimal linux-4.9.135/fs/proc_namespace.c linux-4.9.135-vs2.3.9.8/fs/proc_namespace.c
6146--- linux-4.9.135/fs/proc_namespace.c 2016-12-11 19:17:54.000000000 +0000
6147+++ linux-4.9.135-vs2.3.9.8/fs/proc_namespace.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 6148@@ -46,6 +46,8 @@ static int show_sb_opts(struct seq_file
db55b927
AM
6149 { MS_DIRSYNC, ",dirsync" },
6150 { MS_MANDLOCK, ",mand" },
cc23e853 6151 { MS_LAZYTIME, ",lazytime" },
db55b927
AM
6152+ { MS_TAGGED, ",tag" },
6153+ { MS_NOTAGCHECK, ",notagcheck" },
6154 { 0, NULL }
6155 };
6156 const struct proc_fs_info *fs_infop;
cc23e853 6157@@ -82,6 +84,38 @@ static inline void mangle(struct seq_fil
db55b927
AM
6158 seq_escape(m, s, " \t\n\\");
6159 }
6160
61b0c03f
JR
6161+#ifdef CONFIG_VSERVER_EXTRA_MNT_CHECK
6162+
db55b927
AM
6163+static int mnt_is_reachable(struct vfsmount *vfsmnt)
6164+{
6165+ struct path root;
6166+ struct dentry *point;
6167+ struct mount *mnt = real_mount(vfsmnt);
6168+ struct mount *root_mnt;
6169+ int ret;
6170+
6171+ if (mnt == mnt->mnt_ns->root)
6172+ return 1;
6173+
98d9a5b1 6174+ rcu_read_lock();
db55b927
AM
6175+ root = current->fs->root;
6176+ root_mnt = real_mount(root.mnt);
6177+ point = root.dentry;
6178+
6179+ while ((mnt != mnt->mnt_parent) && (mnt != root_mnt)) {
6180+ point = mnt->mnt_mountpoint;
6181+ mnt = mnt->mnt_parent;
6182+ }
98d9a5b1 6183+ rcu_read_unlock();
db55b927
AM
6184+
6185+ ret = (mnt == root_mnt) && is_subdir(point, root.dentry);
db55b927
AM
6186+ return ret;
6187+}
61b0c03f
JR
6188+
6189+#else
6190+#define mnt_is_reachable(v) (1)
6191+#endif
db55b927
AM
6192+
6193 static void show_type(struct seq_file *m, struct super_block *sb)
6194 {
6195 mangle(m, sb->s_type->name);
cc23e853 6196@@ -99,6 +133,17 @@ static int show_vfsmnt(struct seq_file *
db55b927 6197 struct super_block *sb = mnt_path.dentry->d_sb;
cc23e853 6198 int err;
db55b927
AM
6199
6200+ if (vx_flags(VXF_HIDE_MOUNT, 0))
6201+ return SEQ_SKIP;
6202+ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6203+ return SEQ_SKIP;
6204+
6205+ if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
6206+ mnt == current->fs->root.mnt) {
6207+ seq_puts(m, "/dev/root / ");
6208+ goto type;
6209+ }
6210+
6211 if (sb->s_op->show_devname) {
6212 err = sb->s_op->show_devname(m, mnt_path.dentry);
6213 if (err)
cc23e853
AM
6214@@ -112,6 +157,7 @@ static int show_vfsmnt(struct seq_file *
6215 if (err)
6216 goto out;
db55b927
AM
6217 seq_putc(m, ' ');
6218+type:
6219 show_type(m, sb);
6220 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
6221 err = show_sb_opts(m, sb);
cc23e853
AM
6222@@ -133,6 +179,11 @@ static int show_mountinfo(struct seq_fil
6223 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
6224 int err;
db55b927
AM
6225
6226+ if (vx_flags(VXF_HIDE_MOUNT, 0))
6227+ return SEQ_SKIP;
6228+ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6229+ return SEQ_SKIP;
6230+
6231 seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
6232 MAJOR(sb->s_dev), MINOR(sb->s_dev));
cc23e853
AM
6233 if (sb->s_op->show_path) {
6234@@ -195,6 +246,17 @@ static int show_vfsstat(struct seq_file
db55b927 6235 struct super_block *sb = mnt_path.dentry->d_sb;
cc23e853 6236 int err;
db55b927
AM
6237
6238+ if (vx_flags(VXF_HIDE_MOUNT, 0))
6239+ return SEQ_SKIP;
6240+ if (!mnt_is_reachable(mnt) && !vx_check(0, VS_WATCH_P))
6241+ return SEQ_SKIP;
6242+
6243+ if (!vx_check(0, VS_ADMIN|VS_WATCH) &&
6244+ mnt == current->fs->root.mnt) {
6245+ seq_puts(m, "device /dev/root mounted on / ");
6246+ goto type;
6247+ }
6248+
6249 /* device */
6250 if (sb->s_op->show_devname) {
6251 seq_puts(m, "device ");
cc23e853
AM
6252@@ -216,7 +278,7 @@ static int show_vfsstat(struct seq_file
6253 if (err)
6254 goto out;
db55b927
AM
6255 seq_putc(m, ' ');
6256-
6257+type:
6258 /* file system type */
6259 seq_puts(m, "with fstype ");
6260 show_type(m, sb);
09a55596
AM
6261diff -NurpP --minimal linux-4.9.135/fs/quota/dquot.c linux-4.9.135-vs2.3.9.8/fs/quota/dquot.c
6262--- linux-4.9.135/fs/quota/dquot.c 2018-10-20 10:39:18.000000000 +0000
6263+++ linux-4.9.135-vs2.3.9.8/fs/quota/dquot.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 6264@@ -1658,6 +1658,9 @@ int __dquot_alloc_space(struct inode *in
76514441 6265 int reserve = flags & DQUOT_SPACE_RESERVE;
cc23e853 6266 struct dquot **dquots;
76514441
AM
6267
6268+ if ((ret = dl_alloc_space(inode, number)))
6269+ return ret;
6270+
bb20add7
AM
6271 if (!dquot_active(inode)) {
6272 inode_incr_space(inode, number, reserve);
6273 goto out;
cc23e853 6274@@ -1710,6 +1713,9 @@ int dquot_alloc_inode(struct inode *inod
1e8b8f9b 6275 struct dquot_warn warn[MAXQUOTAS];
cc23e853 6276 struct dquot * const *dquots;
76514441
AM
6277
6278+ if ((ret = dl_alloc_inode(inode)))
6279+ return ret;
6280+
93de0823 6281 if (!dquot_active(inode))
bb20add7
AM
6282 return 0;
6283 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
cc23e853
AM
6284@@ -1812,6 +1818,8 @@ void __dquot_free_space(struct inode *in
6285 struct dquot **dquots;
bb20add7 6286 int reserve = flags & DQUOT_SPACE_RESERVE, index;
76514441
AM
6287
6288+ dl_free_space(inode, number);
6289+
93de0823 6290 if (!dquot_active(inode)) {
bb20add7
AM
6291 inode_decr_space(inode, number, reserve);
6292 return;
cc23e853
AM
6293@@ -1856,6 +1864,8 @@ void dquot_free_inode(struct inode *inod
6294 struct dquot * const *dquots;
bb20add7 6295 int index;
76514441
AM
6296
6297+ dl_free_inode(inode);
6298+
93de0823 6299 if (!dquot_active(inode))
bb20add7
AM
6300 return;
6301
09a55596
AM
6302diff -NurpP --minimal linux-4.9.135/fs/quota/quota.c linux-4.9.135-vs2.3.9.8/fs/quota/quota.c
6303--- linux-4.9.135/fs/quota/quota.c 2018-10-20 10:39:18.000000000 +0000
6304+++ linux-4.9.135-vs2.3.9.8/fs/quota/quota.c 2018-10-20 05:55:43.000000000 +0000
78865d5b
AM
6305@@ -8,6 +8,7 @@
6306 #include <linux/fs.h>
6307 #include <linux/namei.h>
6308 #include <linux/slab.h>
d337f35e 6309+#include <linux/vs_context.h>
78865d5b 6310 #include <asm/current.h>
92598135 6311 #include <linux/uaccess.h>
78865d5b 6312 #include <linux/kernel.h>
09a55596 6313@@ -39,7 +40,7 @@ static int check_quotactl_permission(str
78865d5b
AM
6314 break;
6315 /*FALLTHROUGH*/
6316 default:
d337f35e
JR
6317- if (!capable(CAP_SYS_ADMIN))
6318+ if (!vx_capable(CAP_SYS_ADMIN, VXC_QUOTA_CTL))
6319 return -EPERM;
6320 }
6321
09a55596 6322@@ -770,6 +771,46 @@ static int do_quotactl(struct super_bloc
b00e13aa
AM
6323
6324 #ifdef CONFIG_BLOCK
d337f35e 6325
d337f35e
JR
6326+#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6327+
6328+#include <linux/vroot.h>
2380c486
JR
6329+#include <linux/major.h>
6330+#include <linux/module.h>
d337f35e 6331+#include <linux/kallsyms.h>
2380c486 6332+#include <linux/vserver/debug.h>
d337f35e
JR
6333+
6334+static vroot_grb_func *vroot_get_real_bdev = NULL;
6335+
763640ca 6336+static DEFINE_SPINLOCK(vroot_grb_lock);
d337f35e
JR
6337+
6338+int register_vroot_grb(vroot_grb_func *func) {
6339+ int ret = -EBUSY;
6340+
6341+ spin_lock(&vroot_grb_lock);
6342+ if (!vroot_get_real_bdev) {
6343+ vroot_get_real_bdev = func;
6344+ ret = 0;
6345+ }
6346+ spin_unlock(&vroot_grb_lock);
6347+ return ret;
6348+}
6349+EXPORT_SYMBOL(register_vroot_grb);
6350+
6351+int unregister_vroot_grb(vroot_grb_func *func) {
6352+ int ret = -EINVAL;
6353+
6354+ spin_lock(&vroot_grb_lock);
6355+ if (vroot_get_real_bdev) {
6356+ vroot_get_real_bdev = NULL;
6357+ ret = 0;
6358+ }
6359+ spin_unlock(&vroot_grb_lock);
6360+ return ret;
6361+}
6362+EXPORT_SYMBOL(unregister_vroot_grb);
6363+
6364+#endif
6365+
db55b927
AM
6366 /* Return 1 if 'cmd' will block on frozen filesystem */
6367 static int quotactl_cmd_write(int cmd)
6368 {
09a55596 6369@@ -811,6 +852,22 @@ static struct super_block *quotactl_bloc
2380c486
JR
6370 putname(tmp);
6371 if (IS_ERR(bdev))
6372 return ERR_CAST(bdev);
6373+#if defined(CONFIG_BLK_DEV_VROOT) || defined(CONFIG_BLK_DEV_VROOT_MODULE)
6374+ if (bdev && bdev->bd_inode &&
537831f9 6375+ imajor(bdev->bd_inode) == VROOT_MAJOR) {
2380c486
JR
6376+ struct block_device *bdnew = (void *)-EINVAL;
6377+
6378+ if (vroot_get_real_bdev)
6379+ bdnew = vroot_get_real_bdev(bdev);
6380+ else
6381+ vxdprintk(VXD_CBIT(misc, 0),
6382+ "vroot_get_real_bdev not set");
6383+ bdput(bdev);
6384+ if (IS_ERR(bdnew))
6385+ return ERR_PTR(PTR_ERR(bdnew));
6386+ bdev = bdnew;
6387+ }
6388+#endif
db55b927
AM
6389 if (quotactl_cmd_write(cmd))
6390 sb = get_super_thawed(bdev);
6391 else
09a55596
AM
6392diff -NurpP --minimal linux-4.9.135/fs/stat.c linux-4.9.135-vs2.3.9.8/fs/stat.c
6393--- linux-4.9.135/fs/stat.c 2018-10-20 10:39:18.000000000 +0000
6394+++ linux-4.9.135-vs2.3.9.8/fs/stat.c 2018-10-20 04:58:14.000000000 +0000
2380c486 6395@@ -26,6 +26,7 @@ void generic_fillattr(struct inode *inod
d337f35e
JR
6396 stat->nlink = inode->i_nlink;
6397 stat->uid = inode->i_uid;
6398 stat->gid = inode->i_gid;
6399+ stat->tag = inode->i_tag;
6400 stat->rdev = inode->i_rdev;
a168f21d 6401 stat->size = i_size_read(inode);
d337f35e 6402 stat->atime = inode->i_atime;
09a55596
AM
6403diff -NurpP --minimal linux-4.9.135/fs/statfs.c linux-4.9.135-vs2.3.9.8/fs/statfs.c
6404--- linux-4.9.135/fs/statfs.c 2016-12-11 19:17:54.000000000 +0000
6405+++ linux-4.9.135-vs2.3.9.8/fs/statfs.c 2018-10-20 04:58:14.000000000 +0000
93de0823 6406@@ -7,6 +7,8 @@
76514441
AM
6407 #include <linux/statfs.h>
6408 #include <linux/security.h>
6409 #include <linux/uaccess.h>
6410+#include <linux/vs_base.h>
6411+#include <linux/vs_dlimit.h>
db55b927 6412 #include "internal.h"
76514441 6413
93de0823 6414 static int flags_by_mnt(int mnt_flags)
db55b927 6415@@ -60,6 +62,8 @@ static int statfs_by_dentry(struct dentr
93de0823
AM
6416 retval = dentry->d_sb->s_op->statfs(dentry, buf);
6417 if (retval == 0 && buf->f_frsize == 0)
6418 buf->f_frsize = buf->f_bsize;
6419+ if (!vx_check(0, VS_ADMIN|VS_WATCH))
6420+ vx_vsi_statfs(dentry->d_sb, buf);
76514441
AM
6421 return retval;
6422 }
93de0823 6423
09a55596
AM
6424diff -NurpP --minimal linux-4.9.135/fs/super.c linux-4.9.135-vs2.3.9.8/fs/super.c
6425--- linux-4.9.135/fs/super.c 2018-10-20 10:39:18.000000000 +0000
6426+++ linux-4.9.135-vs2.3.9.8/fs/super.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 6427@@ -34,6 +34,8 @@
1e8b8f9b 6428 #include <linux/fsnotify.h>
92598135 6429 #include <linux/lockdep.h>
cc23e853 6430 #include <linux/user_namespace.h>
1e8b8f9b 6431+#include <linux/magic.h>
be261992
AM
6432+#include <linux/vs_context.h>
6433 #include "internal.h"
6434
6435
09a55596 6436@@ -985,7 +987,8 @@ struct dentry *mount_ns(struct file_syst
cc23e853
AM
6437 /* Don't allow mounting unless the caller has CAP_SYS_ADMIN
6438 * over the namespace.
6439 */
6440- if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
6441+ if (!(flags & MS_KERNMOUNT) &&
6442+ !vx_ns_capable(user_ns, CAP_SYS_ADMIN, VXC_SECURE_MOUNT))
6443 return ERR_PTR(-EPERM);
6444
6445 sb = sget_userns(fs_type, ns_test_super, ns_set_super, flags,
09a55596 6446@@ -1195,6 +1198,13 @@ mount_fs(struct file_system_type *type,
cc23e853 6447 WARN_ON(!sb->s_bdi);
be261992
AM
6448 sb->s_flags |= MS_BORN;
6449
6450+ error = -EPERM;
6451+ if (!vx_capable(CAP_SYS_ADMIN, VXC_BINARY_MOUNT) &&
6452+ !sb->s_bdev &&
6453+ (sb->s_magic != PROC_SUPER_MAGIC) &&
6454+ (sb->s_magic != DEVPTS_SUPER_MAGIC))
6455+ goto out_sb;
6456+
6457 error = security_sb_kern_mount(sb, flags, secdata);
6458 if (error)
6459 goto out_sb;
09a55596
AM
6460diff -NurpP --minimal linux-4.9.135/fs/utimes.c linux-4.9.135-vs2.3.9.8/fs/utimes.c
6461--- linux-4.9.135/fs/utimes.c 2016-12-11 19:17:54.000000000 +0000
6462+++ linux-4.9.135-vs2.3.9.8/fs/utimes.c 2018-10-20 04:58:14.000000000 +0000
2380c486
JR
6463@@ -8,6 +8,8 @@
6464 #include <linux/stat.h>
d337f35e 6465 #include <linux/utime.h>
2380c486 6466 #include <linux/syscalls.h>
d337f35e
JR
6467+#include <linux/mount.h>
6468+#include <linux/vs_cowbl.h>
6469 #include <asm/uaccess.h>
6470 #include <asm/unistd.h>
6471
c2e5f7c8 6472@@ -52,13 +54,19 @@ static int utimes_common(struct path *pa
76514441
AM
6473 {
6474 int error;
6475 struct iattr newattrs;
6476- struct inode *inode = path->dentry->d_inode;
c2e5f7c8 6477 struct inode *delegated_inode = NULL;
76514441 6478+ struct inode *inode;
b00e13aa
AM
6479+
6480+ error = cow_check_and_break(path);
6481+ if (error)
6482+ goto out;
76514441
AM
6483
6484 error = mnt_want_write(path->mnt);
6485 if (error)
6486 goto out;
6487
76514441
AM
6488+ inode = path->dentry->d_inode;
6489+
6490 if (times && times[0].tv_nsec == UTIME_NOW &&
6491 times[1].tv_nsec == UTIME_NOW)
6492 times = NULL;
09a55596
AM
6493diff -NurpP --minimal linux-4.9.135/fs/xattr.c linux-4.9.135-vs2.3.9.8/fs/xattr.c
6494--- linux-4.9.135/fs/xattr.c 2018-10-20 10:39:18.000000000 +0000
6495+++ linux-4.9.135-vs2.3.9.8/fs/xattr.c 2018-10-20 05:55:43.000000000 +0000
537831f9 6496@@ -21,6 +21,7 @@
d337f35e 6497 #include <linux/audit.h>
1e8b8f9b 6498 #include <linux/vmalloc.h>
537831f9 6499 #include <linux/posix_acl_xattr.h>
d337f35e 6500+#include <linux/mount.h>
d337f35e 6501
1e8b8f9b 6502 #include <asm/uaccess.h>
d337f35e 6503
cc23e853 6504@@ -112,7 +113,7 @@ xattr_permission(struct inode *inode, co
763640ca 6505 * The trusted.* namespace can only be accessed by privileged users.
e03b8c3c 6506 */
763640ca
JR
6507 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
6508- if (!capable(CAP_SYS_ADMIN))
a168f21d
AM
6509+ if (!vx_capable(CAP_SYS_ADMIN, VXC_FS_TRUSTED))
6510 return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
6511 return 0;
6512 }
09a55596
AM
6513diff -NurpP --minimal linux-4.9.135/include/linux/capability.h linux-4.9.135-vs2.3.9.8/include/linux/capability.h
6514--- linux-4.9.135/include/linux/capability.h 2018-10-20 10:39:19.000000000 +0000
6515+++ linux-4.9.135-vs2.3.9.8/include/linux/capability.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 6516@@ -78,7 +78,8 @@ extern const kernel_cap_t __cap_init_eff
bb20add7
AM
6517 #else /* HAND-CODED capability initializers */
6518
6519 #define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1)
6520-#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1)
6521+#define CAP_LAST_U32_VALID_MASK ((CAP_TO_MASK(CAP_LAST_CAP + 1) -1) \
6522+ | CAP_TO_MASK(CAP_CONTEXT))
6523
6524 # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }})
6525 # define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }})
09a55596
AM
6526diff -NurpP --minimal linux-4.9.135/include/linux/cred.h linux-4.9.135-vs2.3.9.8/include/linux/cred.h
6527--- linux-4.9.135/include/linux/cred.h 2018-10-20 10:39:19.000000000 +0000
6528+++ linux-4.9.135-vs2.3.9.8/include/linux/cred.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 6529@@ -152,6 +152,7 @@ extern void exit_creds(struct task_struc
1163e6ab
AM
6530 extern int copy_creds(struct task_struct *, unsigned long);
6531 extern const struct cred *get_task_cred(struct task_struct *);
6532 extern struct cred *cred_alloc_blank(void);
6533+extern struct cred *__prepare_creds(const struct cred *);
6534 extern struct cred *prepare_creds(void);
6535 extern struct cred *prepare_exec_creds(void);
6536 extern int commit_creds(struct cred *);
cc23e853
AM
6537@@ -212,6 +213,31 @@ static inline bool cap_ambient_invariant
6538 cred->cap_inheritable));
3bac966d 6539 }
3bac966d
AM
6540
6541+static inline void set_cred_subscribers(struct cred *cred, int n)
6542+{
6543+#ifdef CONFIG_DEBUG_CREDENTIALS
6544+ atomic_set(&cred->subscribers, n);
6545+#endif
6546+}
6547+
6548+static inline int read_cred_subscribers(const struct cred *cred)
6549+{
6550+#ifdef CONFIG_DEBUG_CREDENTIALS
6551+ return atomic_read(&cred->subscribers);
6552+#else
6553+ return 0;
6554+#endif
6555+}
6556+
6557+static inline void alter_cred_subscribers(const struct cred *_cred, int n)
6558+{
6559+#ifdef CONFIG_DEBUG_CREDENTIALS
6560+ struct cred *cred = (struct cred *) _cred;
6561+
6562+ atomic_add(n, &cred->subscribers);
6563+#endif
6564+}
6565+
6566 /**
6567 * get_new_cred - Get a reference on a new set of credentials
6568 * @cred: The new credentials to reference
09a55596
AM
6569diff -NurpP --minimal linux-4.9.135/include/linux/dcache.h linux-4.9.135-vs2.3.9.8/include/linux/dcache.h
6570--- linux-4.9.135/include/linux/dcache.h 2018-10-20 10:39:19.000000000 +0000
6571+++ linux-4.9.135-vs2.3.9.8/include/linux/dcache.h 2018-10-20 04:58:14.000000000 +0000
6572@@ -308,8 +308,10 @@ extern char *dentry_path(struct dentry *
cc23e853
AM
6573 */
6574 static inline struct dentry *dget_dlock(struct dentry *dentry)
6575 {
6576- if (dentry)
6577+ if (dentry) {
6578 dentry->d_lockref.count++;
6579+ // vx_dentry_inc(dentry);
6580+ }
6581 return dentry;
6582 }
6583
09a55596
AM
6584diff -NurpP --minimal linux-4.9.135/include/linux/devpts_fs.h linux-4.9.135-vs2.3.9.8/include/linux/devpts_fs.h
6585--- linux-4.9.135/include/linux/devpts_fs.h 2016-12-11 19:17:54.000000000 +0000
6586+++ linux-4.9.135-vs2.3.9.8/include/linux/devpts_fs.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 6587@@ -34,5 +34,4 @@ void devpts_pty_kill(struct dentry *);
2380c486
JR
6588
6589 #endif
d337f35e 6590
2380c486 6591-
d337f35e 6592 #endif /* _LINUX_DEVPTS_FS_H */
09a55596
AM
6593diff -NurpP --minimal linux-4.9.135/include/linux/fs.h linux-4.9.135-vs2.3.9.8/include/linux/fs.h
6594--- linux-4.9.135/include/linux/fs.h 2018-10-20 10:39:19.000000000 +0000
6595+++ linux-4.9.135-vs2.3.9.8/include/linux/fs.h 2018-10-20 04:58:14.000000000 +0000
6596@@ -226,6 +226,7 @@ typedef int (dio_iodone_t)(struct kiocb
2380c486
JR
6597 #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */
6598 #define ATTR_TIMES_SET (1 << 16)
cc23e853
AM
6599 #define ATTR_TOUCH (1 << 17)
6600+#define ATTR_TAG (1 << 18)
d337f35e
JR
6601
6602 /*
bb20add7 6603 * Whiteout is represented by a char device. The following constants define the
09a55596 6604@@ -248,6 +249,7 @@ struct iattr {
d337f35e 6605 umode_t ia_mode;
42bc425c
AM
6606 kuid_t ia_uid;
6607 kgid_t ia_gid;
537831f9 6608+ ktag_t ia_tag;
d337f35e
JR
6609 loff_t ia_size;
6610 struct timespec ia_atime;
6611 struct timespec ia_mtime;
09a55596 6612@@ -607,7 +609,9 @@ struct inode {
a168f21d 6613 unsigned short i_opflags;
42bc425c
AM
6614 kuid_t i_uid;
6615 kgid_t i_gid;
2380c486 6616- unsigned int i_flags;
537831f9 6617+ ktag_t i_tag;
2380c486
JR
6618+ unsigned short i_flags;
6619+ unsigned short i_vflags;
a168f21d
AM
6620
6621 #ifdef CONFIG_FS_POSIX_ACL
6622 struct posix_acl *i_acl;
09a55596 6623@@ -636,6 +640,7 @@ struct inode {
f6c5ef8b
AM
6624 unsigned int __i_nlink;
6625 };
d33d7b00
AM
6626 dev_t i_rdev;
6627+ dev_t i_mdev;
42bc425c 6628 loff_t i_size;
a168f21d
AM
6629 struct timespec i_atime;
6630 struct timespec i_mtime;
09a55596 6631@@ -840,14 +845,19 @@ static inline void i_size_write(struct i
cc23e853 6632 #endif
537831f9 6633 }
2380c486 6634
61333608 6635+static inline void i_tag_write(struct inode *inode, vtag_t tag)
537831f9
AM
6636+{
6637+ inode->i_tag = make_ktag(&init_user_ns, tag);
6638+}
6639+
2380c486
JR
6640 static inline unsigned iminor(const struct inode *inode)
6641 {
6642- return MINOR(inode->i_rdev);
6643+ return MINOR(inode->i_mdev);
6644 }
6645
6646 static inline unsigned imajor(const struct inode *inode)
6647 {
6648- return MAJOR(inode->i_rdev);
6649+ return MAJOR(inode->i_mdev);
6650 }
6651
6652 extern struct block_device *I_BDEV(struct inode *inode);
09a55596 6653@@ -904,6 +914,7 @@ struct file {
d337f35e
JR
6654 loff_t f_pos;
6655 struct fown_struct f_owner;
ec22aa5c 6656 const struct cred *f_cred;
61333608 6657+ vxid_t f_xid;
d337f35e
JR
6658 struct file_ra_state f_ra;
6659
2380c486 6660 u64 f_version;
09a55596 6661@@ -1038,6 +1049,7 @@ struct file_lock {
2380c486 6662 struct file *fl_file;
d337f35e
JR
6663 loff_t fl_start;
6664 loff_t fl_end;
61333608 6665+ vxid_t fl_xid;
d337f35e
JR
6666
6667 struct fasync_struct * fl_fasync; /* for lease break notifications */
f6c5ef8b 6668 /* for lease breaks: */
09a55596 6669@@ -1471,6 +1483,11 @@ static inline gid_t i_gid_read(const str
cc23e853
AM
6670 return from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
6671 }
6672
6673+static inline vtag_t i_tag_read(const struct inode *inode)
6674+{
6675+ return from_ktag(&init_user_ns, inode->i_tag);
6676+}
6677+
6678 static inline void i_uid_write(struct inode *inode, uid_t uid)
6679 {
6680 inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid);
09a55596 6681@@ -1760,6 +1777,7 @@ struct inode_operations {
cc23e853
AM
6682 int (*setattr) (struct dentry *, struct iattr *);
6683 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
d4263eb0 6684 ssize_t (*listxattr) (struct dentry *, char *, size_t);
d4263eb0 6685+ int (*sync_flags) (struct inode *, int, int);
d33d7b00
AM
6686 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
6687 u64 len);
42bc425c 6688 int (*update_time)(struct inode *, struct timespec *, int);
09a55596 6689@@ -1774,6 +1792,7 @@ ssize_t rw_copy_check_uvector(int type,
537831f9
AM
6690 unsigned long nr_segs, unsigned long fast_segs,
6691 struct iovec *fast_pointer,
6692 struct iovec **ret_pointer);
d337f35e
JR
6693+ssize_t vfs_sendfile(struct file *, struct file *, loff_t *, size_t, loff_t);
6694
cc23e853
AM
6695 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
6696 extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *);
09a55596 6697@@ -1845,6 +1864,14 @@ struct super_operations {
cc23e853
AM
6698 #else
6699 #define S_DAX 0 /* Make all the DAX code disappear */
6700 #endif
6701+#define S_IXUNLINK 16384 /* Immutable Invert on unlink */
537831f9
AM
6702+
6703+/* Linux-VServer related Inode flags */
6704+
6705+#define V_VALID 1
6706+#define V_XATTR 2
6707+#define V_BARRIER 4 /* Barrier for chroot() */
6708+#define V_COW 8 /* Copy on Write */
6709
6710 /*
6711 * Note that nosuid etc flags are inode-specific: setting some file-system
09a55596 6712@@ -1869,10 +1896,13 @@ struct super_operations {
537831f9
AM
6713 #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
6714 #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
6715 #define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
6716+#define IS_TAGGED(inode) __IS_FLG(inode, MS_TAGGED)
6717
6718 #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
6719 #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
6720 #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
6721+#define IS_IXUNLINK(inode) ((inode)->i_flags & S_IXUNLINK)
6722+#define IS_IXORUNLINK(inode) ((IS_IXUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode))
6723 #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL)
6724
6725 #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
09a55596 6726@@ -1892,6 +1922,16 @@ static inline bool HAS_UNMAPPED_ID(struc
cc23e853
AM
6727 return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
6728 }
537831f9
AM
6729
6730+#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_vflags & V_BARRIER))
6731+
6732+#ifdef CONFIG_VSERVER_COWBL
6733+# define IS_COW(inode) (IS_IXUNLINK(inode) && IS_IMMUTABLE(inode))
6734+# define IS_COW_LINK(inode) (S_ISREG((inode)->i_mode) && ((inode)->i_nlink > 1))
6735+#else
6736+# define IS_COW(inode) (0)
6737+# define IS_COW_LINK(inode) (0)
6738+#endif
6739+
6740 /*
6741 * Inode state bits. Protected by inode->i_lock
6742 *
09a55596 6743@@ -2157,6 +2197,9 @@ extern struct kobject *fs_kobj;
bb20add7 6744 extern int locks_mandatory_locked(struct file *);
cc23e853 6745 extern int locks_mandatory_area(struct inode *, struct file *, loff_t, loff_t, unsigned char);
537831f9
AM
6746
6747+#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */
6748+#define ATTR_FLAG_IXUNLINK 1024 /* Immutable invert on unlink */
6749+
6750 /*
6751 * Candidates for mandatory locking have the setgid bit set
6752 * but no group execute bit - an otherwise meaningless combination.
09a55596 6753@@ -2337,7 +2380,7 @@ struct filename {
cc23e853
AM
6754 const char iname[];
6755 };
6756
6757-extern long vfs_truncate(const struct path *, loff_t);
6758+extern long vfs_truncate(struct path *, loff_t);
6759 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
6760 struct file *filp);
6761 extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
09a55596 6762@@ -2967,6 +3010,7 @@ extern int dcache_dir_open(struct inode
d337f35e
JR
6763 extern int dcache_dir_close(struct inode *, struct file *);
6764 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
c2e5f7c8
JR
6765 extern int dcache_readdir(struct file *, struct dir_context *);
6766+extern int dcache_readdir_filter(struct file *, struct dir_context *, int (*)(struct dentry *));
76514441 6767 extern int simple_setattr(struct dentry *, struct iattr *);
d337f35e
JR
6768 extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
6769 extern int simple_statfs(struct dentry *, struct kstatfs *);
09a55596
AM
6770diff -NurpP --minimal linux-4.9.135/include/linux/init_task.h linux-4.9.135-vs2.3.9.8/include/linux/init_task.h
6771--- linux-4.9.135/include/linux/init_task.h 2016-12-11 19:17:54.000000000 +0000
6772+++ linux-4.9.135-vs2.3.9.8/include/linux/init_task.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 6773@@ -271,6 +271,10 @@ extern struct task_group root_task_group
b00e13aa 6774 INIT_VTIME(tsk) \
cc23e853
AM
6775 INIT_NUMA_BALANCING(tsk) \
6776 INIT_KASAN(tsk) \
d337f35e
JR
6777+ .xid = 0, \
6778+ .vx_info = NULL, \
6779+ .nid = 0, \
6780+ .nx_info = NULL, \
6781 }
6782
6783
09a55596
AM
6784diff -NurpP --minimal linux-4.9.135/include/linux/ipc.h linux-4.9.135-vs2.3.9.8/include/linux/ipc.h
6785--- linux-4.9.135/include/linux/ipc.h 2016-12-11 19:17:54.000000000 +0000
6786+++ linux-4.9.135-vs2.3.9.8/include/linux/ipc.h 2018-10-20 04:58:14.000000000 +0000
537831f9 6787@@ -16,6 +16,7 @@ struct kern_ipc_perm
d337f35e 6788 key_t key;
537831f9
AM
6789 kuid_t uid;
6790 kgid_t gid;
61333608 6791+ vxid_t xid;
537831f9
AM
6792 kuid_t cuid;
6793 kgid_t cgid;
db55b927 6794 umode_t mode;
09a55596
AM
6795diff -NurpP --minimal linux-4.9.135/include/linux/memcontrol.h linux-4.9.135-vs2.3.9.8/include/linux/memcontrol.h
6796--- linux-4.9.135/include/linux/memcontrol.h 2018-10-20 10:39:19.000000000 +0000
6797+++ linux-4.9.135-vs2.3.9.8/include/linux/memcontrol.h 2018-10-20 04:58:14.000000000 +0000
cc23e853
AM
6798@@ -92,6 +92,7 @@ enum mem_cgroup_events_target {
6799 MEM_CGROUP_NTARGETS,
6800 };
6801
6802+
6803 #ifdef CONFIG_MEMCG
6804
6805 #define MEM_CGROUP_ID_SHIFT 16
369dbd59 6806@@ -402,6 +403,12 @@ static inline bool mem_cgroup_is_descend
cc23e853
AM
6807 return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
6808 }
6809
369dbd59
AM
6810+extern unsigned long mem_cgroup_mem_usage_pages(struct mem_cgroup *memcg);
6811+extern unsigned long mem_cgroup_mem_limit_pages(struct mem_cgroup *memcg);
6812+extern unsigned long mem_cgroup_memsw_usage_pages(struct mem_cgroup *memcg);
6813+extern unsigned long mem_cgroup_memsw_limit_pages(struct mem_cgroup *memcg);
6814+extern void dump_mem_cgroup(struct mem_cgroup *memcg);
cc23e853
AM
6815+
6816 static inline bool mm_match_cgroup(struct mm_struct *mm,
6817 struct mem_cgroup *memcg)
e3afe727 6818 {
09a55596
AM
6819diff -NurpP --minimal linux-4.9.135/include/linux/mount.h linux-4.9.135-vs2.3.9.8/include/linux/mount.h
6820--- linux-4.9.135/include/linux/mount.h 2018-10-20 10:39:19.000000000 +0000
6821+++ linux-4.9.135-vs2.3.9.8/include/linux/mount.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 6822@@ -63,6 +63,9 @@ struct mnt_namespace;
bb20add7 6823 #define MNT_MARKED 0x4000000
cc23e853 6824 #define MNT_UMOUNT 0x8000000
d337f35e 6825
2380c486
JR
6826+#define MNT_TAGID 0x10000
6827+#define MNT_NOTAG 0x20000
6828+
d337f35e 6829 struct vfsmount {
db55b927
AM
6830 struct dentry *mnt_root; /* root of the mounted tree */
6831 struct super_block *mnt_sb; /* pointer to superblock */
09a55596
AM
6832diff -NurpP --minimal linux-4.9.135/include/linux/net.h linux-4.9.135-vs2.3.9.8/include/linux/net.h
6833--- linux-4.9.135/include/linux/net.h 2016-12-11 19:17:54.000000000 +0000
6834+++ linux-4.9.135-vs2.3.9.8/include/linux/net.h 2018-10-20 04:58:14.000000000 +0000
cc23e853
AM
6835@@ -44,6 +44,7 @@ struct net;
6836 #define SOCK_NOSPACE 2
d337f35e
JR
6837 #define SOCK_PASSCRED 3
6838 #define SOCK_PASSSEC 4
cc23e853 6839+#define SOCK_USER_SOCKET 5
d337f35e
JR
6840
6841 #ifndef ARCH_HAS_SOCKET_TYPES
6842 /**
09a55596
AM
6843diff -NurpP --minimal linux-4.9.135/include/linux/netdevice.h linux-4.9.135-vs2.3.9.8/include/linux/netdevice.h
6844--- linux-4.9.135/include/linux/netdevice.h 2018-10-20 10:39:19.000000000 +0000
6845+++ linux-4.9.135-vs2.3.9.8/include/linux/netdevice.h 2018-10-20 05:55:43.000000000 +0000
6846@@ -2481,6 +2481,7 @@ static inline int dev_recursion_level(vo
c2e5f7c8
JR
6847
6848 struct net_device *dev_get_by_index(struct net *net, int ifindex);
6849 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
6850+struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex);
6851 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
6852 int netdev_get_name(struct net *net, char *name, int ifindex);
6853 int dev_restart(struct net_device *dev);
09a55596
AM
6854diff -NurpP --minimal linux-4.9.135/include/linux/nsproxy.h linux-4.9.135-vs2.3.9.8/include/linux/nsproxy.h
6855--- linux-4.9.135/include/linux/nsproxy.h 2016-12-11 19:17:54.000000000 +0000
6856+++ linux-4.9.135-vs2.3.9.8/include/linux/nsproxy.h 2018-10-20 04:58:14.000000000 +0000
2380c486 6857@@ -3,6 +3,7 @@
d337f35e 6858
2380c486
JR
6859 #include <linux/spinlock.h>
6860 #include <linux/sched.h>
6861+#include <linux/vserver/debug.h>
6862
6863 struct mnt_namespace;
6864 struct uts_namespace;
cc23e853 6865@@ -65,6 +66,7 @@ extern struct nsproxy init_nsproxy;
bb20add7 6866 */
2380c486
JR
6867
6868 int copy_namespaces(unsigned long flags, struct task_struct *tsk);
6869+struct nsproxy *copy_nsproxy(struct nsproxy *orig);
6870 void exit_task_namespaces(struct task_struct *tsk);
6871 void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
6872 void free_nsproxy(struct nsproxy *ns);
cc23e853 6873@@ -72,16 +74,26 @@ int unshare_nsproxy_namespaces(unsigned
b00e13aa 6874 struct cred *, struct fs_struct *);
a168f21d 6875 int __init nsproxy_cache_init(void);
2380c486
JR
6876
6877-static inline void put_nsproxy(struct nsproxy *ns)
6878+#define get_nsproxy(n) __get_nsproxy(n, __FILE__, __LINE__)
d337f35e 6879+
2380c486
JR
6880+static inline void __get_nsproxy(struct nsproxy *ns,
6881+ const char *_file, int _line)
6882 {
6883- if (atomic_dec_and_test(&ns->count)) {
6884- free_nsproxy(ns);
6885- }
6886+ vxlprintk(VXD_CBIT(space, 0), "get_nsproxy(%p[%u])",
6887+ ns, atomic_read(&ns->count), _file, _line);
d337f35e 6888+ atomic_inc(&ns->count);
2380c486
JR
6889 }
6890
6891-static inline void get_nsproxy(struct nsproxy *ns)
6892+#define put_nsproxy(n) __put_nsproxy(n, __FILE__, __LINE__)
d337f35e 6893+
2380c486
JR
6894+static inline void __put_nsproxy(struct nsproxy *ns,
6895+ const char *_file, int _line)
6896 {
6897- atomic_inc(&ns->count);
6898+ vxlprintk(VXD_CBIT(space, 0), "put_nsproxy(%p[%u])",
6899+ ns, atomic_read(&ns->count), _file, _line);
6900+ if (atomic_dec_and_test(&ns->count)) {
6901+ free_nsproxy(ns);
6902+ }
6903 }
d337f35e 6904
763640ca 6905 #endif
09a55596
AM
6906diff -NurpP --minimal linux-4.9.135/include/linux/pid.h linux-4.9.135-vs2.3.9.8/include/linux/pid.h
6907--- linux-4.9.135/include/linux/pid.h 2018-10-20 10:39:20.000000000 +0000
6908+++ linux-4.9.135-vs2.3.9.8/include/linux/pid.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 6909@@ -10,7 +10,8 @@ enum pid_type
d337f35e 6910 PIDTYPE_SID,
cc23e853
AM
6911 PIDTYPE_MAX,
6912 /* only valid to __task_pid_nr_ns() */
6913- __PIDTYPE_TGID
6914+ __PIDTYPE_TGID,
6915+ __PIDTYPE_REALPID
d337f35e
JR
6916 };
6917
6918 /*
cc23e853 6919@@ -172,6 +173,7 @@ static inline pid_t pid_nr(struct pid *p
2380c486
JR
6920 }
6921
6922 pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
6923+pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns);
6924 pid_t pid_vnr(struct pid *pid);
6925
6926 #define do_each_pid_task(pid, type, task) \
09a55596
AM
6927diff -NurpP --minimal linux-4.9.135/include/linux/quotaops.h linux-4.9.135-vs2.3.9.8/include/linux/quotaops.h
6928--- linux-4.9.135/include/linux/quotaops.h 2016-12-11 19:17:54.000000000 +0000
6929+++ linux-4.9.135-vs2.3.9.8/include/linux/quotaops.h 2018-10-20 04:58:14.000000000 +0000
e22b5178
AM
6930@@ -8,6 +8,7 @@
6931 #define _LINUX_QUOTAOPS_
6932
6933 #include <linux/fs.h>
6934+#include <linux/vs_dlimit.h>
6935
76514441
AM
6936 #define DQUOT_SPACE_WARN 0x1
6937 #define DQUOT_SPACE_RESERVE 0x2
cc23e853 6938@@ -214,11 +215,12 @@ static inline void dquot_drop(struct ino
76514441 6939
cc23e853 6940 static inline int dquot_alloc_inode(struct inode *inode)
76514441
AM
6941 {
6942- return 0;
6943+ return dl_alloc_inode(inode);
6944 }
6945
cc23e853 6946 static inline void dquot_free_inode(struct inode *inode)
e22b5178 6947 {
76514441
AM
6948+ dl_free_inode(inode);
6949 }
6950
6951 static inline int dquot_transfer(struct inode *inode, struct iattr *iattr)
cc23e853 6952@@ -229,6 +231,10 @@ static inline int dquot_transfer(struct
76514441
AM
6953 static inline int __dquot_alloc_space(struct inode *inode, qsize_t number,
6954 int flags)
6955 {
6956+ int ret = 0;
6957+
6958+ if ((ret = dl_alloc_space(inode, number)))
6959+ return ret;
6960 if (!(flags & DQUOT_SPACE_RESERVE))
6961 inode_add_bytes(inode, number);
6962 return 0;
cc23e853 6963@@ -239,6 +245,7 @@ static inline void __dquot_free_space(st
76514441
AM
6964 {
6965 if (!(flags & DQUOT_SPACE_RESERVE))
6966 inode_sub_bytes(inode, number);
6967+ dl_free_space(inode, number);
6968 }
6969
6970 static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
09a55596
AM
6971diff -NurpP --minimal linux-4.9.135/include/linux/sched.h linux-4.9.135-vs2.3.9.8/include/linux/sched.h
6972--- linux-4.9.135/include/linux/sched.h 2018-10-20 10:39:20.000000000 +0000
6973+++ linux-4.9.135-vs2.3.9.8/include/linux/sched.h 2018-10-20 05:55:43.000000000 +0000
6974@@ -1718,6 +1718,14 @@ struct task_struct {
2380c486 6975 #endif
42bc425c 6976 struct seccomp seccomp;
2380c486
JR
6977
6978+/* vserver context data */
6979+ struct vx_info *vx_info;
6980+ struct nx_info *nx_info;
d337f35e 6981+
61333608
AM
6982+ vxid_t xid;
6983+ vnid_t nid;
6984+ vtag_t tag;
2380c486
JR
6985+
6986 /* Thread group tracking */
6987 u32 parent_exec_id;
6988 u32 self_exec_id;
09a55596 6989@@ -2111,6 +2119,11 @@ struct pid_namespace;
ec22aa5c
AM
6990 pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
6991 struct pid_namespace *ns);
d337f35e 6992
2380c486
JR
6993+#include <linux/vserver/base.h>
6994+#include <linux/vserver/context.h>
6995+#include <linux/vserver/debug.h>
6996+#include <linux/vserver/pid.h>
6997+
6998 static inline pid_t task_pid_nr(struct task_struct *tsk)
6999 {
7000 return tsk->pid;
09a55596 7001@@ -2124,7 +2137,8 @@ static inline pid_t task_pid_nr_ns(struc
d337f35e 7002
2380c486
JR
7003 static inline pid_t task_pid_vnr(struct task_struct *tsk)
7004 {
ec22aa5c
AM
7005- return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
7006+ // return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
7007+ return vx_map_pid(__task_pid_nr_ns(tsk, PIDTYPE_PID, NULL));
2380c486 7008 }
d337f35e 7009
d337f35e 7010
09a55596
AM
7011diff -NurpP --minimal linux-4.9.135/include/linux/shmem_fs.h linux-4.9.135-vs2.3.9.8/include/linux/shmem_fs.h
7012--- linux-4.9.135/include/linux/shmem_fs.h 2016-12-11 19:17:54.000000000 +0000
7013+++ linux-4.9.135-vs2.3.9.8/include/linux/shmem_fs.h 2018-10-20 04:58:14.000000000 +0000
bb20add7 7014@@ -10,6 +10,9 @@
2380c486 7015
a168f21d 7016 /* inode in-kernel data */
2380c486
JR
7017
7018+#define TMPFS_SUPER_MAGIC 0x01021994
7019+
7020+
7021 struct shmem_inode_info {
7022 spinlock_t lock;
bb20add7 7023 unsigned int seals; /* shmem seals */
09a55596
AM
7024diff -NurpP --minimal linux-4.9.135/include/linux/stat.h linux-4.9.135-vs2.3.9.8/include/linux/stat.h
7025--- linux-4.9.135/include/linux/stat.h 2016-12-11 19:17:54.000000000 +0000
7026+++ linux-4.9.135-vs2.3.9.8/include/linux/stat.h 2018-10-20 04:58:14.000000000 +0000
537831f9 7027@@ -25,6 +25,7 @@ struct kstat {
2380c486 7028 unsigned int nlink;
42bc425c
AM
7029 kuid_t uid;
7030 kgid_t gid;
8ce283e1 7031+ ktag_t tag;
2380c486
JR
7032 dev_t rdev;
7033 loff_t size;
7034 struct timespec atime;
09a55596
AM
7035diff -NurpP --minimal linux-4.9.135/include/linux/sunrpc/auth.h linux-4.9.135-vs2.3.9.8/include/linux/sunrpc/auth.h
7036--- linux-4.9.135/include/linux/sunrpc/auth.h 2016-12-11 19:17:54.000000000 +0000
7037+++ linux-4.9.135-vs2.3.9.8/include/linux/sunrpc/auth.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 7038@@ -46,6 +46,7 @@ enum {
2380c486 7039 struct auth_cred {
b00e13aa
AM
7040 kuid_t uid;
7041 kgid_t gid;
7042+ ktag_t tag;
2380c486 7043 struct group_info *group_info;
db55b927 7044 const char *principal;
c2e5f7c8 7045 unsigned long ac_flags;
09a55596
AM
7046diff -NurpP --minimal linux-4.9.135/include/linux/sunrpc/clnt.h linux-4.9.135-vs2.3.9.8/include/linux/sunrpc/clnt.h
7047--- linux-4.9.135/include/linux/sunrpc/clnt.h 2018-10-20 10:39:20.000000000 +0000
7048+++ linux-4.9.135-vs2.3.9.8/include/linux/sunrpc/clnt.h 2018-10-20 05:55:43.000000000 +0000
cc23e853 7049@@ -52,7 +52,8 @@ struct rpc_clnt {
2380c486 7050 cl_discrtry : 1,/* disconnect before retry */
c2e5f7c8 7051 cl_noretranstimeo: 1,/* No retransmit timeouts */
2380c486
JR
7052 cl_autobind : 1,/* use getport() */
7053- cl_chatty : 1;/* be verbose */
7054+ cl_chatty : 1,/* be verbose */
7055+ cl_tag : 1;/* context tagging */
d337f35e 7056
2380c486
JR
7057 struct rpc_rtt * cl_rtt; /* RTO estimator data */
7058 const struct rpc_timeout *cl_timeout; /* Timeout strategy */
09a55596
AM
7059diff -NurpP --minimal linux-4.9.135/include/linux/types.h linux-4.9.135-vs2.3.9.8/include/linux/types.h
7060--- linux-4.9.135/include/linux/types.h 2016-12-11 19:17:54.000000000 +0000
7061+++ linux-4.9.135-vs2.3.9.8/include/linux/types.h 2018-10-20 04:58:14.000000000 +0000
537831f9 7062@@ -32,6 +32,9 @@ typedef __kernel_uid32_t uid_t;
2380c486
JR
7063 typedef __kernel_gid32_t gid_t;
7064 typedef __kernel_uid16_t uid16_t;
7065 typedef __kernel_gid16_t gid16_t;
61333608
AM
7066+typedef unsigned int vxid_t;
7067+typedef unsigned int vnid_t;
7068+typedef unsigned int vtag_t;
2380c486
JR
7069
7070 typedef unsigned long uintptr_t;
7071
09a55596
AM
7072diff -NurpP --minimal linux-4.9.135/include/linux/uidgid.h linux-4.9.135-vs2.3.9.8/include/linux/uidgid.h
7073--- linux-4.9.135/include/linux/uidgid.h 2016-12-11 19:17:54.000000000 +0000
7074+++ linux-4.9.135-vs2.3.9.8/include/linux/uidgid.h 2018-10-20 04:58:14.000000000 +0000
bb20add7 7075@@ -21,13 +21,17 @@ typedef struct {
537831f9
AM
7076 uid_t val;
7077 } kuid_t;
7078
7079-
7080 typedef struct {
7081 gid_t val;
7082 } kgid_t;
7083
7084+typedef struct {
61333608 7085+ vtag_t val;
537831f9
AM
7086+} ktag_t;
7087+
7088 #define KUIDT_INIT(value) (kuid_t){ value }
7089 #define KGIDT_INIT(value) (kgid_t){ value }
7090+#define KTAGT_INIT(value) (ktag_t){ value }
7091
cc23e853 7092 #ifdef CONFIG_MULTIUSER
537831f9 7093 static inline uid_t __kuid_val(kuid_t uid)
cc23e853 7094@@ -51,11 +55,18 @@ static inline gid_t __kgid_val(kgid_t gi
537831f9 7095 }
cc23e853 7096 #endif
537831f9 7097
61333608 7098+static inline vtag_t __ktag_val(ktag_t tag)
537831f9
AM
7099+{
7100+ return tag.val;
7101+}
7102+
537831f9
AM
7103 #define GLOBAL_ROOT_UID KUIDT_INIT(0)
7104 #define GLOBAL_ROOT_GID KGIDT_INIT(0)
7105+#define GLOBAL_ROOT_TAG KTAGT_INIT(0)
7106
7107 #define INVALID_UID KUIDT_INIT(-1)
7108 #define INVALID_GID KGIDT_INIT(-1)
7109+#define INVALID_TAG KTAGT_INIT(-1)
7110
7111 static inline bool uid_eq(kuid_t left, kuid_t right)
7112 {
cc23e853 7113@@ -67,6 +78,11 @@ static inline bool gid_eq(kgid_t left, k
537831f9
AM
7114 return __kgid_val(left) == __kgid_val(right);
7115 }
7116
7117+static inline bool tag_eq(ktag_t left, ktag_t right)
7118+{
7119+ return __ktag_val(left) == __ktag_val(right);
7120+}
7121+
7122 static inline bool uid_gt(kuid_t left, kuid_t right)
7123 {
7124 return __kuid_val(left) > __kuid_val(right);
cc23e853
AM
7125@@ -117,13 +133,21 @@ static inline bool gid_valid(kgid_t gid)
7126 return __kgid_val(gid) != (gid_t) -1;
537831f9
AM
7127 }
7128
7129+static inline bool tag_valid(ktag_t tag)
7130+{
7131+ return !tag_eq(tag, INVALID_TAG);
7132+}
7133+
7134 #ifdef CONFIG_USER_NS
7135
7136 extern kuid_t make_kuid(struct user_namespace *from, uid_t uid);
7137 extern kgid_t make_kgid(struct user_namespace *from, gid_t gid);
c90fe048 7138+extern ktag_t make_ktag(struct user_namespace *from, gid_t gid);
537831f9
AM
7139
7140 extern uid_t from_kuid(struct user_namespace *to, kuid_t uid);
7141 extern gid_t from_kgid(struct user_namespace *to, kgid_t gid);
61333608 7142+extern vtag_t from_ktag(struct user_namespace *to, ktag_t tag);
537831f9
AM
7143+
7144 extern uid_t from_kuid_munged(struct user_namespace *to, kuid_t uid);
7145 extern gid_t from_kgid_munged(struct user_namespace *to, kgid_t gid);
7146
cc23e853 7147@@ -149,6 +173,11 @@ static inline kgid_t make_kgid(struct us
537831f9
AM
7148 return KGIDT_INIT(gid);
7149 }
7150
61333608 7151+static inline ktag_t make_ktag(struct user_namespace *from, vtag_t tag)
537831f9
AM
7152+{
7153+ return KTAGT_INIT(tag);
7154+}
7155+
7156 static inline uid_t from_kuid(struct user_namespace *to, kuid_t kuid)
7157 {
7158 return __kuid_val(kuid);
cc23e853 7159@@ -159,6 +188,11 @@ static inline gid_t from_kgid(struct use
537831f9
AM
7160 return __kgid_val(kgid);
7161 }
7162
61333608 7163+static inline vtag_t from_ktag(struct user_namespace *to, ktag_t ktag)
537831f9
AM
7164+{
7165+ return __ktag_val(ktag);
7166+}
7167+
7168 static inline uid_t from_kuid_munged(struct user_namespace *to, kuid_t kuid)
7169 {
7170 uid_t uid = from_kuid(to, kuid);
09a55596
AM
7171diff -NurpP --minimal linux-4.9.135/include/linux/vroot.h linux-4.9.135-vs2.3.9.8/include/linux/vroot.h
7172--- linux-4.9.135/include/linux/vroot.h 1970-01-01 00:00:00.000000000 +0000
7173+++ linux-4.9.135-vs2.3.9.8/include/linux/vroot.h 2018-10-20 04:58:14.000000000 +0000
2380c486
JR
7174@@ -0,0 +1,51 @@
7175+
7176+/*
7177+ * include/linux/vroot.h
7178+ *
cc23e853
AM
7179+ * written by Herbert Pötzl, 9/11/2002
7180+ * ported to 2.6 by Herbert Pötzl, 30/12/2004
2380c486 7181+ *
cc23e853 7182+ * Copyright (C) 2002-2007 by Herbert Pötzl.
2380c486
JR
7183+ * Redistribution of this file is permitted under the
7184+ * GNU General Public License.
7185+ */
7186+
7187+#ifndef _LINUX_VROOT_H
7188+#define _LINUX_VROOT_H
7189+
7190+
7191+#ifdef __KERNEL__
7192+
7193+/* Possible states of device */
7194+enum {
7195+ Vr_unbound,
7196+ Vr_bound,
7197+};
7198+
7199+struct vroot_device {
7200+ int vr_number;
7201+ int vr_refcnt;
7202+
7203+ struct semaphore vr_ctl_mutex;
7204+ struct block_device *vr_device;
7205+ int vr_state;
7206+};
7207+
7208+
7209+typedef struct block_device *(vroot_grb_func)(struct block_device *);
7210+
7211+extern int register_vroot_grb(vroot_grb_func *);
7212+extern int unregister_vroot_grb(vroot_grb_func *);
7213+
7214+#endif /* __KERNEL__ */
7215+
7216+#define MAX_VROOT_DEFAULT 8
7217+
7218+/*
7219+ * IOCTL commands --- we will commandeer 0x56 ('V')
7220+ */
7221+
7222+#define VROOT_SET_DEV 0x5600
7223+#define VROOT_CLR_DEV 0x5601
7224+
7225+#endif /* _LINUX_VROOT_H */
09a55596
AM
7226diff -NurpP --minimal linux-4.9.135/include/linux/vs_base.h linux-4.9.135-vs2.3.9.8/include/linux/vs_base.h
7227--- linux-4.9.135/include/linux/vs_base.h 1970-01-01 00:00:00.000000000 +0000
7228+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_base.h 2018-10-20 04:58:14.000000000 +0000
2380c486
JR
7229@@ -0,0 +1,10 @@
7230+#ifndef _VS_BASE_H
7231+#define _VS_BASE_H
7232+
7233+#include "vserver/base.h"
7234+#include "vserver/check.h"
7235+#include "vserver/debug.h"
7236+
7237+#else
7238+#warning duplicate inclusion
7239+#endif
09a55596
AM
7240diff -NurpP --minimal linux-4.9.135/include/linux/vs_context.h linux-4.9.135-vs2.3.9.8/include/linux/vs_context.h
7241--- linux-4.9.135/include/linux/vs_context.h 1970-01-01 00:00:00.000000000 +0000
7242+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_context.h 2018-10-20 04:58:14.000000000 +0000
4a036bed 7243@@ -0,0 +1,242 @@
2380c486
JR
7244+#ifndef _VS_CONTEXT_H
7245+#define _VS_CONTEXT_H
7246+
7247+#include "vserver/base.h"
7248+#include "vserver/check.h"
7249+#include "vserver/context.h"
7250+#include "vserver/history.h"
7251+#include "vserver/debug.h"
7252+
7253+#include <linux/sched.h>
7254+
7255+
7256+#define get_vx_info(i) __get_vx_info(i, __FILE__, __LINE__, __HERE__)
7257+
7258+static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
7259+ const char *_file, int _line, void *_here)
7260+{
7261+ if (!vxi)
7262+ return NULL;
7263+
7264+ vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])",
7265+ vxi, vxi ? vxi->vx_id : 0,
7266+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7267+ _file, _line);
7268+ __vxh_get_vx_info(vxi, _here);
7269+
7270+ atomic_inc(&vxi->vx_usecnt);
7271+ return vxi;
7272+}
7273+
7274+
7275+extern void free_vx_info(struct vx_info *);
7276+
7277+#define put_vx_info(i) __put_vx_info(i, __FILE__, __LINE__, __HERE__)
7278+
7279+static inline void __put_vx_info(struct vx_info *vxi,
7280+ const char *_file, int _line, void *_here)
7281+{
7282+ if (!vxi)
7283+ return;
7284+
7285+ vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])",
7286+ vxi, vxi ? vxi->vx_id : 0,
7287+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7288+ _file, _line);
7289+ __vxh_put_vx_info(vxi, _here);
7290+
7291+ if (atomic_dec_and_test(&vxi->vx_usecnt))
7292+ free_vx_info(vxi);
7293+}
7294+
7295+
7296+#define init_vx_info(p, i) \
7297+ __init_vx_info(p, i, __FILE__, __LINE__, __HERE__)
7298+
7299+static inline void __init_vx_info(struct vx_info **vxp, struct vx_info *vxi,
7300+ const char *_file, int _line, void *_here)
7301+{
7302+ if (vxi) {
7303+ vxlprintk(VXD_CBIT(xid, 3),
7304+ "init_vx_info(%p[#%d.%d])",
7305+ vxi, vxi ? vxi->vx_id : 0,
7306+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7307+ _file, _line);
7308+ __vxh_init_vx_info(vxi, vxp, _here);
7309+
7310+ atomic_inc(&vxi->vx_usecnt);
7311+ }
7312+ *vxp = vxi;
7313+}
7314+
7315+
7316+#define set_vx_info(p, i) \
7317+ __set_vx_info(p, i, __FILE__, __LINE__, __HERE__)
7318+
7319+static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
7320+ const char *_file, int _line, void *_here)
7321+{
7322+ struct vx_info *vxo;
7323+
7324+ if (!vxi)
7325+ return;
7326+
7327+ vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d])",
7328+ vxi, vxi ? vxi->vx_id : 0,
7329+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7330+ _file, _line);
7331+ __vxh_set_vx_info(vxi, vxp, _here);
7332+
7333+ atomic_inc(&vxi->vx_usecnt);
7334+ vxo = xchg(vxp, vxi);
7335+ BUG_ON(vxo);
7336+}
7337+
7338+
7339+#define clr_vx_info(p) __clr_vx_info(p, __FILE__, __LINE__, __HERE__)
7340+
7341+static inline void __clr_vx_info(struct vx_info **vxp,
7342+ const char *_file, int _line, void *_here)
7343+{
7344+ struct vx_info *vxo;
7345+
7346+ vxo = xchg(vxp, NULL);
7347+ if (!vxo)
7348+ return;
7349+
7350+ vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d])",
7351+ vxo, vxo ? vxo->vx_id : 0,
7352+ vxo ? atomic_read(&vxo->vx_usecnt) : 0,
7353+ _file, _line);
7354+ __vxh_clr_vx_info(vxo, vxp, _here);
7355+
7356+ if (atomic_dec_and_test(&vxo->vx_usecnt))
7357+ free_vx_info(vxo);
7358+}
7359+
7360+
7361+#define claim_vx_info(v, p) \
7362+ __claim_vx_info(v, p, __FILE__, __LINE__, __HERE__)
7363+
7364+static inline void __claim_vx_info(struct vx_info *vxi,
7365+ struct task_struct *task,
7366+ const char *_file, int _line, void *_here)
7367+{
7368+ vxlprintk(VXD_CBIT(xid, 3), "claim_vx_info(%p[#%d.%d.%d]) %p",
7369+ vxi, vxi ? vxi->vx_id : 0,
7370+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7371+ vxi ? atomic_read(&vxi->vx_tasks) : 0,
7372+ task, _file, _line);
7373+ __vxh_claim_vx_info(vxi, task, _here);
7374+
7375+ atomic_inc(&vxi->vx_tasks);
7376+}
7377+
7378+
7379+extern void unhash_vx_info(struct vx_info *);
7380+
7381+#define release_vx_info(v, p) \
7382+ __release_vx_info(v, p, __FILE__, __LINE__, __HERE__)
7383+
7384+static inline void __release_vx_info(struct vx_info *vxi,
7385+ struct task_struct *task,
7386+ const char *_file, int _line, void *_here)
7387+{
7388+ vxlprintk(VXD_CBIT(xid, 3), "release_vx_info(%p[#%d.%d.%d]) %p",
7389+ vxi, vxi ? vxi->vx_id : 0,
7390+ vxi ? atomic_read(&vxi->vx_usecnt) : 0,
7391+ vxi ? atomic_read(&vxi->vx_tasks) : 0,
7392+ task, _file, _line);
7393+ __vxh_release_vx_info(vxi, task, _here);
7394+
7395+ might_sleep();
7396+
7397+ if (atomic_dec_and_test(&vxi->vx_tasks))
7398+ unhash_vx_info(vxi);
7399+}
7400+
7401+
7402+#define task_get_vx_info(p) \
7403+ __task_get_vx_info(p, __FILE__, __LINE__, __HERE__)
7404+
7405+static inline struct vx_info *__task_get_vx_info(struct task_struct *p,
7406+ const char *_file, int _line, void *_here)
7407+{
7408+ struct vx_info *vxi;
7409+
7410+ task_lock(p);
7411+ vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)",
7412+ p, _file, _line);
7413+ vxi = __get_vx_info(p->vx_info, _file, _line, _here);
7414+ task_unlock(p);
7415+ return vxi;
7416+}
7417+
7418+
7419+static inline void __wakeup_vx_info(struct vx_info *vxi)
7420+{
7421+ if (waitqueue_active(&vxi->vx_wait))
7422+ wake_up_interruptible(&vxi->vx_wait);
7423+}
7424+
7425+
7426+#define enter_vx_info(v, s) __enter_vx_info(v, s, __FILE__, __LINE__)
7427+
7428+static inline void __enter_vx_info(struct vx_info *vxi,
7429+ struct vx_info_save *vxis, const char *_file, int _line)
7430+{
7431+ vxlprintk(VXD_CBIT(xid, 5), "enter_vx_info(%p[#%d],%p) %p[#%d,%p]",
7432+ vxi, vxi ? vxi->vx_id : 0, vxis, current,
7433+ current->xid, current->vx_info, _file, _line);
7434+ vxis->vxi = xchg(&current->vx_info, vxi);
7435+ vxis->xid = current->xid;
7436+ current->xid = vxi ? vxi->vx_id : 0;
7437+}
7438+
7439+#define leave_vx_info(s) __leave_vx_info(s, __FILE__, __LINE__)
7440+
7441+static inline void __leave_vx_info(struct vx_info_save *vxis,
7442+ const char *_file, int _line)
7443+{
7444+ vxlprintk(VXD_CBIT(xid, 5), "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]",
7445+ vxis, vxis->xid, vxis->vxi, current,
7446+ current->xid, current->vx_info, _file, _line);
7447+ (void)xchg(&current->vx_info, vxis->vxi);
7448+ current->xid = vxis->xid;
7449+}
7450+
7451+
7452+static inline void __enter_vx_admin(struct vx_info_save *vxis)
7453+{
7454+ vxis->vxi = xchg(&current->vx_info, NULL);
61333608 7455+ vxis->xid = xchg(&current->xid, (vxid_t)0);
2380c486
JR
7456+}
7457+
7458+static inline void __leave_vx_admin(struct vx_info_save *vxis)
7459+{
7460+ (void)xchg(&current->xid, vxis->xid);
7461+ (void)xchg(&current->vx_info, vxis->vxi);
7462+}
7463+
4a036bed
AM
7464+#define task_is_init(p) \
7465+ __task_is_init(p, __FILE__, __LINE__, __HERE__)
7466+
7467+static inline int __task_is_init(struct task_struct *p,
7468+ const char *_file, int _line, void *_here)
7469+{
7470+ int is_init = is_global_init(p);
7471+
7472+ task_lock(p);
7473+ if (p->vx_info)
7474+ is_init = p->vx_info->vx_initpid == p->pid;
7475+ task_unlock(p);
7476+ return is_init;
7477+}
7478+
2380c486
JR
7479+extern void exit_vx_info(struct task_struct *, int);
7480+extern void exit_vx_info_early(struct task_struct *, int);
7481+
7482+
7483+#else
7484+#warning duplicate inclusion
7485+#endif
09a55596
AM
7486diff -NurpP --minimal linux-4.9.135/include/linux/vs_cowbl.h linux-4.9.135-vs2.3.9.8/include/linux/vs_cowbl.h
7487--- linux-4.9.135/include/linux/vs_cowbl.h 1970-01-01 00:00:00.000000000 +0000
7488+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_cowbl.h 2018-10-20 04:58:14.000000000 +0000
78865d5b 7489@@ -0,0 +1,48 @@
2380c486
JR
7490+#ifndef _VS_COWBL_H
7491+#define _VS_COWBL_H
7492+
7493+#include <linux/fs.h>
7494+#include <linux/dcache.h>
7495+#include <linux/namei.h>
78865d5b 7496+#include <linux/slab.h>
2380c486
JR
7497+
7498+extern struct dentry *cow_break_link(const char *pathname);
7499+
7500+static inline int cow_check_and_break(struct path *path)
7501+{
7502+ struct inode *inode = path->dentry->d_inode;
7503+ int error = 0;
7504+
7505+ /* do we need this check? */
7506+ if (IS_RDONLY(inode))
7507+ return -EROFS;
7508+
7509+ if (IS_COW(inode)) {
7510+ if (IS_COW_LINK(inode)) {
7511+ struct dentry *new_dentry, *old_dentry = path->dentry;
7512+ char *pp, *buf;
7513+
7514+ buf = kmalloc(PATH_MAX, GFP_KERNEL);
7515+ if (!buf) {
7516+ return -ENOMEM;
7517+ }
7518+ pp = d_path(path, buf, PATH_MAX);
7519+ new_dentry = cow_break_link(pp);
7520+ kfree(buf);
7521+ if (!IS_ERR(new_dentry)) {
7522+ path->dentry = new_dentry;
7523+ dput(old_dentry);
7524+ } else
7525+ error = PTR_ERR(new_dentry);
7526+ } else {
7527+ inode->i_flags &= ~(S_IXUNLINK | S_IMMUTABLE);
7528+ inode->i_ctime = CURRENT_TIME;
7529+ mark_inode_dirty(inode);
7530+ }
7531+ }
7532+ return error;
7533+}
7534+
7535+#else
7536+#warning duplicate inclusion
7537+#endif
09a55596
AM
7538diff -NurpP --minimal linux-4.9.135/include/linux/vs_cvirt.h linux-4.9.135-vs2.3.9.8/include/linux/vs_cvirt.h
7539--- linux-4.9.135/include/linux/vs_cvirt.h 1970-01-01 00:00:00.000000000 +0000
7540+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_cvirt.h 2018-10-20 04:58:14.000000000 +0000
2380c486
JR
7541@@ -0,0 +1,50 @@
7542+#ifndef _VS_CVIRT_H
7543+#define _VS_CVIRT_H
7544+
7545+#include "vserver/cvirt.h"
7546+#include "vserver/context.h"
7547+#include "vserver/base.h"
7548+#include "vserver/check.h"
7549+#include "vserver/debug.h"
7550+
7551+
7552+static inline void vx_activate_task(struct task_struct *p)
7553+{
7554+ struct vx_info *vxi;
7555+
7556+ if ((vxi = p->vx_info)) {
7557+ vx_update_load(vxi);
7558+ atomic_inc(&vxi->cvirt.nr_running);
7559+ }
7560+}
7561+
7562+static inline void vx_deactivate_task(struct task_struct *p)
7563+{
7564+ struct vx_info *vxi;
7565+
7566+ if ((vxi = p->vx_info)) {
7567+ vx_update_load(vxi);
7568+ atomic_dec(&vxi->cvirt.nr_running);
7569+ }
7570+}
7571+
7572+static inline void vx_uninterruptible_inc(struct task_struct *p)
7573+{
7574+ struct vx_info *vxi;
7575+
7576+ if ((vxi = p->vx_info))
7577+ atomic_inc(&vxi->cvirt.nr_uninterruptible);
7578+}
7579+
7580+static inline void vx_uninterruptible_dec(struct task_struct *p)
7581+{
7582+ struct vx_info *vxi;
7583+
7584+ if ((vxi = p->vx_info))
7585+ atomic_dec(&vxi->cvirt.nr_uninterruptible);
7586+}
7587+
7588+
7589+#else
7590+#warning duplicate inclusion
7591+#endif
09a55596
AM
7592diff -NurpP --minimal linux-4.9.135/include/linux/vs_device.h linux-4.9.135-vs2.3.9.8/include/linux/vs_device.h
7593--- linux-4.9.135/include/linux/vs_device.h 1970-01-01 00:00:00.000000000 +0000
7594+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_device.h 2018-10-20 04:58:14.000000000 +0000
2380c486
JR
7595@@ -0,0 +1,45 @@
7596+#ifndef _VS_DEVICE_H
7597+#define _VS_DEVICE_H
7598+
7599+#include "vserver/base.h"
7600+#include "vserver/device.h"
7601+#include "vserver/debug.h"
7602+
7603+
7604+#ifdef CONFIG_VSERVER_DEVICE
7605+
7606+int vs_map_device(struct vx_info *, dev_t, dev_t *, umode_t);
7607+
7608+#define vs_device_perm(v, d, m, p) \
7609+ ((vs_map_device(current_vx_info(), d, NULL, m) & (p)) == (p))
7610+
7611+#else
7612+
7613+static inline
7614+int vs_map_device(struct vx_info *vxi,
7615+ dev_t device, dev_t *target, umode_t mode)
7616+{
7617+ if (target)
7618+ *target = device;
7619+ return ~0;
7620+}
7621+
7622+#define vs_device_perm(v, d, m, p) ((p) == (p))
7623+
7624+#endif
7625+
7626+
7627+#define vs_map_chrdev(d, t, p) \
7628+ ((vs_map_device(current_vx_info(), d, t, S_IFCHR) & (p)) == (p))
7629+#define vs_map_blkdev(d, t, p) \
7630+ ((vs_map_device(current_vx_info(), d, t, S_IFBLK) & (p)) == (p))
7631+
7632+#define vs_chrdev_perm(d, p) \
7633+ vs_device_perm(current_vx_info(), d, S_IFCHR, p)
7634+#define vs_blkdev_perm(d, p) \
7635+ vs_device_perm(current_vx_info(), d, S_IFBLK, p)
7636+
7637+
7638+#else
7639+#warning duplicate inclusion
7640+#endif
09a55596
AM
7641diff -NurpP --minimal linux-4.9.135/include/linux/vs_dlimit.h linux-4.9.135-vs2.3.9.8/include/linux/vs_dlimit.h
7642--- linux-4.9.135/include/linux/vs_dlimit.h 1970-01-01 00:00:00.000000000 +0000
7643+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_dlimit.h 2018-10-20 04:58:14.000000000 +0000
2c8c5bc5 7644@@ -0,0 +1,215 @@
2380c486
JR
7645+#ifndef _VS_DLIMIT_H
7646+#define _VS_DLIMIT_H
7647+
7648+#include <linux/fs.h>
7649+
7650+#include "vserver/dlimit.h"
7651+#include "vserver/base.h"
7652+#include "vserver/debug.h"
7653+
7654+
7655+#define get_dl_info(i) __get_dl_info(i, __FILE__, __LINE__)
7656+
7657+static inline struct dl_info *__get_dl_info(struct dl_info *dli,
7658+ const char *_file, int _line)
7659+{
7660+ if (!dli)
7661+ return NULL;
7662+ vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])",
7663+ dli, dli ? dli->dl_tag : 0,
7664+ dli ? atomic_read(&dli->dl_usecnt) : 0,
7665+ _file, _line);
7666+ atomic_inc(&dli->dl_usecnt);
7667+ return dli;
7668+}
7669+
7670+
7671+#define free_dl_info(i) \
7672+ call_rcu(&(i)->dl_rcu, rcu_free_dl_info)
7673+
7674+#define put_dl_info(i) __put_dl_info(i, __FILE__, __LINE__)
7675+
7676+static inline void __put_dl_info(struct dl_info *dli,
7677+ const char *_file, int _line)
7678+{
7679+ if (!dli)
7680+ return;
7681+ vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])",
7682+ dli, dli ? dli->dl_tag : 0,
7683+ dli ? atomic_read(&dli->dl_usecnt) : 0,
7684+ _file, _line);
7685+ if (atomic_dec_and_test(&dli->dl_usecnt))
7686+ free_dl_info(dli);
7687+}
7688+
7689+
7690+#define __dlimit_char(d) ((d) ? '*' : ' ')
7691+
7692+static inline int __dl_alloc_space(struct super_block *sb,
61333608 7693+ vtag_t tag, dlsize_t nr, const char *file, int line)
2380c486
JR
7694+{
7695+ struct dl_info *dli = NULL;
7696+ int ret = 0;
7697+
7698+ if (nr == 0)
7699+ goto out;
7700+ dli = locate_dl_info(sb, tag);
7701+ if (!dli)
7702+ goto out;
7703+
7704+ spin_lock(&dli->dl_lock);
7705+ ret = (dli->dl_space_used + nr > dli->dl_space_total);
7706+ if (!ret)
7707+ dli->dl_space_used += nr;
7708+ spin_unlock(&dli->dl_lock);
7709+ put_dl_info(dli);
7710+out:
7711+ vxlprintk(VXD_CBIT(dlim, 1),
7712+ "ALLOC (%p,#%d)%c %lld bytes (%d)",
7713+ sb, tag, __dlimit_char(dli), (long long)nr,
7714+ ret, file, line);
76514441 7715+ return ret ? -ENOSPC : 0;
2380c486
JR
7716+}
7717+
7718+static inline void __dl_free_space(struct super_block *sb,
61333608 7719+ vtag_t tag, dlsize_t nr, const char *_file, int _line)
2380c486
JR
7720+{
7721+ struct dl_info *dli = NULL;
7722+
7723+ if (nr == 0)
7724+ goto out;
7725+ dli = locate_dl_info(sb, tag);
7726+ if (!dli)
7727+ goto out;
7728+
7729+ spin_lock(&dli->dl_lock);
7730+ if (dli->dl_space_used > nr)
7731+ dli->dl_space_used -= nr;
7732+ else
7733+ dli->dl_space_used = 0;
7734+ spin_unlock(&dli->dl_lock);
7735+ put_dl_info(dli);
7736+out:
7737+ vxlprintk(VXD_CBIT(dlim, 1),
7738+ "FREE (%p,#%d)%c %lld bytes",
7739+ sb, tag, __dlimit_char(dli), (long long)nr,
7740+ _file, _line);
7741+}
7742+
7743+static inline int __dl_alloc_inode(struct super_block *sb,
61333608 7744+ vtag_t tag, const char *_file, int _line)
2380c486
JR
7745+{
7746+ struct dl_info *dli;
7747+ int ret = 0;
d337f35e 7748+
2380c486
JR
7749+ dli = locate_dl_info(sb, tag);
7750+ if (!dli)
7751+ goto out;
d337f35e 7752+
2380c486 7753+ spin_lock(&dli->dl_lock);
2c8c5bc5
AM
7754+ dli->dl_inodes_used++;
7755+ ret = (dli->dl_inodes_used > dli->dl_inodes_total);
2380c486
JR
7756+ spin_unlock(&dli->dl_lock);
7757+ put_dl_info(dli);
7758+out:
7759+ vxlprintk(VXD_CBIT(dlim, 0),
7760+ "ALLOC (%p,#%d)%c inode (%d)",
7761+ sb, tag, __dlimit_char(dli), ret, _file, _line);
76514441 7762+ return ret ? -ENOSPC : 0;
2380c486 7763+}
d337f35e 7764+
2380c486 7765+static inline void __dl_free_inode(struct super_block *sb,
61333608 7766+ vtag_t tag, const char *_file, int _line)
d337f35e 7767+{
2380c486
JR
7768+ struct dl_info *dli;
7769+
7770+ dli = locate_dl_info(sb, tag);
7771+ if (!dli)
7772+ goto out;
7773+
7774+ spin_lock(&dli->dl_lock);
7775+ if (dli->dl_inodes_used > 1)
7776+ dli->dl_inodes_used--;
7777+ else
7778+ dli->dl_inodes_used = 0;
7779+ spin_unlock(&dli->dl_lock);
7780+ put_dl_info(dli);
7781+out:
7782+ vxlprintk(VXD_CBIT(dlim, 0),
7783+ "FREE (%p,#%d)%c inode",
7784+ sb, tag, __dlimit_char(dli), _file, _line);
d337f35e
JR
7785+}
7786+
61333608 7787+static inline void __dl_adjust_block(struct super_block *sb, vtag_t tag,
2380c486
JR
7788+ unsigned long long *free_blocks, unsigned long long *root_blocks,
7789+ const char *_file, int _line)
d337f35e 7790+{
2380c486
JR
7791+ struct dl_info *dli;
7792+ uint64_t broot, bfree;
7793+
7794+ dli = locate_dl_info(sb, tag);
7795+ if (!dli)
7796+ return;
7797+
7798+ spin_lock(&dli->dl_lock);
7799+ broot = (dli->dl_space_total -
7800+ (dli->dl_space_total >> 10) * dli->dl_nrlmult)
7801+ >> sb->s_blocksize_bits;
7802+ bfree = (dli->dl_space_total - dli->dl_space_used)
7803+ >> sb->s_blocksize_bits;
7804+ spin_unlock(&dli->dl_lock);
7805+
7806+ vxlprintk(VXD_CBIT(dlim, 2),
7807+ "ADJUST: %lld,%lld on %lld,%lld [mult=%d]",
7808+ (long long)bfree, (long long)broot,
7809+ *free_blocks, *root_blocks, dli->dl_nrlmult,
7810+ _file, _line);
7811+ if (free_blocks) {
7812+ if (*free_blocks > bfree)
7813+ *free_blocks = bfree;
7814+ }
7815+ if (root_blocks) {
7816+ if (*root_blocks > broot)
7817+ *root_blocks = broot;
7818+ }
7819+ put_dl_info(dli);
d337f35e
JR
7820+}
7821+
e22b5178 7822+#define dl_prealloc_space(in, bytes) \
537831f9 7823+ __dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
2380c486 7824+ __FILE__, __LINE__ )
d337f35e 7825+
e22b5178 7826+#define dl_alloc_space(in, bytes) \
537831f9 7827+ __dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
2380c486 7828+ __FILE__, __LINE__ )
d337f35e 7829+
e22b5178 7830+#define dl_reserve_space(in, bytes) \
537831f9 7831+ __dl_alloc_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
2380c486 7832+ __FILE__, __LINE__ )
d337f35e 7833+
e22b5178
AM
7834+#define dl_claim_space(in, bytes) (0)
7835+
7836+#define dl_release_space(in, bytes) \
537831f9 7837+ __dl_free_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
2380c486 7838+ __FILE__, __LINE__ )
d337f35e 7839+
e22b5178 7840+#define dl_free_space(in, bytes) \
537831f9 7841+ __dl_free_space((in)->i_sb, i_tag_read(in), (dlsize_t)(bytes), \
e22b5178
AM
7842+ __FILE__, __LINE__ )
7843+
7844+
d337f35e 7845+
e22b5178 7846+#define dl_alloc_inode(in) \
537831f9 7847+ __dl_alloc_inode((in)->i_sb, i_tag_read(in), __FILE__, __LINE__ )
d337f35e 7848+
e22b5178 7849+#define dl_free_inode(in) \
537831f9 7850+ __dl_free_inode((in)->i_sb, i_tag_read(in), __FILE__, __LINE__ )
d337f35e 7851+
d337f35e 7852+
e22b5178 7853+#define dl_adjust_block(sb, tag, fb, rb) \
2380c486 7854+ __dl_adjust_block(sb, tag, fb, rb, __FILE__, __LINE__ )
d337f35e 7855+
d337f35e 7856+
2380c486
JR
7857+#else
7858+#warning duplicate inclusion
7859+#endif
09a55596
AM
7860diff -NurpP --minimal linux-4.9.135/include/linux/vs_inet.h linux-4.9.135-vs2.3.9.8/include/linux/vs_inet.h
7861--- linux-4.9.135/include/linux/vs_inet.h 1970-01-01 00:00:00.000000000 +0000
7862+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_inet.h 2018-10-20 04:58:14.000000000 +0000
5cb1760b 7863@@ -0,0 +1,364 @@
d33d7b00
AM
7864+#ifndef _VS_INET_H
7865+#define _VS_INET_H
d337f35e 7866+
d33d7b00
AM
7867+#include "vserver/base.h"
7868+#include "vserver/network.h"
7869+#include "vserver/debug.h"
d337f35e 7870+
d33d7b00 7871+#define IPI_LOOPBACK htonl(INADDR_LOOPBACK)
d337f35e 7872+
d33d7b00
AM
7873+#define NXAV4(a) NIPQUAD((a)->ip[0]), NIPQUAD((a)->ip[1]), \
7874+ NIPQUAD((a)->mask), (a)->type
7875+#define NXAV4_FMT "[" NIPQUAD_FMT "-" NIPQUAD_FMT "/" NIPQUAD_FMT ":%04x]"
d337f35e 7876+
d33d7b00
AM
7877+#define NIPQUAD(addr) \
7878+ ((unsigned char *)&addr)[0], \
7879+ ((unsigned char *)&addr)[1], \
7880+ ((unsigned char *)&addr)[2], \
7881+ ((unsigned char *)&addr)[3]
d337f35e 7882+
d33d7b00 7883+#define NIPQUAD_FMT "%u.%u.%u.%u"
d337f35e 7884+
d337f35e 7885+
d33d7b00
AM
7886+static inline
7887+int v4_addr_match(struct nx_addr_v4 *nxa, __be32 addr, uint16_t tmask)
7888+{
7889+ __be32 ip = nxa->ip[0].s_addr;
7890+ __be32 mask = nxa->mask.s_addr;
7891+ __be32 bcast = ip | ~mask;
7892+ int ret = 0;
d337f35e 7893+
d33d7b00
AM
7894+ switch (nxa->type & tmask) {
7895+ case NXA_TYPE_MASK:
7896+ ret = (ip == (addr & mask));
7897+ break;
7898+ case NXA_TYPE_ADDR:
7899+ ret = 3;
7900+ if (addr == ip)
7901+ break;
7902+ /* fall through to broadcast */
7903+ case NXA_MOD_BCAST:
7904+ ret = ((tmask & NXA_MOD_BCAST) && (addr == bcast));
7905+ break;
7906+ case NXA_TYPE_RANGE:
7907+ ret = ((nxa->ip[0].s_addr <= addr) &&
7908+ (nxa->ip[1].s_addr > addr));
7909+ break;
7910+ case NXA_TYPE_ANY:
7911+ ret = 2;
7912+ break;
7913+ }
d337f35e 7914+
d33d7b00
AM
7915+ vxdprintk(VXD_CBIT(net, 0),
7916+ "v4_addr_match(%p" NXAV4_FMT "," NIPQUAD_FMT ",%04x) = %d",
7917+ nxa, NXAV4(nxa), NIPQUAD(addr), tmask, ret);
7918+ return ret;
7919+}
d337f35e 7920+
d33d7b00
AM
7921+static inline
7922+int v4_addr_in_nx_info(struct nx_info *nxi, __be32 addr, uint16_t tmask)
7923+{
7924+ struct nx_addr_v4 *nxa;
7a9e40b8 7925+ unsigned long irqflags;
d33d7b00 7926+ int ret = 1;
d337f35e 7927+
d33d7b00
AM
7928+ if (!nxi)
7929+ goto out;
d337f35e 7930+
d33d7b00
AM
7931+ ret = 2;
7932+ /* allow 127.0.0.1 when remapping lback */
7933+ if ((tmask & NXA_LOOPBACK) &&
7934+ (addr == IPI_LOOPBACK) &&
7935+ nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
7936+ goto out;
7937+ ret = 3;
7938+ /* check for lback address */
7939+ if ((tmask & NXA_MOD_LBACK) &&
7940+ (nxi->v4_lback.s_addr == addr))
7941+ goto out;
7942+ ret = 4;
7943+ /* check for broadcast address */
7944+ if ((tmask & NXA_MOD_BCAST) &&
7945+ (nxi->v4_bcast.s_addr == addr))
7946+ goto out;
7947+ ret = 5;
4bf69007 7948+
d33d7b00 7949+ /* check for v4 addresses */
7a9e40b8 7950+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
d33d7b00
AM
7951+ for (nxa = &nxi->v4; nxa; nxa = nxa->next)
7952+ if (v4_addr_match(nxa, addr, tmask))
4bf69007 7953+ goto out_unlock;
d33d7b00 7954+ ret = 0;
4bf69007 7955+out_unlock:
7a9e40b8 7956+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
d33d7b00
AM
7957+out:
7958+ vxdprintk(VXD_CBIT(net, 0),
7959+ "v4_addr_in_nx_info(%p[#%u]," NIPQUAD_FMT ",%04x) = %d",
7960+ nxi, nxi ? nxi->nx_id : 0, NIPQUAD(addr), tmask, ret);
7961+ return ret;
7962+}
d337f35e 7963+
d33d7b00
AM
7964+static inline
7965+int v4_nx_addr_match(struct nx_addr_v4 *nxa, struct nx_addr_v4 *addr, uint16_t mask)
7966+{
7967+ /* FIXME: needs full range checks */
7968+ return v4_addr_match(nxa, addr->ip[0].s_addr, mask);
7969+}
d337f35e 7970+
d33d7b00
AM
7971+static inline
7972+int v4_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v4 *nxa, uint16_t mask)
7973+{
7974+ struct nx_addr_v4 *ptr;
7a9e40b8 7975+ unsigned long irqflags;
4bf69007 7976+ int ret = 1;
d337f35e 7977+
7a9e40b8 7978+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
d33d7b00
AM
7979+ for (ptr = &nxi->v4; ptr; ptr = ptr->next)
7980+ if (v4_nx_addr_match(ptr, nxa, mask))
4bf69007
AM
7981+ goto out_unlock;
7982+ ret = 0;
7983+out_unlock:
7a9e40b8 7984+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007 7985+ return ret;
d33d7b00 7986+}
d337f35e 7987+
d33d7b00 7988+#include <net/inet_sock.h>
d337f35e 7989+
d33d7b00
AM
7990+/*
7991+ * Check if a given address matches for a socket
7992+ *
7993+ * nxi: the socket's nx_info if any
7994+ * addr: to be verified address
7995+ */
7996+static inline
7997+int v4_sock_addr_match (
7998+ struct nx_info *nxi,
7999+ struct inet_sock *inet,
8000+ __be32 addr)
8001+{
8002+ __be32 saddr = inet->inet_rcv_saddr;
8003+ __be32 bcast = nxi ? nxi->v4_bcast.s_addr : INADDR_BROADCAST;
d337f35e 8004+
d33d7b00
AM
8005+ if (addr && (saddr == addr || bcast == addr))
8006+ return 1;
8007+ if (!saddr)
8008+ return v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND);
8009+ return 0;
8010+}
d337f35e 8011+
d337f35e 8012+
d33d7b00 8013+/* inet related checks and helpers */
d337f35e
JR
8014+
8015+
d33d7b00
AM
8016+struct in_ifaddr;
8017+struct net_device;
8018+struct sock;
d337f35e 8019+
d33d7b00 8020+#ifdef CONFIG_INET
d337f35e 8021+
d33d7b00
AM
8022+#include <linux/netdevice.h>
8023+#include <linux/inetdevice.h>
8024+#include <net/inet_sock.h>
8025+#include <net/inet_timewait_sock.h>
d337f35e 8026+
d337f35e 8027+
d33d7b00
AM
8028+int dev_in_nx_info(struct net_device *, struct nx_info *);
8029+int v4_dev_in_nx_info(struct net_device *, struct nx_info *);
8030+int nx_v4_addr_conflict(struct nx_info *, struct nx_info *);
d337f35e 8031+
d337f35e 8032+
d33d7b00
AM
8033+/*
8034+ * check if address is covered by socket
8035+ *
8036+ * sk: the socket to check against
8037+ * nxa: the address in question (must be != 0)
8038+ */
d337f35e 8039+
d33d7b00
AM
8040+static inline
8041+int __v4_addr_match_socket(const struct sock *sk, struct nx_addr_v4 *nxa)
8042+{
8043+ struct nx_info *nxi = sk->sk_nx_info;
c2e5f7c8 8044+ __be32 saddr = sk->sk_rcv_saddr;
d337f35e 8045+
d33d7b00
AM
8046+ vxdprintk(VXD_CBIT(net, 5),
8047+ "__v4_addr_in_socket(%p," NXAV4_FMT ") %p:" NIPQUAD_FMT " %p;%lx",
8048+ sk, NXAV4(nxa), nxi, NIPQUAD(saddr), sk->sk_socket,
8049+ (sk->sk_socket?sk->sk_socket->flags:0));
d337f35e 8050+
d33d7b00
AM
8051+ if (saddr) { /* direct address match */
8052+ return v4_addr_match(nxa, saddr, -1);
8053+ } else if (nxi) { /* match against nx_info */
8054+ return v4_nx_addr_in_nx_info(nxi, nxa, -1);
8055+ } else { /* unrestricted any socket */
8056+ return 1;
8057+ }
8058+}
d337f35e
JR
8059+
8060+
d337f35e 8061+
d33d7b00
AM
8062+static inline
8063+int nx_dev_visible(struct nx_info *nxi, struct net_device *dev)
8064+{
8065+ vxdprintk(VXD_CBIT(net, 1),
8066+ "nx_dev_visible(%p[#%u],%p " VS_Q("%s") ") %d",
8067+ nxi, nxi ? nxi->nx_id : 0, dev, dev->name,
8068+ nxi ? dev_in_nx_info(dev, nxi) : 0);
d337f35e 8069+
d33d7b00
AM
8070+ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
8071+ return 1;
8072+ if (dev_in_nx_info(dev, nxi))
8073+ return 1;
8074+ return 0;
8075+}
d337f35e
JR
8076+
8077+
d33d7b00
AM
8078+static inline
8079+int v4_ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
8080+{
8081+ if (!nxi)
8082+ return 1;
8083+ if (!ifa)
8084+ return 0;
8085+ return v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW);
8086+}
d337f35e 8087+
d33d7b00
AM
8088+static inline
8089+int nx_v4_ifa_visible(struct nx_info *nxi, struct in_ifaddr *ifa)
8090+{
8091+ vxdprintk(VXD_CBIT(net, 1), "nx_v4_ifa_visible(%p[#%u],%p) %d",
8092+ nxi, nxi ? nxi->nx_id : 0, ifa,
8093+ nxi ? v4_ifa_in_nx_info(ifa, nxi) : 0);
d337f35e 8094+
d33d7b00
AM
8095+ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
8096+ return 1;
8097+ if (v4_ifa_in_nx_info(ifa, nxi))
8098+ return 1;
8099+ return 0;
8100+}
d337f35e 8101+
d337f35e 8102+
d33d7b00
AM
8103+struct nx_v4_sock_addr {
8104+ __be32 saddr; /* Address used for validation */
8105+ __be32 baddr; /* Address used for socket bind */
8106+};
d337f35e 8107+
d33d7b00
AM
8108+static inline
8109+int v4_map_sock_addr(struct inet_sock *inet, struct sockaddr_in *addr,
8110+ struct nx_v4_sock_addr *nsa)
8111+{
8112+ struct sock *sk = &inet->sk;
8113+ struct nx_info *nxi = sk->sk_nx_info;
8114+ __be32 saddr = addr->sin_addr.s_addr;
8115+ __be32 baddr = saddr;
d337f35e 8116+
d33d7b00
AM
8117+ vxdprintk(VXD_CBIT(net, 3),
8118+ "inet_bind(%p)* %p,%p;%lx " NIPQUAD_FMT,
8119+ sk, sk->sk_nx_info, sk->sk_socket,
8120+ (sk->sk_socket ? sk->sk_socket->flags : 0),
8121+ NIPQUAD(saddr));
d337f35e 8122+
d33d7b00
AM
8123+ if (nxi) {
8124+ if (saddr == INADDR_ANY) {
8125+ if (nx_info_flags(nxi, NXF_SINGLE_IP, 0))
8126+ baddr = nxi->v4.ip[0].s_addr;
8127+ } else if (saddr == IPI_LOOPBACK) {
8128+ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
8129+ baddr = nxi->v4_lback.s_addr;
9795bf04
AM
8130+ } else if (!ipv4_is_multicast(saddr) ||
8131+ !nx_info_ncaps(nxi, NXC_MULTICAST)) {
8132+ /* normal address bind */
d33d7b00
AM
8133+ if (!v4_addr_in_nx_info(nxi, saddr, NXA_MASK_BIND))
8134+ return -EADDRNOTAVAIL;
8135+ }
8136+ }
d337f35e 8137+
d33d7b00
AM
8138+ vxdprintk(VXD_CBIT(net, 3),
8139+ "inet_bind(%p) " NIPQUAD_FMT ", " NIPQUAD_FMT,
8140+ sk, NIPQUAD(saddr), NIPQUAD(baddr));
d337f35e 8141+
d33d7b00
AM
8142+ nsa->saddr = saddr;
8143+ nsa->baddr = baddr;
8144+ return 0;
8145+}
d337f35e 8146+
d33d7b00
AM
8147+static inline
8148+void v4_set_sock_addr(struct inet_sock *inet, struct nx_v4_sock_addr *nsa)
8149+{
8150+ inet->inet_saddr = nsa->baddr;
8151+ inet->inet_rcv_saddr = nsa->baddr;
8152+}
d337f35e 8153+
d337f35e 8154+
d33d7b00
AM
8155+/*
8156+ * helper to simplify inet_lookup_listener
8157+ *
8158+ * nxi: the socket's nx_info if any
8159+ * addr: to be verified address
8160+ * saddr: socket address
8161+ */
8162+static inline int v4_inet_addr_match (
8163+ struct nx_info *nxi,
8164+ __be32 addr,
8165+ __be32 saddr)
8166+{
8167+ if (addr && (saddr == addr))
8168+ return 1;
8169+ if (!saddr)
8170+ return nxi ? v4_addr_in_nx_info(nxi, addr, NXA_MASK_BIND) : 1;
8171+ return 0;
8172+}
d337f35e 8173+
d33d7b00
AM
8174+static inline __be32 nx_map_sock_lback(struct nx_info *nxi, __be32 addr)
8175+{
8176+ if (nx_info_flags(nxi, NXF_HIDE_LBACK, 0) &&
8177+ (addr == nxi->v4_lback.s_addr))
8178+ return IPI_LOOPBACK;
8179+ return addr;
8180+}
d337f35e 8181+
d33d7b00
AM
8182+static inline
8183+int nx_info_has_v4(struct nx_info *nxi)
8184+{
8185+ if (!nxi)
8186+ return 1;
8187+ if (NX_IPV4(nxi))
8188+ return 1;
8189+ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0))
8190+ return 1;
8191+ return 0;
8192+}
d337f35e 8193+
d33d7b00 8194+#else /* CONFIG_INET */
d337f35e 8195+
d33d7b00
AM
8196+static inline
8197+int nx_dev_visible(struct nx_info *n, struct net_device *d)
8198+{
8199+ return 1;
8200+}
d337f35e 8201+
d33d7b00
AM
8202+static inline
8203+int nx_v4_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
8204+{
8205+ return 1;
8206+}
d337f35e 8207+
d33d7b00
AM
8208+static inline
8209+int v4_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
8210+{
8211+ return 1;
8212+}
d337f35e 8213+
d33d7b00
AM
8214+static inline
8215+int nx_info_has_v4(struct nx_info *nxi)
8216+{
8217+ return 0;
8218+}
d337f35e 8219+
d33d7b00 8220+#endif /* CONFIG_INET */
d337f35e 8221+
d33d7b00
AM
8222+#define current_nx_info_has_v4() \
8223+ nx_info_has_v4(current_nx_info())
d337f35e 8224+
d33d7b00
AM
8225+#else
8226+// #warning duplicate inclusion
3bac966d 8227+#endif
09a55596
AM
8228diff -NurpP --minimal linux-4.9.135/include/linux/vs_inet6.h linux-4.9.135-vs2.3.9.8/include/linux/vs_inet6.h
8229--- linux-4.9.135/include/linux/vs_inet6.h 1970-01-01 00:00:00.000000000 +0000
8230+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_inet6.h 2018-10-20 04:58:14.000000000 +0000
369dbd59 8231@@ -0,0 +1,264 @@
d33d7b00
AM
8232+#ifndef _VS_INET6_H
8233+#define _VS_INET6_H
4a036bed 8234+
d33d7b00
AM
8235+#include "vserver/base.h"
8236+#include "vserver/network.h"
8237+#include "vserver/debug.h"
d337f35e 8238+
d33d7b00 8239+#include <net/ipv6.h>
d337f35e 8240+
d33d7b00
AM
8241+#define NXAV6(a) &(a)->ip, &(a)->mask, (a)->prefix, (a)->type
8242+#define NXAV6_FMT "[%pI6/%pI6/%d:%04x]"
7e46296a 8243+
7e46296a 8244+
d33d7b00 8245+#ifdef CONFIG_IPV6
7e46296a 8246+
d33d7b00
AM
8247+static inline
8248+int v6_addr_match(struct nx_addr_v6 *nxa,
8249+ const struct in6_addr *addr, uint16_t mask)
8250+{
8251+ int ret = 0;
7e46296a 8252+
d33d7b00
AM
8253+ switch (nxa->type & mask) {
8254+ case NXA_TYPE_MASK:
8255+ ret = ipv6_masked_addr_cmp(&nxa->ip, &nxa->mask, addr);
8256+ break;
8257+ case NXA_TYPE_ADDR:
8258+ ret = ipv6_addr_equal(&nxa->ip, addr);
8259+ break;
8260+ case NXA_TYPE_ANY:
8261+ ret = 1;
8262+ break;
8263+ }
8264+ vxdprintk(VXD_CBIT(net, 0),
8265+ "v6_addr_match(%p" NXAV6_FMT ",%pI6,%04x) = %d",
8266+ nxa, NXAV6(nxa), addr, mask, ret);
8267+ return ret;
8268+}
7e46296a 8269+
d33d7b00
AM
8270+static inline
8271+int v6_addr_in_nx_info(struct nx_info *nxi,
8272+ const struct in6_addr *addr, uint16_t mask)
8273+{
8274+ struct nx_addr_v6 *nxa;
7a9e40b8 8275+ unsigned long irqflags;
d33d7b00 8276+ int ret = 1;
d337f35e 8277+
d33d7b00
AM
8278+ if (!nxi)
8279+ goto out;
4bf69007 8280+
7a9e40b8 8281+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
d33d7b00
AM
8282+ for (nxa = &nxi->v6; nxa; nxa = nxa->next)
8283+ if (v6_addr_match(nxa, addr, mask))
4bf69007 8284+ goto out_unlock;
d33d7b00 8285+ ret = 0;
4bf69007 8286+out_unlock:
7a9e40b8 8287+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
d33d7b00
AM
8288+out:
8289+ vxdprintk(VXD_CBIT(net, 0),
8290+ "v6_addr_in_nx_info(%p[#%u],%pI6,%04x) = %d",
8291+ nxi, nxi ? nxi->nx_id : 0, addr, mask, ret);
8292+ return ret;
8293+}
d337f35e 8294+
d33d7b00
AM
8295+static inline
8296+int v6_nx_addr_match(struct nx_addr_v6 *nxa, struct nx_addr_v6 *addr, uint16_t mask)
8297+{
8298+ /* FIXME: needs full range checks */
8299+ return v6_addr_match(nxa, &addr->ip, mask);
8300+}
d337f35e 8301+
d33d7b00
AM
8302+static inline
8303+int v6_nx_addr_in_nx_info(struct nx_info *nxi, struct nx_addr_v6 *nxa, uint16_t mask)
8304+{
8305+ struct nx_addr_v6 *ptr;
7a9e40b8 8306+ unsigned long irqflags;
4bf69007 8307+ int ret = 1;
d337f35e 8308+
7a9e40b8 8309+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
d33d7b00
AM
8310+ for (ptr = &nxi->v6; ptr; ptr = ptr->next)
8311+ if (v6_nx_addr_match(ptr, nxa, mask))
4bf69007
AM
8312+ goto out_unlock;
8313+ ret = 0;
8314+out_unlock:
7a9e40b8 8315+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007 8316+ return ret;
d33d7b00 8317+}
d337f35e 8318+
d337f35e 8319+
d33d7b00
AM
8320+/*
8321+ * Check if a given address matches for a socket
8322+ *
8323+ * nxi: the socket's nx_info if any
8324+ * addr: to be verified address
8325+ */
8326+static inline
8327+int v6_sock_addr_match (
8328+ struct nx_info *nxi,
8329+ struct inet_sock *inet,
8330+ struct in6_addr *addr)
8331+{
8332+ struct sock *sk = &inet->sk;
c2e5f7c8 8333+ const struct in6_addr *saddr = inet6_rcv_saddr(sk);
d337f35e 8334+
d33d7b00
AM
8335+ if (!ipv6_addr_any(addr) &&
8336+ ipv6_addr_equal(saddr, addr))
8337+ return 1;
8338+ if (ipv6_addr_any(saddr))
8339+ return v6_addr_in_nx_info(nxi, addr, -1);
8340+ return 0;
8341+}
d337f35e 8342+
d33d7b00
AM
8343+/*
8344+ * check if address is covered by socket
8345+ *
8346+ * sk: the socket to check against
8347+ * addr: the address in question (must be != 0)
8348+ */
d337f35e 8349+
d33d7b00
AM
8350+static inline
8351+int __v6_addr_match_socket(const struct sock *sk, struct nx_addr_v6 *nxa)
8352+{
8353+ struct nx_info *nxi = sk->sk_nx_info;
c2e5f7c8 8354+ const struct in6_addr *saddr = inet6_rcv_saddr(sk);
d337f35e 8355+
d33d7b00
AM
8356+ vxdprintk(VXD_CBIT(net, 5),
8357+ "__v6_addr_in_socket(%p," NXAV6_FMT ") %p:%pI6 %p;%lx",
8358+ sk, NXAV6(nxa), nxi, saddr, sk->sk_socket,
8359+ (sk->sk_socket?sk->sk_socket->flags:0));
d337f35e 8360+
d33d7b00
AM
8361+ if (!ipv6_addr_any(saddr)) { /* direct address match */
8362+ return v6_addr_match(nxa, saddr, -1);
8363+ } else if (nxi) { /* match against nx_info */
8364+ return v6_nx_addr_in_nx_info(nxi, nxa, -1);
8365+ } else { /* unrestricted any socket */
8366+ return 1;
8367+ }
8368+}
d337f35e 8369+
d337f35e 8370+
d33d7b00 8371+/* inet related checks and helpers */
d337f35e 8372+
d337f35e 8373+
d33d7b00
AM
8374+struct in_ifaddr;
8375+struct net_device;
8376+struct sock;
d337f35e
JR
8377+
8378+
d33d7b00
AM
8379+#include <linux/netdevice.h>
8380+#include <linux/inetdevice.h>
8381+#include <net/inet_timewait_sock.h>
d337f35e 8382+
d337f35e 8383+
d33d7b00
AM
8384+int dev_in_nx_info(struct net_device *, struct nx_info *);
8385+int v6_dev_in_nx_info(struct net_device *, struct nx_info *);
8386+int nx_v6_addr_conflict(struct nx_info *, struct nx_info *);
d337f35e
JR
8387+
8388+
3bac966d 8389+
d33d7b00
AM
8390+static inline
8391+int v6_ifa_in_nx_info(struct inet6_ifaddr *ifa, struct nx_info *nxi)
adc1caaa 8392+{
d33d7b00
AM
8393+ if (!nxi)
8394+ return 1;
8395+ if (!ifa)
8396+ return 0;
8397+ return v6_addr_in_nx_info(nxi, &ifa->addr, -1);
8398+}
d337f35e 8399+
d33d7b00
AM
8400+static inline
8401+int nx_v6_ifa_visible(struct nx_info *nxi, struct inet6_ifaddr *ifa)
8402+{
8403+ vxdprintk(VXD_CBIT(net, 1), "nx_v6_ifa_visible(%p[#%u],%p) %d",
8404+ nxi, nxi ? nxi->nx_id : 0, ifa,
8405+ nxi ? v6_ifa_in_nx_info(ifa, nxi) : 0);
d337f35e 8406+
d33d7b00
AM
8407+ if (!nx_info_flags(nxi, NXF_HIDE_NETIF, 0))
8408+ return 1;
8409+ if (v6_ifa_in_nx_info(ifa, nxi))
8410+ return 1;
8411+ return 0;
adc1caaa 8412+}
d337f35e 8413+
d337f35e 8414+
d33d7b00
AM
8415+struct nx_v6_sock_addr {
8416+ struct in6_addr saddr; /* Address used for validation */
8417+ struct in6_addr baddr; /* Address used for socket bind */
8418+};
8419+
8420+static inline
8421+int v6_map_sock_addr(struct inet_sock *inet, struct sockaddr_in6 *addr,
8422+ struct nx_v6_sock_addr *nsa)
8423+{
8424+ // struct sock *sk = &inet->sk;
8425+ // struct nx_info *nxi = sk->sk_nx_info;
8426+ struct in6_addr saddr = addr->sin6_addr;
8427+ struct in6_addr baddr = saddr;
3bac966d 8428+
d33d7b00
AM
8429+ nsa->saddr = saddr;
8430+ nsa->baddr = baddr;
8431+ return 0;
8432+}
3bac966d 8433+
d33d7b00
AM
8434+static inline
8435+void v6_set_sock_addr(struct inet_sock *inet, struct nx_v6_sock_addr *nsa)
8436+{
8437+ // struct sock *sk = &inet->sk;
8438+ // struct in6_addr *saddr = inet6_rcv_saddr(sk);
3bac966d 8439+
d33d7b00
AM
8440+ // *saddr = nsa->baddr;
8441+ // inet->inet_saddr = nsa->baddr;
8442+}
3bac966d 8443+
d33d7b00
AM
8444+static inline
8445+int nx_info_has_v6(struct nx_info *nxi)
8446+{
8447+ if (!nxi)
8448+ return 1;
8449+ if (NX_IPV6(nxi))
8450+ return 1;
8451+ return 0;
8452+}
3bac966d 8453+
d33d7b00 8454+#else /* CONFIG_IPV6 */
d337f35e 8455+
2380c486 8456+static inline
d33d7b00 8457+int nx_v6_dev_visible(struct nx_info *n, struct net_device *d)
2380c486 8458+{
d33d7b00 8459+ return 1;
d337f35e
JR
8460+}
8461+
3bac966d 8462+
adc1caaa 8463+static inline
d33d7b00 8464+int nx_v6_addr_conflict(struct nx_info *n, uint32_t a, const struct sock *s)
adc1caaa 8465+{
d33d7b00 8466+ return 1;
adc1caaa 8467+}
2380c486 8468+
d33d7b00
AM
8469+static inline
8470+int v6_ifa_in_nx_info(struct in_ifaddr *a, struct nx_info *n)
8471+{
8472+ return 1;
8473+}
8474+
8475+static inline
8476+int nx_info_has_v6(struct nx_info *nxi)
8477+{
8478+ return 0;
8479+}
2380c486 8480+
369dbd59
AM
8481+static inline
8482+int v6_addr_in_nx_info(struct nx_info *nxi,
8483+ const struct in6_addr *addr, uint16_t mask)
8484+{
8485+ return 0;
8486+}
8487+
d33d7b00 8488+#endif /* CONFIG_IPV6 */
d337f35e 8489+
d33d7b00
AM
8490+#define current_nx_info_has_v6() \
8491+ nx_info_has_v6(current_nx_info())
3bac966d 8492+
d337f35e 8493+#else
d33d7b00 8494+#warning duplicate inclusion
d337f35e 8495+#endif
09a55596
AM
8496diff -NurpP --minimal linux-4.9.135/include/linux/vs_limit.h linux-4.9.135-vs2.3.9.8/include/linux/vs_limit.h
8497--- linux-4.9.135/include/linux/vs_limit.h 1970-01-01 00:00:00.000000000 +0000
8498+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_limit.h 2018-10-20 04:58:14.000000000 +0000
d33d7b00
AM
8499@@ -0,0 +1,140 @@
8500+#ifndef _VS_LIMIT_H
8501+#define _VS_LIMIT_H
d337f35e 8502+
d33d7b00
AM
8503+#include "vserver/limit.h"
8504+#include "vserver/base.h"
8505+#include "vserver/context.h"
8506+#include "vserver/debug.h"
8507+#include "vserver/context.h"
8508+#include "vserver/limit_int.h"
d337f35e
JR
8509+
8510+
d33d7b00
AM
8511+#define vx_acc_cres(v, d, p, r) \
8512+ __vx_acc_cres(v, r, d, p, __FILE__, __LINE__)
d337f35e 8513+
d33d7b00
AM
8514+#define vx_acc_cres_cond(x, d, p, r) \
8515+ __vx_acc_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
8516+ r, d, p, __FILE__, __LINE__)
d337f35e
JR
8517+
8518+
d33d7b00
AM
8519+#define vx_add_cres(v, a, p, r) \
8520+ __vx_add_cres(v, r, a, p, __FILE__, __LINE__)
8521+#define vx_sub_cres(v, a, p, r) vx_add_cres(v, -(a), p, r)
d337f35e 8522+
d33d7b00
AM
8523+#define vx_add_cres_cond(x, a, p, r) \
8524+ __vx_add_cres(((x) == vx_current_xid()) ? current_vx_info() : 0, \
8525+ r, a, p, __FILE__, __LINE__)
8526+#define vx_sub_cres_cond(x, a, p, r) vx_add_cres_cond(x, -(a), p, r)
d337f35e 8527+
d337f35e 8528+
d33d7b00 8529+/* process and file limits */
d337f35e 8530+
d33d7b00
AM
8531+#define vx_nproc_inc(p) \
8532+ vx_acc_cres((p)->vx_info, 1, p, RLIMIT_NPROC)
d337f35e 8533+
d33d7b00
AM
8534+#define vx_nproc_dec(p) \
8535+ vx_acc_cres((p)->vx_info,-1, p, RLIMIT_NPROC)
d337f35e 8536+
d33d7b00
AM
8537+#define vx_files_inc(f) \
8538+ vx_acc_cres_cond((f)->f_xid, 1, f, RLIMIT_NOFILE)
d337f35e 8539+
d33d7b00
AM
8540+#define vx_files_dec(f) \
8541+ vx_acc_cres_cond((f)->f_xid,-1, f, RLIMIT_NOFILE)
d337f35e 8542+
d33d7b00
AM
8543+#define vx_locks_inc(l) \
8544+ vx_acc_cres_cond((l)->fl_xid, 1, l, RLIMIT_LOCKS)
d337f35e 8545+
d33d7b00
AM
8546+#define vx_locks_dec(l) \
8547+ vx_acc_cres_cond((l)->fl_xid,-1, l, RLIMIT_LOCKS)
d337f35e 8548+
d33d7b00
AM
8549+#define vx_openfd_inc(f) \
8550+ vx_acc_cres(current_vx_info(), 1, (void *)(long)(f), VLIMIT_OPENFD)
d337f35e 8551+
d33d7b00
AM
8552+#define vx_openfd_dec(f) \
8553+ vx_acc_cres(current_vx_info(),-1, (void *)(long)(f), VLIMIT_OPENFD)
d337f35e 8554+
d337f35e 8555+
d33d7b00
AM
8556+#define vx_cres_avail(v, n, r) \
8557+ __vx_cres_avail(v, r, n, __FILE__, __LINE__)
d337f35e 8558+
d337f35e 8559+
d33d7b00
AM
8560+#define vx_nproc_avail(n) \
8561+ vx_cres_avail(current_vx_info(), n, RLIMIT_NPROC)
d337f35e 8562+
d33d7b00
AM
8563+#define vx_files_avail(n) \
8564+ vx_cres_avail(current_vx_info(), n, RLIMIT_NOFILE)
d337f35e 8565+
d33d7b00
AM
8566+#define vx_locks_avail(n) \
8567+ vx_cres_avail(current_vx_info(), n, RLIMIT_LOCKS)
d337f35e 8568+
d33d7b00
AM
8569+#define vx_openfd_avail(n) \
8570+ vx_cres_avail(current_vx_info(), n, VLIMIT_OPENFD)
d337f35e 8571+
d337f35e 8572+
d33d7b00 8573+/* dentry limits */
d337f35e 8574+
d33d7b00 8575+#define vx_dentry_inc(d) do { \
c2e5f7c8 8576+ if (d_count(d) == 1) \
d33d7b00
AM
8577+ vx_acc_cres(current_vx_info(), 1, d, VLIMIT_DENTRY); \
8578+ } while (0)
d337f35e 8579+
d33d7b00 8580+#define vx_dentry_dec(d) do { \
c2e5f7c8 8581+ if (d_count(d) == 0) \
d33d7b00
AM
8582+ vx_acc_cres(current_vx_info(),-1, d, VLIMIT_DENTRY); \
8583+ } while (0)
d337f35e 8584+
d33d7b00
AM
8585+#define vx_dentry_avail(n) \
8586+ vx_cres_avail(current_vx_info(), n, VLIMIT_DENTRY)
d337f35e 8587+
d337f35e 8588+
d33d7b00 8589+/* socket limits */
d337f35e 8590+
d33d7b00
AM
8591+#define vx_sock_inc(s) \
8592+ vx_acc_cres((s)->sk_vx_info, 1, s, VLIMIT_NSOCK)
d337f35e 8593+
d33d7b00
AM
8594+#define vx_sock_dec(s) \
8595+ vx_acc_cres((s)->sk_vx_info,-1, s, VLIMIT_NSOCK)
d337f35e 8596+
d33d7b00
AM
8597+#define vx_sock_avail(n) \
8598+ vx_cres_avail(current_vx_info(), n, VLIMIT_NSOCK)
d337f35e 8599+
d337f35e 8600+
d33d7b00 8601+/* ipc resource limits */
d337f35e 8602+
d33d7b00
AM
8603+#define vx_ipcmsg_add(v, u, a) \
8604+ vx_add_cres(v, a, u, RLIMIT_MSGQUEUE)
d337f35e 8605+
d33d7b00
AM
8606+#define vx_ipcmsg_sub(v, u, a) \
8607+ vx_sub_cres(v, a, u, RLIMIT_MSGQUEUE)
d337f35e 8608+
d33d7b00
AM
8609+#define vx_ipcmsg_avail(v, a) \
8610+ vx_cres_avail(v, a, RLIMIT_MSGQUEUE)
d337f35e 8611+
d337f35e 8612+
d33d7b00
AM
8613+#define vx_ipcshm_add(v, k, a) \
8614+ vx_add_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
d337f35e 8615+
d33d7b00
AM
8616+#define vx_ipcshm_sub(v, k, a) \
8617+ vx_sub_cres(v, a, (void *)(long)(k), VLIMIT_SHMEM)
d337f35e 8618+
d33d7b00
AM
8619+#define vx_ipcshm_avail(v, a) \
8620+ vx_cres_avail(v, a, VLIMIT_SHMEM)
d337f35e
JR
8621+
8622+
d33d7b00
AM
8623+#define vx_semary_inc(a) \
8624+ vx_acc_cres(current_vx_info(), 1, a, VLIMIT_SEMARY)
d337f35e 8625+
d33d7b00
AM
8626+#define vx_semary_dec(a) \
8627+ vx_acc_cres(current_vx_info(), -1, a, VLIMIT_SEMARY)
d337f35e 8628+
d337f35e 8629+
d33d7b00
AM
8630+#define vx_nsems_add(a,n) \
8631+ vx_add_cres(current_vx_info(), n, a, VLIMIT_NSEMS)
d337f35e 8632+
d33d7b00
AM
8633+#define vx_nsems_sub(a,n) \
8634+ vx_sub_cres(current_vx_info(), n, a, VLIMIT_NSEMS)
d337f35e
JR
8635+
8636+
d33d7b00
AM
8637+#else
8638+#warning duplicate inclusion
8639+#endif
09a55596
AM
8640diff -NurpP --minimal linux-4.9.135/include/linux/vs_network.h linux-4.9.135-vs2.3.9.8/include/linux/vs_network.h
8641--- linux-4.9.135/include/linux/vs_network.h 1970-01-01 00:00:00.000000000 +0000
8642+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_network.h 2018-10-20 04:58:14.000000000 +0000
d33d7b00
AM
8643@@ -0,0 +1,169 @@
8644+#ifndef _NX_VS_NETWORK_H
8645+#define _NX_VS_NETWORK_H
7e46296a 8646+
d33d7b00
AM
8647+#include "vserver/context.h"
8648+#include "vserver/network.h"
8649+#include "vserver/base.h"
8650+#include "vserver/check.h"
8651+#include "vserver/debug.h"
2380c486 8652+
d33d7b00 8653+#include <linux/sched.h>
2380c486 8654+
2380c486 8655+
d33d7b00 8656+#define get_nx_info(i) __get_nx_info(i, __FILE__, __LINE__)
2380c486 8657+
d33d7b00
AM
8658+static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
8659+ const char *_file, int _line)
8660+{
8661+ if (!nxi)
8662+ return NULL;
d337f35e 8663+
d33d7b00
AM
8664+ vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])",
8665+ nxi, nxi ? nxi->nx_id : 0,
8666+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8667+ _file, _line);
d337f35e 8668+
d33d7b00
AM
8669+ atomic_inc(&nxi->nx_usecnt);
8670+ return nxi;
8671+}
d337f35e
JR
8672+
8673+
d33d7b00 8674+extern void free_nx_info(struct nx_info *);
d337f35e 8675+
d33d7b00 8676+#define put_nx_info(i) __put_nx_info(i, __FILE__, __LINE__)
d337f35e 8677+
d33d7b00
AM
8678+static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
8679+{
8680+ if (!nxi)
8681+ return;
d337f35e 8682+
d33d7b00
AM
8683+ vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])",
8684+ nxi, nxi ? nxi->nx_id : 0,
8685+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8686+ _file, _line);
d337f35e 8687+
d33d7b00
AM
8688+ if (atomic_dec_and_test(&nxi->nx_usecnt))
8689+ free_nx_info(nxi);
8690+}
d337f35e 8691+
d337f35e 8692+
d33d7b00 8693+#define init_nx_info(p, i) __init_nx_info(p, i, __FILE__, __LINE__)
d337f35e 8694+
d33d7b00
AM
8695+static inline void __init_nx_info(struct nx_info **nxp, struct nx_info *nxi,
8696+ const char *_file, int _line)
8697+{
8698+ if (nxi) {
8699+ vxlprintk(VXD_CBIT(nid, 3),
8700+ "init_nx_info(%p[#%d.%d])",
8701+ nxi, nxi ? nxi->nx_id : 0,
8702+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8703+ _file, _line);
d337f35e 8704+
d33d7b00
AM
8705+ atomic_inc(&nxi->nx_usecnt);
8706+ }
8707+ *nxp = nxi;
8708+}
d337f35e 8709+
d337f35e 8710+
d33d7b00 8711+#define set_nx_info(p, i) __set_nx_info(p, i, __FILE__, __LINE__)
d337f35e 8712+
d33d7b00
AM
8713+static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
8714+ const char *_file, int _line)
8715+{
8716+ struct nx_info *nxo;
d337f35e 8717+
d33d7b00
AM
8718+ if (!nxi)
8719+ return;
d337f35e 8720+
d33d7b00
AM
8721+ vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d])",
8722+ nxi, nxi ? nxi->nx_id : 0,
8723+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8724+ _file, _line);
d337f35e 8725+
d33d7b00
AM
8726+ atomic_inc(&nxi->nx_usecnt);
8727+ nxo = xchg(nxp, nxi);
8728+ BUG_ON(nxo);
8729+}
d337f35e 8730+
d33d7b00 8731+#define clr_nx_info(p) __clr_nx_info(p, __FILE__, __LINE__)
d337f35e 8732+
d33d7b00
AM
8733+static inline void __clr_nx_info(struct nx_info **nxp,
8734+ const char *_file, int _line)
8735+{
8736+ struct nx_info *nxo;
d337f35e 8737+
d33d7b00
AM
8738+ nxo = xchg(nxp, NULL);
8739+ if (!nxo)
8740+ return;
d337f35e 8741+
d33d7b00
AM
8742+ vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d])",
8743+ nxo, nxo ? nxo->nx_id : 0,
8744+ nxo ? atomic_read(&nxo->nx_usecnt) : 0,
8745+ _file, _line);
d337f35e 8746+
d33d7b00
AM
8747+ if (atomic_dec_and_test(&nxo->nx_usecnt))
8748+ free_nx_info(nxo);
8749+}
d337f35e
JR
8750+
8751+
d33d7b00 8752+#define claim_nx_info(v, p) __claim_nx_info(v, p, __FILE__, __LINE__)
d337f35e 8753+
d33d7b00
AM
8754+static inline void __claim_nx_info(struct nx_info *nxi,
8755+ struct task_struct *task, const char *_file, int _line)
8756+{
8757+ vxlprintk(VXD_CBIT(nid, 3), "claim_nx_info(%p[#%d.%d.%d]) %p",
8758+ nxi, nxi ? nxi->nx_id : 0,
8759+ nxi?atomic_read(&nxi->nx_usecnt):0,
8760+ nxi?atomic_read(&nxi->nx_tasks):0,
8761+ task, _file, _line);
d337f35e 8762+
d33d7b00
AM
8763+ atomic_inc(&nxi->nx_tasks);
8764+}
d337f35e 8765+
d337f35e 8766+
d33d7b00 8767+extern void unhash_nx_info(struct nx_info *);
d337f35e 8768+
d33d7b00 8769+#define release_nx_info(v, p) __release_nx_info(v, p, __FILE__, __LINE__)
d337f35e 8770+
d33d7b00
AM
8771+static inline void __release_nx_info(struct nx_info *nxi,
8772+ struct task_struct *task, const char *_file, int _line)
8773+{
8774+ vxlprintk(VXD_CBIT(nid, 3), "release_nx_info(%p[#%d.%d.%d]) %p",
8775+ nxi, nxi ? nxi->nx_id : 0,
8776+ nxi ? atomic_read(&nxi->nx_usecnt) : 0,
8777+ nxi ? atomic_read(&nxi->nx_tasks) : 0,
8778+ task, _file, _line);
ab30d09f 8779+
d33d7b00 8780+ might_sleep();
d337f35e 8781+
d33d7b00
AM
8782+ if (atomic_dec_and_test(&nxi->nx_tasks))
8783+ unhash_nx_info(nxi);
8784+}
d337f35e
JR
8785+
8786+
d33d7b00 8787+#define task_get_nx_info(i) __task_get_nx_info(i, __FILE__, __LINE__)
d337f35e 8788+
d33d7b00
AM
8789+static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
8790+ const char *_file, int _line)
8791+{
8792+ struct nx_info *nxi;
d337f35e 8793+
d33d7b00
AM
8794+ task_lock(p);
8795+ vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)",
8796+ p, _file, _line);
8797+ nxi = __get_nx_info(p->nx_info, _file, _line);
8798+ task_unlock(p);
8799+ return nxi;
8800+}
d337f35e 8801+
d337f35e 8802+
d33d7b00
AM
8803+static inline void exit_nx_info(struct task_struct *p)
8804+{
8805+ if (p->nx_info)
8806+ release_nx_info(p->nx_info, p);
8807+}
adc1caaa 8808+
d337f35e 8809+
2380c486 8810+#else
d33d7b00 8811+#warning duplicate inclusion
2380c486 8812+#endif
09a55596
AM
8813diff -NurpP --minimal linux-4.9.135/include/linux/vs_pid.h linux-4.9.135-vs2.3.9.8/include/linux/vs_pid.h
8814--- linux-4.9.135/include/linux/vs_pid.h 1970-01-01 00:00:00.000000000 +0000
8815+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_pid.h 2018-10-20 04:58:14.000000000 +0000
b3b0d4fd 8816@@ -0,0 +1,50 @@
d33d7b00
AM
8817+#ifndef _VS_PID_H
8818+#define _VS_PID_H
d337f35e 8819+
d33d7b00
AM
8820+#include "vserver/base.h"
8821+#include "vserver/check.h"
8822+#include "vserver/context.h"
8823+#include "vserver/debug.h"
8824+#include "vserver/pid.h"
8825+#include <linux/pid_namespace.h>
d337f35e 8826+
d337f35e 8827+
d33d7b00 8828+#define VXF_FAKE_INIT (VXF_INFO_INIT | VXF_STATE_INIT)
d337f35e 8829+
d33d7b00
AM
8830+static inline
8831+int vx_proc_task_visible(struct task_struct *task)
8832+{
8833+ if ((task->pid == 1) &&
8834+ !vx_flags(VXF_FAKE_INIT, VXF_FAKE_INIT))
8835+ /* show a blend through init */
8836+ goto visible;
8837+ if (vx_check(vx_task_xid(task), VS_WATCH | VS_IDENT))
8838+ goto visible;
8839+ return 0;
8840+visible:
8841+ return 1;
8842+}
d337f35e 8843+
d33d7b00 8844+#define find_task_by_real_pid(pid) find_task_by_pid_ns(pid, &init_pid_ns)
d337f35e 8845+
d337f35e 8846+
d33d7b00
AM
8847+static inline
8848+struct task_struct *vx_get_proc_task(struct inode *inode, struct pid *pid)
8849+{
8850+ struct task_struct *task = get_pid_task(pid, PIDTYPE_PID);
d337f35e 8851+
d33d7b00
AM
8852+ if (task && !vx_proc_task_visible(task)) {
8853+ vxdprintk(VXD_CBIT(misc, 6),
8854+ "dropping task (get) %p[#%u,%u] for %p[#%u,%u]",
8855+ task, task->xid, task->pid,
8856+ current, current->xid, current->pid);
8857+ put_task_struct(task);
8858+ task = NULL;
8859+ }
8860+ return task;
8861+}
d337f35e 8862+
d337f35e 8863+
d33d7b00
AM
8864+#else
8865+#warning duplicate inclusion
8866+#endif
09a55596
AM
8867diff -NurpP --minimal linux-4.9.135/include/linux/vs_sched.h linux-4.9.135-vs2.3.9.8/include/linux/vs_sched.h
8868--- linux-4.9.135/include/linux/vs_sched.h 1970-01-01 00:00:00.000000000 +0000
8869+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_sched.h 2018-10-20 04:58:14.000000000 +0000
d33d7b00
AM
8870@@ -0,0 +1,40 @@
8871+#ifndef _VS_SCHED_H
8872+#define _VS_SCHED_H
d337f35e 8873+
d33d7b00
AM
8874+#include "vserver/base.h"
8875+#include "vserver/context.h"
8876+#include "vserver/sched.h"
d337f35e
JR
8877+
8878+
d33d7b00
AM
8879+#define MAX_PRIO_BIAS 20
8880+#define MIN_PRIO_BIAS -20
d337f35e 8881+
d33d7b00
AM
8882+static inline
8883+int vx_adjust_prio(struct task_struct *p, int prio, int max_user)
8884+{
8885+ struct vx_info *vxi = p->vx_info;
d337f35e 8886+
d33d7b00
AM
8887+ if (vxi)
8888+ prio += vx_cpu(vxi, sched_pc).prio_bias;
8889+ return prio;
8890+}
d337f35e 8891+
d33d7b00
AM
8892+static inline void vx_account_user(struct vx_info *vxi,
8893+ cputime_t cputime, int nice)
8894+{
8895+ if (!vxi)
8896+ return;
8897+ vx_cpu(vxi, sched_pc).user_ticks += cputime;
8898+}
d337f35e 8899+
d33d7b00
AM
8900+static inline void vx_account_system(struct vx_info *vxi,
8901+ cputime_t cputime, int idle)
8902+{
8903+ if (!vxi)
8904+ return;
8905+ vx_cpu(vxi, sched_pc).sys_ticks += cputime;
8906+}
d337f35e 8907+
d33d7b00
AM
8908+#else
8909+#warning duplicate inclusion
8910+#endif
09a55596
AM
8911diff -NurpP --minimal linux-4.9.135/include/linux/vs_socket.h linux-4.9.135-vs2.3.9.8/include/linux/vs_socket.h
8912--- linux-4.9.135/include/linux/vs_socket.h 1970-01-01 00:00:00.000000000 +0000
8913+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_socket.h 2018-10-20 04:58:14.000000000 +0000
d33d7b00
AM
8914@@ -0,0 +1,67 @@
8915+#ifndef _VS_SOCKET_H
8916+#define _VS_SOCKET_H
d337f35e 8917+
d33d7b00
AM
8918+#include "vserver/debug.h"
8919+#include "vserver/base.h"
8920+#include "vserver/cacct.h"
8921+#include "vserver/context.h"
8922+#include "vserver/tag.h"
d337f35e 8923+
d337f35e 8924+
d33d7b00 8925+/* socket accounting */
d337f35e 8926+
d33d7b00 8927+#include <linux/socket.h>
d337f35e 8928+
d33d7b00
AM
8929+static inline int vx_sock_type(int family)
8930+{
8931+ switch (family) {
8932+ case PF_UNSPEC:
8933+ return VXA_SOCK_UNSPEC;
8934+ case PF_UNIX:
8935+ return VXA_SOCK_UNIX;
8936+ case PF_INET:
8937+ return VXA_SOCK_INET;
8938+ case PF_INET6:
8939+ return VXA_SOCK_INET6;
8940+ case PF_PACKET:
8941+ return VXA_SOCK_PACKET;
8942+ default:
8943+ return VXA_SOCK_OTHER;
8944+ }
8945+}
d337f35e 8946+
d33d7b00
AM
8947+#define vx_acc_sock(v, f, p, s) \
8948+ __vx_acc_sock(v, f, p, s, __FILE__, __LINE__)
d337f35e 8949+
d33d7b00
AM
8950+static inline void __vx_acc_sock(struct vx_info *vxi,
8951+ int family, int pos, int size, char *file, int line)
8952+{
8953+ if (vxi) {
8954+ int type = vx_sock_type(family);
d337f35e 8955+
d33d7b00
AM
8956+ atomic_long_inc(&vxi->cacct.sock[type][pos].count);
8957+ atomic_long_add(size, &vxi->cacct.sock[type][pos].total);
8958+ }
8959+}
d337f35e 8960+
d33d7b00
AM
8961+#define vx_sock_recv(sk, s) \
8962+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, s)
8963+#define vx_sock_send(sk, s) \
8964+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, s)
8965+#define vx_sock_fail(sk, s) \
8966+ vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, s)
d337f35e 8967+
d337f35e 8968+
d33d7b00
AM
8969+#define sock_vx_init(s) do { \
8970+ (s)->sk_xid = 0; \
8971+ (s)->sk_vx_info = NULL; \
8972+ } while (0)
d337f35e 8973+
d33d7b00
AM
8974+#define sock_nx_init(s) do { \
8975+ (s)->sk_nid = 0; \
8976+ (s)->sk_nx_info = NULL; \
8977+ } while (0)
d337f35e 8978+
d33d7b00
AM
8979+#else
8980+#warning duplicate inclusion
8981+#endif
09a55596
AM
8982diff -NurpP --minimal linux-4.9.135/include/linux/vs_tag.h linux-4.9.135-vs2.3.9.8/include/linux/vs_tag.h
8983--- linux-4.9.135/include/linux/vs_tag.h 1970-01-01 00:00:00.000000000 +0000
8984+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_tag.h 2018-10-20 04:58:14.000000000 +0000
d33d7b00
AM
8985@@ -0,0 +1,47 @@
8986+#ifndef _VS_TAG_H
8987+#define _VS_TAG_H
d337f35e 8988+
d33d7b00 8989+#include <linux/vserver/tag.h>
d337f35e 8990+
d33d7b00 8991+/* check conditions */
d337f35e 8992+
d33d7b00
AM
8993+#define DX_ADMIN 0x0001
8994+#define DX_WATCH 0x0002
8995+#define DX_HOSTID 0x0008
d337f35e 8996+
d33d7b00 8997+#define DX_IDENT 0x0010
d337f35e 8998+
d33d7b00 8999+#define DX_ARG_MASK 0x0010
d337f35e 9000+
d337f35e 9001+
d33d7b00 9002+#define dx_task_tag(t) ((t)->tag)
d337f35e 9003+
d33d7b00 9004+#define dx_current_tag() dx_task_tag(current)
d337f35e 9005+
d33d7b00 9006+#define dx_check(c, m) __dx_check(dx_current_tag(), c, m)
d337f35e 9007+
d33d7b00 9008+#define dx_weak_check(c, m) ((m) ? dx_check(c, m) : 1)
d337f35e
JR
9009+
9010+
d33d7b00
AM
9011+/*
9012+ * check current context for ADMIN/WATCH and
9013+ * optionally against supplied argument
9014+ */
61333608 9015+static inline int __dx_check(vtag_t cid, vtag_t id, unsigned int mode)
d33d7b00
AM
9016+{
9017+ if (mode & DX_ARG_MASK) {
9018+ if ((mode & DX_IDENT) && (id == cid))
9019+ return 1;
9020+ }
9021+ return (((mode & DX_ADMIN) && (cid == 0)) ||
9022+ ((mode & DX_WATCH) && (cid == 1)) ||
9023+ ((mode & DX_HOSTID) && (id == 0)));
9024+}
d337f35e 9025+
d33d7b00
AM
9026+struct inode;
9027+int dx_permission(const struct inode *inode, int mask);
d337f35e 9028+
d337f35e 9029+
d33d7b00
AM
9030+#else
9031+#warning duplicate inclusion
9032+#endif
09a55596
AM
9033diff -NurpP --minimal linux-4.9.135/include/linux/vs_time.h linux-4.9.135-vs2.3.9.8/include/linux/vs_time.h
9034--- linux-4.9.135/include/linux/vs_time.h 1970-01-01 00:00:00.000000000 +0000
9035+++ linux-4.9.135-vs2.3.9.8/include/linux/vs_time.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 9036@@ -0,0 +1,21 @@
d33d7b00
AM
9037+#ifndef _VS_TIME_H
9038+#define _VS_TIME_H
d337f35e 9039+
d337f35e 9040+
d33d7b00 9041+/* time faking stuff */
d337f35e 9042+
d33d7b00 9043+#ifdef CONFIG_VSERVER_VTIME
d337f35e 9044+
d33d7b00 9045+extern void vx_adjust_timespec(struct timespec *ts);
763640ca 9046+extern int vx_settimeofday(const struct timespec *ts);
369dbd59 9047+extern int vx_settimeofday64(const struct timespec64 *ts);
d337f35e 9048+
d33d7b00
AM
9049+#else
9050+#define vx_adjust_timespec(t) do { } while (0)
9051+#define vx_settimeofday(t) do_settimeofday(t)
cc23e853 9052+#define vx_settimeofday64(t) do_settimeofday64(t)
d33d7b00 9053+#endif
d337f35e 9054+
d33d7b00
AM
9055+#else
9056+#warning duplicate inclusion
9057+#endif
09a55596
AM
9058diff -NurpP --minimal linux-4.9.135/include/linux/vserver/base.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/base.h
9059--- linux-4.9.135/include/linux/vserver/base.h 1970-01-01 00:00:00.000000000 +0000
9060+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/base.h 2018-10-20 04:58:14.000000000 +0000
c2e5f7c8 9061@@ -0,0 +1,184 @@
4bf69007
AM
9062+#ifndef _VSERVER_BASE_H
9063+#define _VSERVER_BASE_H
d337f35e 9064+
d337f35e 9065+
d33d7b00 9066+/* context state changes */
d337f35e 9067+
d33d7b00
AM
9068+enum {
9069+ VSC_STARTUP = 1,
9070+ VSC_SHUTDOWN,
d337f35e 9071+
d33d7b00
AM
9072+ VSC_NETUP,
9073+ VSC_NETDOWN,
3bac966d 9074+};
d337f35e 9075+
d337f35e
JR
9076+
9077+
d33d7b00 9078+#define vx_task_xid(t) ((t)->xid)
d337f35e 9079+
d33d7b00 9080+#define vx_current_xid() vx_task_xid(current)
d337f35e 9081+
d33d7b00 9082+#define current_vx_info() (current->vx_info)
d337f35e 9083+
ba86f833 9084+
d33d7b00 9085+#define nx_task_nid(t) ((t)->nid)
ba86f833 9086+
d33d7b00 9087+#define nx_current_nid() nx_task_nid(current)
d337f35e 9088+
d33d7b00 9089+#define current_nx_info() (current->nx_info)
d337f35e 9090+
d337f35e 9091+
d33d7b00 9092+/* generic flag merging */
d337f35e 9093+
d33d7b00 9094+#define vs_check_flags(v, m, f) (((v) & (m)) ^ (f))
d337f35e 9095+
d33d7b00 9096+#define vs_mask_flags(v, f, m) (((v) & ~(m)) | ((f) & (m)))
d337f35e 9097+
d33d7b00 9098+#define vs_mask_mask(v, f, m) (((v) & ~(m)) | ((v) & (f) & (m)))
d337f35e 9099+
d33d7b00 9100+#define vs_check_bit(v, n) ((v) & (1LL << (n)))
d337f35e 9101+
d337f35e 9102+
d33d7b00 9103+/* context flags */
d337f35e 9104+
d33d7b00 9105+#define __vx_flags(v) ((v) ? (v)->vx_flags : 0)
d337f35e 9106+
d33d7b00 9107+#define vx_current_flags() __vx_flags(current_vx_info())
d337f35e 9108+
d33d7b00
AM
9109+#define vx_info_flags(v, m, f) \
9110+ vs_check_flags(__vx_flags(v), m, f)
d337f35e 9111+
d33d7b00
AM
9112+#define task_vx_flags(t, m, f) \
9113+ ((t) && vx_info_flags((t)->vx_info, m, f))
d337f35e 9114+
d33d7b00 9115+#define vx_flags(m, f) vx_info_flags(current_vx_info(), m, f)
d337f35e
JR
9116+
9117+
d33d7b00 9118+/* context caps */
d337f35e 9119+
d33d7b00 9120+#define __vx_ccaps(v) ((v) ? (v)->vx_ccaps : 0)
d337f35e 9121+
d33d7b00 9122+#define vx_current_ccaps() __vx_ccaps(current_vx_info())
d337f35e 9123+
d33d7b00 9124+#define vx_info_ccaps(v, c) (__vx_ccaps(v) & (c))
d337f35e 9125+
d33d7b00 9126+#define vx_ccaps(c) vx_info_ccaps(current_vx_info(), (c))
d337f35e 9127+
d337f35e
JR
9128+
9129+
d33d7b00 9130+/* network flags */
2380c486 9131+
d33d7b00 9132+#define __nx_flags(n) ((n) ? (n)->nx_flags : 0)
d337f35e 9133+
d33d7b00 9134+#define nx_current_flags() __nx_flags(current_nx_info())
d337f35e 9135+
d33d7b00
AM
9136+#define nx_info_flags(n, m, f) \
9137+ vs_check_flags(__nx_flags(n), m, f)
d337f35e 9138+
d33d7b00
AM
9139+#define task_nx_flags(t, m, f) \
9140+ ((t) && nx_info_flags((t)->nx_info, m, f))
d337f35e 9141+
d33d7b00 9142+#define nx_flags(m, f) nx_info_flags(current_nx_info(), m, f)
d337f35e 9143+
d337f35e 9144+
d33d7b00 9145+/* network caps */
d337f35e 9146+
d33d7b00 9147+#define __nx_ncaps(n) ((n) ? (n)->nx_ncaps : 0)
d337f35e 9148+
d33d7b00 9149+#define nx_current_ncaps() __nx_ncaps(current_nx_info())
d337f35e 9150+
d33d7b00 9151+#define nx_info_ncaps(n, c) (__nx_ncaps(n) & (c))
d337f35e 9152+
d33d7b00 9153+#define nx_ncaps(c) nx_info_ncaps(current_nx_info(), c)
d337f35e 9154+
d337f35e 9155+
d33d7b00 9156+/* context mask capabilities */
d337f35e 9157+
d33d7b00 9158+#define __vx_mcaps(v) ((v) ? (v)->vx_ccaps >> 32UL : ~0 )
d337f35e 9159+
d33d7b00 9160+#define vx_info_mcaps(v, c) (__vx_mcaps(v) & (c))
d337f35e 9161+
d33d7b00 9162+#define vx_mcaps(c) vx_info_mcaps(current_vx_info(), c)
d337f35e
JR
9163+
9164+
d33d7b00 9165+/* context bcap mask */
d337f35e 9166+
d33d7b00 9167+#define __vx_bcaps(v) ((v)->vx_bcaps)
d337f35e 9168+
d33d7b00 9169+#define vx_current_bcaps() __vx_bcaps(current_vx_info())
d337f35e 9170+
d337f35e 9171+
d33d7b00 9172+/* mask given bcaps */
adc1caaa 9173+
d33d7b00 9174+#define vx_info_mbcaps(v, c) ((v) ? cap_intersect(__vx_bcaps(v), c) : c)
2380c486 9175+
d33d7b00 9176+#define vx_mbcaps(c) vx_info_mbcaps(current_vx_info(), c)
d337f35e
JR
9177+
9178+
d33d7b00 9179+/* masked cap_bset */
2380c486 9180+
d33d7b00 9181+#define vx_info_cap_bset(v) vx_info_mbcaps(v, current->cap_bset)
2380c486 9182+
d33d7b00 9183+#define vx_current_cap_bset() vx_info_cap_bset(current_vx_info())
d337f35e 9184+
d33d7b00
AM
9185+#if 0
9186+#define vx_info_mbcap(v, b) \
9187+ (!vx_info_flags(v, VXF_STATE_SETUP, 0) ? \
9188+ vx_info_bcaps(v, b) : (b))
d337f35e 9189+
d33d7b00
AM
9190+#define task_vx_mbcap(t, b) \
9191+ vx_info_mbcap((t)->vx_info, (t)->b)
9192+
9193+#define vx_mbcap(b) task_vx_mbcap(current, b)
3bac966d 9194+#endif
d337f35e 9195+
d33d7b00 9196+#define vx_cap_raised(v, c, f) cap_raised(vx_info_mbcaps(v, c), f)
d337f35e 9197+
d33d7b00
AM
9198+#define vx_capable(b, c) (capable(b) || \
9199+ (cap_raised(current_cap(), b) && vx_ccaps(c)))
d337f35e 9200+
763640ca
JR
9201+#define vx_ns_capable(n, b, c) (ns_capable(n, b) || \
9202+ (cap_raised(current_cap(), b) && vx_ccaps(c)))
9203+
d33d7b00
AM
9204+#define nx_capable(b, c) (capable(b) || \
9205+ (cap_raised(current_cap(), b) && nx_ncaps(c)))
d337f35e 9206+
c2e5f7c8
JR
9207+#define nx_ns_capable(n, b, c) (ns_capable(n, b) || \
9208+ (cap_raised(current_cap(), b) && nx_ncaps(c)))
9209+
d33d7b00
AM
9210+#define vx_task_initpid(t, n) \
9211+ ((t)->vx_info && \
9212+ ((t)->vx_info->vx_initpid == (n)))
d337f35e 9213+
d33d7b00 9214+#define vx_current_initpid(n) vx_task_initpid(current, n)
d337f35e 9215+
d337f35e 9216+
d33d7b00 9217+/* context unshare mask */
d337f35e 9218+
d33d7b00 9219+#define __vx_umask(v) ((v)->vx_umask)
7e46296a 9220+
d33d7b00 9221+#define vx_current_umask() __vx_umask(current_vx_info())
7e46296a 9222+
d33d7b00
AM
9223+#define vx_can_unshare(b, f) (capable(b) || \
9224+ (cap_raised(current_cap(), b) && \
9225+ !((f) & ~vx_current_umask())))
7e46296a 9226+
b00e13aa
AM
9227+#define vx_ns_can_unshare(n, b, f) (ns_capable(n, b) || \
9228+ (cap_raised(current_cap(), b) && \
9229+ !((f) & ~vx_current_umask())))
7e46296a 9230+
265d6dcc
JR
9231+#define __vx_wmask(v) ((v)->vx_wmask)
9232+
9233+#define vx_current_wmask() __vx_wmask(current_vx_info())
9234+
9235+
d33d7b00 9236+#define __vx_state(v) ((v) ? ((v)->vx_state) : 0)
7e46296a 9237+
d33d7b00 9238+#define vx_info_state(v, m) (__vx_state(v) & (m))
d337f35e 9239+
d337f35e 9240+
d33d7b00 9241+#define __nx_state(n) ((n) ? ((n)->nx_state) : 0)
d337f35e 9242+
d33d7b00 9243+#define nx_info_state(n, m) (__nx_state(n) & (m))
d337f35e 9244+
d33d7b00 9245+#endif
09a55596
AM
9246diff -NurpP --minimal linux-4.9.135/include/linux/vserver/cacct.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/cacct.h
9247--- linux-4.9.135/include/linux/vserver/cacct.h 1970-01-01 00:00:00.000000000 +0000
9248+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/cacct.h 2018-10-20 04:58:14.000000000 +0000
d33d7b00 9249@@ -0,0 +1,15 @@
4bf69007
AM
9250+#ifndef _VSERVER_CACCT_H
9251+#define _VSERVER_CACCT_H
d337f35e 9252+
d337f35e 9253+
d33d7b00
AM
9254+enum sock_acc_field {
9255+ VXA_SOCK_UNSPEC = 0,
9256+ VXA_SOCK_UNIX,
9257+ VXA_SOCK_INET,
9258+ VXA_SOCK_INET6,
9259+ VXA_SOCK_PACKET,
9260+ VXA_SOCK_OTHER,
9261+ VXA_SOCK_SIZE /* array size */
9262+};
d337f35e 9263+
4bf69007 9264+#endif /* _VSERVER_CACCT_H */
09a55596
AM
9265diff -NurpP --minimal linux-4.9.135/include/linux/vserver/cacct_cmd.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/cacct_cmd.h
9266--- linux-4.9.135/include/linux/vserver/cacct_cmd.h 1970-01-01 00:00:00.000000000 +0000
9267+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/cacct_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
9268@@ -0,0 +1,10 @@
9269+#ifndef _VSERVER_CACCT_CMD_H
9270+#define _VSERVER_CACCT_CMD_H
d337f35e 9271+
d337f35e 9272+
3bac966d 9273+#include <linux/compiler.h>
4bf69007 9274+#include <uapi/vserver/cacct_cmd.h>
d337f35e 9275+
d33d7b00 9276+extern int vc_sock_stat(struct vx_info *, void __user *);
d337f35e 9277+
4bf69007 9278+#endif /* _VSERVER_CACCT_CMD_H */
09a55596
AM
9279diff -NurpP --minimal linux-4.9.135/include/linux/vserver/cacct_def.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/cacct_def.h
9280--- linux-4.9.135/include/linux/vserver/cacct_def.h 1970-01-01 00:00:00.000000000 +0000
9281+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/cacct_def.h 2018-10-20 04:58:14.000000000 +0000
d33d7b00 9282@@ -0,0 +1,43 @@
4bf69007
AM
9283+#ifndef _VSERVER_CACCT_DEF_H
9284+#define _VSERVER_CACCT_DEF_H
d337f35e 9285+
d33d7b00
AM
9286+#include <asm/atomic.h>
9287+#include <linux/vserver/cacct.h>
d337f35e
JR
9288+
9289+
d33d7b00
AM
9290+struct _vx_sock_acc {
9291+ atomic_long_t count;
9292+ atomic_long_t total;
9293+};
d337f35e 9294+
d33d7b00 9295+/* context sub struct */
d337f35e 9296+
d33d7b00
AM
9297+struct _vx_cacct {
9298+ struct _vx_sock_acc sock[VXA_SOCK_SIZE][3];
9299+ atomic_t slab[8];
9300+ atomic_t page[6][8];
9301+};
d337f35e 9302+
d33d7b00 9303+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 9304+
d33d7b00
AM
9305+static inline void __dump_vx_cacct(struct _vx_cacct *cacct)
9306+{
9307+ int i, j;
d337f35e 9308+
d33d7b00
AM
9309+ printk("\t_vx_cacct:");
9310+ for (i = 0; i < 6; i++) {
9311+ struct _vx_sock_acc *ptr = cacct->sock[i];
d337f35e 9312+
d33d7b00
AM
9313+ printk("\t [%d] =", i);
9314+ for (j = 0; j < 3; j++) {
9315+ printk(" [%d] = %8lu, %8lu", j,
9316+ atomic_long_read(&ptr[j].count),
9317+ atomic_long_read(&ptr[j].total));
9318+ }
9319+ printk("\n");
9320+ }
9321+}
2380c486 9322+
d33d7b00 9323+#endif
d337f35e 9324+
4bf69007 9325+#endif /* _VSERVER_CACCT_DEF_H */
09a55596
AM
9326diff -NurpP --minimal linux-4.9.135/include/linux/vserver/cacct_int.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/cacct_int.h
9327--- linux-4.9.135/include/linux/vserver/cacct_int.h 1970-01-01 00:00:00.000000000 +0000
9328+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/cacct_int.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
9329@@ -0,0 +1,17 @@
9330+#ifndef _VSERVER_CACCT_INT_H
9331+#define _VSERVER_CACCT_INT_H
d337f35e 9332+
d33d7b00
AM
9333+static inline
9334+unsigned long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
9335+{
9336+ return atomic_long_read(&cacct->sock[type][pos].count);
9337+}
d337f35e 9338+
d337f35e 9339+
d33d7b00
AM
9340+static inline
9341+unsigned long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
9342+{
9343+ return atomic_long_read(&cacct->sock[type][pos].total);
9344+}
d337f35e 9345+
4bf69007 9346+#endif /* _VSERVER_CACCT_INT_H */
09a55596
AM
9347diff -NurpP --minimal linux-4.9.135/include/linux/vserver/check.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/check.h
9348--- linux-4.9.135/include/linux/vserver/check.h 1970-01-01 00:00:00.000000000 +0000
9349+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/check.h 2018-10-20 04:58:14.000000000 +0000
d33d7b00 9350@@ -0,0 +1,89 @@
4bf69007
AM
9351+#ifndef _VSERVER_CHECK_H
9352+#define _VSERVER_CHECK_H
d337f35e 9353+
d337f35e 9354+
d33d7b00 9355+#define MAX_S_CONTEXT 65535 /* Arbitrary limit */
d337f35e 9356+
d33d7b00
AM
9357+#ifdef CONFIG_VSERVER_DYNAMIC_IDS
9358+#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */
9359+#else
9360+#define MIN_D_CONTEXT 65536
9361+#endif
d337f35e 9362+
d33d7b00 9363+/* check conditions */
d337f35e 9364+
d33d7b00
AM
9365+#define VS_ADMIN 0x0001
9366+#define VS_WATCH 0x0002
9367+#define VS_HIDE 0x0004
9368+#define VS_HOSTID 0x0008
d337f35e 9369+
d33d7b00
AM
9370+#define VS_IDENT 0x0010
9371+#define VS_EQUIV 0x0020
9372+#define VS_PARENT 0x0040
9373+#define VS_CHILD 0x0080
d337f35e 9374+
d33d7b00 9375+#define VS_ARG_MASK 0x00F0
d337f35e 9376+
d33d7b00
AM
9377+#define VS_DYNAMIC 0x0100
9378+#define VS_STATIC 0x0200
d337f35e 9379+
d33d7b00 9380+#define VS_ATR_MASK 0x0F00
d337f35e 9381+
d33d7b00
AM
9382+#ifdef CONFIG_VSERVER_PRIVACY
9383+#define VS_ADMIN_P (0)
9384+#define VS_WATCH_P (0)
9385+#else
9386+#define VS_ADMIN_P VS_ADMIN
9387+#define VS_WATCH_P VS_WATCH
9388+#endif
d337f35e 9389+
d33d7b00
AM
9390+#define VS_HARDIRQ 0x1000
9391+#define VS_SOFTIRQ 0x2000
9392+#define VS_IRQ 0x4000
d337f35e 9393+
d33d7b00 9394+#define VS_IRQ_MASK 0xF000
d337f35e 9395+
d33d7b00 9396+#include <linux/hardirq.h>
d337f35e 9397+
d33d7b00
AM
9398+/*
9399+ * check current context for ADMIN/WATCH and
9400+ * optionally against supplied argument
9401+ */
9402+static inline int __vs_check(int cid, int id, unsigned int mode)
9403+{
9404+ if (mode & VS_ARG_MASK) {
9405+ if ((mode & VS_IDENT) && (id == cid))
9406+ return 1;
9407+ }
9408+ if (mode & VS_ATR_MASK) {
9409+ if ((mode & VS_DYNAMIC) &&
9410+ (id >= MIN_D_CONTEXT) &&
9411+ (id <= MAX_S_CONTEXT))
9412+ return 1;
9413+ if ((mode & VS_STATIC) &&
9414+ (id > 1) && (id < MIN_D_CONTEXT))
9415+ return 1;
9416+ }
9417+ if (mode & VS_IRQ_MASK) {
9418+ if ((mode & VS_IRQ) && unlikely(in_interrupt()))
9419+ return 1;
9420+ if ((mode & VS_HARDIRQ) && unlikely(in_irq()))
9421+ return 1;
9422+ if ((mode & VS_SOFTIRQ) && unlikely(in_softirq()))
9423+ return 1;
9424+ }
9425+ return (((mode & VS_ADMIN) && (cid == 0)) ||
9426+ ((mode & VS_WATCH) && (cid == 1)) ||
9427+ ((mode & VS_HOSTID) && (id == 0)));
9428+}
d337f35e 9429+
d33d7b00 9430+#define vx_check(c, m) __vs_check(vx_current_xid(), c, (m) | VS_IRQ)
d337f35e 9431+
d33d7b00 9432+#define vx_weak_check(c, m) ((m) ? vx_check(c, m) : 1)
2380c486 9433+
d337f35e 9434+
d33d7b00 9435+#define nx_check(c, m) __vs_check(nx_current_nid(), c, m)
d337f35e 9436+
d33d7b00 9437+#define nx_weak_check(c, m) ((m) ? nx_check(c, m) : 1)
d337f35e 9438+
d33d7b00 9439+#endif
09a55596
AM
9440diff -NurpP --minimal linux-4.9.135/include/linux/vserver/context.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/context.h
9441--- linux-4.9.135/include/linux/vserver/context.h 1970-01-01 00:00:00.000000000 +0000
9442+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/context.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
9443@@ -0,0 +1,110 @@
9444+#ifndef _VSERVER_CONTEXT_H
9445+#define _VSERVER_CONTEXT_H
d337f35e
JR
9446+
9447+
d33d7b00
AM
9448+#include <linux/list.h>
9449+#include <linux/spinlock.h>
9450+#include <linux/rcupdate.h>
4bf69007 9451+#include <uapi/vserver/context.h>
d337f35e 9452+
d33d7b00
AM
9453+#include "limit_def.h"
9454+#include "sched_def.h"
9455+#include "cvirt_def.h"
9456+#include "cacct_def.h"
9457+#include "device_def.h"
d337f35e 9458+
d33d7b00 9459+#define VX_SPACES 2
d337f35e 9460+
d33d7b00
AM
9461+struct _vx_info_pc {
9462+ struct _vx_sched_pc sched_pc;
9463+ struct _vx_cvirt_pc cvirt_pc;
9464+};
d337f35e 9465+
d33d7b00
AM
9466+struct _vx_space {
9467+ unsigned long vx_nsmask; /* assignment mask */
9468+ struct nsproxy *vx_nsproxy; /* private namespaces */
9469+ struct fs_struct *vx_fs; /* private namespace fs */
9470+ const struct cred *vx_cred; /* task credentials */
9471+};
d337f35e 9472+
d33d7b00
AM
9473+struct vx_info {
9474+ struct hlist_node vx_hlist; /* linked list of contexts */
61333608 9475+ vxid_t vx_id; /* context id */
d33d7b00
AM
9476+ atomic_t vx_usecnt; /* usage count */
9477+ atomic_t vx_tasks; /* tasks count */
9478+ struct vx_info *vx_parent; /* parent context */
9479+ int vx_state; /* context state */
d337f35e 9480+
d33d7b00 9481+ struct _vx_space space[VX_SPACES]; /* namespace store */
d337f35e 9482+
d33d7b00
AM
9483+ uint64_t vx_flags; /* context flags */
9484+ uint64_t vx_ccaps; /* context caps (vserver) */
763640ca 9485+ uint64_t vx_umask; /* unshare mask (guest) */
265d6dcc 9486+ uint64_t vx_wmask; /* warn mask (guest) */
d33d7b00 9487+ kernel_cap_t vx_bcaps; /* bounding caps (system) */
d337f35e 9488+
d33d7b00
AM
9489+ struct task_struct *vx_reaper; /* guest reaper process */
9490+ pid_t vx_initpid; /* PID of guest init */
9491+ int64_t vx_badness_bias; /* OOM points bias */
d337f35e 9492+
d33d7b00
AM
9493+ struct _vx_limit limit; /* vserver limits */
9494+ struct _vx_sched sched; /* vserver scheduler */
9495+ struct _vx_cvirt cvirt; /* virtual/bias stuff */
9496+ struct _vx_cacct cacct; /* context accounting */
d337f35e 9497+
d33d7b00 9498+ struct _vx_device dmap; /* default device map targets */
d337f35e 9499+
d33d7b00
AM
9500+#ifndef CONFIG_SMP
9501+ struct _vx_info_pc info_pc; /* per cpu data */
9502+#else
9503+ struct _vx_info_pc *ptr_pc; /* per cpu array */
9504+#endif
d337f35e 9505+
d33d7b00
AM
9506+ wait_queue_head_t vx_wait; /* context exit waitqueue */
9507+ int reboot_cmd; /* last sys_reboot() cmd */
9508+ int exit_code; /* last process exit code */
d337f35e 9509+
d33d7b00
AM
9510+ char vx_name[65]; /* vserver name */
9511+};
d337f35e 9512+
d33d7b00
AM
9513+#ifndef CONFIG_SMP
9514+#define vx_ptr_pc(vxi) (&(vxi)->info_pc)
9515+#define vx_per_cpu(vxi, v, id) vx_ptr_pc(vxi)->v
9516+#else
9517+#define vx_ptr_pc(vxi) ((vxi)->ptr_pc)
9518+#define vx_per_cpu(vxi, v, id) per_cpu_ptr(vx_ptr_pc(vxi), id)->v
9519+#endif
d337f35e 9520+
d33d7b00 9521+#define vx_cpu(vxi, v) vx_per_cpu(vxi, v, smp_processor_id())
d337f35e 9522+
d337f35e 9523+
d33d7b00
AM
9524+struct vx_info_save {
9525+ struct vx_info *vxi;
61333608 9526+ vxid_t xid;
d33d7b00 9527+};
d337f35e
JR
9528+
9529+
d33d7b00 9530+/* status flags */
d337f35e 9531+
d33d7b00
AM
9532+#define VXS_HASHED 0x0001
9533+#define VXS_PAUSED 0x0010
9534+#define VXS_SHUTDOWN 0x0100
9535+#define VXS_HELPER 0x1000
9536+#define VXS_RELEASED 0x8000
d337f35e 9537+
d337f35e 9538+
d33d7b00
AM
9539+extern void claim_vx_info(struct vx_info *, struct task_struct *);
9540+extern void release_vx_info(struct vx_info *, struct task_struct *);
adc1caaa 9541+
d33d7b00
AM
9542+extern struct vx_info *lookup_vx_info(int);
9543+extern struct vx_info *lookup_or_create_vx_info(int);
d337f35e 9544+
d33d7b00 9545+extern int get_xid_list(int, unsigned int *, int);
61333608 9546+extern int xid_is_hashed(vxid_t);
d337f35e 9547+
d33d7b00 9548+extern int vx_migrate_task(struct task_struct *, struct vx_info *, int);
d337f35e 9549+
d33d7b00 9550+extern long vs_state_change(struct vx_info *, unsigned int);
d337f35e 9551+
d337f35e 9552+
4bf69007 9553+#endif /* _VSERVER_CONTEXT_H */
09a55596
AM
9554diff -NurpP --minimal linux-4.9.135/include/linux/vserver/context_cmd.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/context_cmd.h
9555--- linux-4.9.135/include/linux/vserver/context_cmd.h 1970-01-01 00:00:00.000000000 +0000
9556+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/context_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
9557@@ -0,0 +1,33 @@
9558+#ifndef _VSERVER_CONTEXT_CMD_H
9559+#define _VSERVER_CONTEXT_CMD_H
d337f35e 9560+
4bf69007 9561+#include <uapi/vserver/context_cmd.h>
d337f35e 9562+
d33d7b00 9563+extern int vc_task_xid(uint32_t);
d337f35e 9564+
d33d7b00 9565+extern int vc_vx_info(struct vx_info *, void __user *);
d337f35e 9566+
d33d7b00 9567+extern int vc_ctx_stat(struct vx_info *, void __user *);
d337f35e 9568+
4bf69007
AM
9569+extern int vc_ctx_create(uint32_t, void __user *);
9570+extern int vc_ctx_migrate(struct vx_info *, void __user *);
d337f35e 9571+
4bf69007
AM
9572+extern int vc_get_cflags(struct vx_info *, void __user *);
9573+extern int vc_set_cflags(struct vx_info *, void __user *);
d337f35e 9574+
4bf69007
AM
9575+extern int vc_get_ccaps(struct vx_info *, void __user *);
9576+extern int vc_set_ccaps(struct vx_info *, void __user *);
d337f35e 9577+
4bf69007
AM
9578+extern int vc_get_bcaps(struct vx_info *, void __user *);
9579+extern int vc_set_bcaps(struct vx_info *, void __user *);
d337f35e 9580+
4bf69007
AM
9581+extern int vc_get_umask(struct vx_info *, void __user *);
9582+extern int vc_set_umask(struct vx_info *, void __user *);
d33d7b00 9583+
4bf69007
AM
9584+extern int vc_get_wmask(struct vx_info *, void __user *);
9585+extern int vc_set_wmask(struct vx_info *, void __user *);
d33d7b00 9586+
4bf69007
AM
9587+extern int vc_get_badness(struct vx_info *, void __user *);
9588+extern int vc_set_badness(struct vx_info *, void __user *);
d337f35e 9589+
4bf69007 9590+#endif /* _VSERVER_CONTEXT_CMD_H */
09a55596
AM
9591diff -NurpP --minimal linux-4.9.135/include/linux/vserver/cvirt.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/cvirt.h
9592--- linux-4.9.135/include/linux/vserver/cvirt.h 1970-01-01 00:00:00.000000000 +0000
9593+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/cvirt.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
9594@@ -0,0 +1,18 @@
9595+#ifndef _VSERVER_CVIRT_H
9596+#define _VSERVER_CVIRT_H
d337f35e 9597+
4bf69007 9598+struct timespec;
d337f35e 9599+
369dbd59 9600+void vx_vsi_boottime64(struct timespec64 *);
d337f35e 9601+
4bf69007 9602+void vx_vsi_uptime(struct timespec *, struct timespec *);
d337f35e 9603+
d337f35e 9604+
4bf69007 9605+struct vx_info;
d337f35e 9606+
4bf69007 9607+void vx_update_load(struct vx_info *);
d337f35e 9608+
d337f35e 9609+
4bf69007 9610+int vx_do_syslog(int, char __user *, int);
d337f35e 9611+
4bf69007 9612+#endif /* _VSERVER_CVIRT_H */
09a55596
AM
9613diff -NurpP --minimal linux-4.9.135/include/linux/vserver/cvirt_cmd.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/cvirt_cmd.h
9614--- linux-4.9.135/include/linux/vserver/cvirt_cmd.h 1970-01-01 00:00:00.000000000 +0000
9615+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/cvirt_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
9616@@ -0,0 +1,13 @@
9617+#ifndef _VSERVER_CVIRT_CMD_H
9618+#define _VSERVER_CVIRT_CMD_H
d337f35e 9619+
d337f35e 9620+
4bf69007
AM
9621+#include <linux/compiler.h>
9622+#include <uapi/vserver/cvirt_cmd.h>
d337f35e 9623+
4bf69007
AM
9624+extern int vc_set_vhi_name(struct vx_info *, void __user *);
9625+extern int vc_get_vhi_name(struct vx_info *, void __user *);
d337f35e 9626+
4bf69007 9627+extern int vc_virt_stat(struct vx_info *, void __user *);
d337f35e 9628+
4bf69007 9629+#endif /* _VSERVER_CVIRT_CMD_H */
09a55596
AM
9630diff -NurpP --minimal linux-4.9.135/include/linux/vserver/cvirt_def.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/cvirt_def.h
9631--- linux-4.9.135/include/linux/vserver/cvirt_def.h 1970-01-01 00:00:00.000000000 +0000
9632+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/cvirt_def.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
9633@@ -0,0 +1,80 @@
9634+#ifndef _VSERVER_CVIRT_DEF_H
9635+#define _VSERVER_CVIRT_DEF_H
d337f35e 9636+
d33d7b00
AM
9637+#include <linux/jiffies.h>
9638+#include <linux/spinlock.h>
9639+#include <linux/wait.h>
9640+#include <linux/time.h>
9641+#include <asm/atomic.h>
d337f35e 9642+
d337f35e 9643+
d33d7b00
AM
9644+struct _vx_usage_stat {
9645+ uint64_t user;
9646+ uint64_t nice;
9647+ uint64_t system;
9648+ uint64_t softirq;
9649+ uint64_t irq;
9650+ uint64_t idle;
9651+ uint64_t iowait;
9652+};
d337f35e 9653+
d33d7b00
AM
9654+struct _vx_syslog {
9655+ wait_queue_head_t log_wait;
9656+ spinlock_t logbuf_lock; /* lock for the log buffer */
d337f35e 9657+
d33d7b00
AM
9658+ unsigned long log_start; /* next char to be read by syslog() */
9659+ unsigned long con_start; /* next char to be sent to consoles */
9660+ unsigned long log_end; /* most-recently-written-char + 1 */
9661+ unsigned long logged_chars; /* #chars since last read+clear operation */
d337f35e 9662+
d33d7b00
AM
9663+ char log_buf[1024];
9664+};
d337f35e 9665+
d337f35e 9666+
d33d7b00 9667+/* context sub struct */
d337f35e 9668+
d33d7b00
AM
9669+struct _vx_cvirt {
9670+ atomic_t nr_threads; /* number of current threads */
9671+ atomic_t nr_running; /* number of running threads */
9672+ atomic_t nr_uninterruptible; /* number of uninterruptible threads */
d337f35e 9673+
d33d7b00
AM
9674+ atomic_t nr_onhold; /* processes on hold */
9675+ uint32_t onhold_last; /* jiffies when put on hold */
d337f35e 9676+
cc23e853
AM
9677+ struct timespec64 bias_ts; /* time offset to the host */
9678+ struct timespec64 bias_idle;
9679+ struct timespec64 bias_uptime; /* context creation point */
d33d7b00 9680+ uint64_t bias_clock; /* offset in clock_t */
3bac966d 9681+
d33d7b00
AM
9682+ spinlock_t load_lock; /* lock for the load averages */
9683+ atomic_t load_updates; /* nr of load updates done so far */
9684+ uint32_t load_last; /* last time load was calculated */
9685+ uint32_t load[3]; /* load averages 1,5,15 */
d337f35e 9686+
d33d7b00 9687+ atomic_t total_forks; /* number of forks so far */
d337f35e 9688+
d33d7b00
AM
9689+ struct _vx_syslog syslog;
9690+};
d337f35e 9691+
d33d7b00
AM
9692+struct _vx_cvirt_pc {
9693+ struct _vx_usage_stat cpustat;
9694+};
3bac966d 9695+
d337f35e 9696+
d33d7b00 9697+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 9698+
d33d7b00 9699+static inline void __dump_vx_cvirt(struct _vx_cvirt *cvirt)
3bac966d 9700+{
d33d7b00
AM
9701+ printk("\t_vx_cvirt:\n");
9702+ printk("\t threads: %4d, %4d, %4d, %4d\n",
9703+ atomic_read(&cvirt->nr_threads),
9704+ atomic_read(&cvirt->nr_running),
9705+ atomic_read(&cvirt->nr_uninterruptible),
9706+ atomic_read(&cvirt->nr_onhold));
9707+ /* add rest here */
9708+ printk("\t total_forks = %d\n", atomic_read(&cvirt->total_forks));
3bac966d 9709+}
d337f35e 9710+
d33d7b00 9711+#endif
d337f35e 9712+
4bf69007 9713+#endif /* _VSERVER_CVIRT_DEF_H */
09a55596
AM
9714diff -NurpP --minimal linux-4.9.135/include/linux/vserver/debug.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/debug.h
9715--- linux-4.9.135/include/linux/vserver/debug.h 1970-01-01 00:00:00.000000000 +0000
9716+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/debug.h 2018-10-20 04:58:14.000000000 +0000
a4a22af8 9717@@ -0,0 +1,146 @@
4bf69007
AM
9718+#ifndef _VSERVER_DEBUG_H
9719+#define _VSERVER_DEBUG_H
d337f35e 9720+
d337f35e 9721+
dd5f3080 9722+#define VXD_CBIT(n, m) (vs_debug_ ## n & (1 << (m)))
9723+#define VXD_CMIN(n, m) (vs_debug_ ## n > (m))
9724+#define VXD_MASK(n, m) (vs_debug_ ## n & (m))
d337f35e 9725+
d33d7b00
AM
9726+#define VXD_DEV(d) (d), (d)->bd_inode->i_ino, \
9727+ imajor((d)->bd_inode), iminor((d)->bd_inode)
9728+#define VXF_DEV "%p[%lu,%d:%d]"
d337f35e 9729+
d33d7b00
AM
9730+#if defined(CONFIG_QUOTES_UTF8)
9731+#define VS_Q_LQM "\xc2\xbb"
9732+#define VS_Q_RQM "\xc2\xab"
9733+#elif defined(CONFIG_QUOTES_ASCII)
9734+#define VS_Q_LQM "\x27"
9735+#define VS_Q_RQM "\x27"
9736+#else
9737+#define VS_Q_LQM "\xbb"
9738+#define VS_Q_RQM "\xab"
9739+#endif
d337f35e 9740+
d33d7b00 9741+#define VS_Q(f) VS_Q_LQM f VS_Q_RQM
d337f35e
JR
9742+
9743+
d33d7b00
AM
9744+#define vxd_path(p) \
9745+ ({ static char _buffer[PATH_MAX]; \
9746+ d_path(p, _buffer, sizeof(_buffer)); })
d337f35e 9747+
d33d7b00
AM
9748+#define vxd_cond_path(n) \
9749+ ((n) ? vxd_path(&(n)->path) : "<null>" )
d337f35e 9750+
d337f35e 9751+
d33d7b00 9752+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 9753+
dd5f3080 9754+extern unsigned int vs_debug_switch;
9755+extern unsigned int vs_debug_xid;
9756+extern unsigned int vs_debug_nid;
9757+extern unsigned int vs_debug_tag;
9758+extern unsigned int vs_debug_net;
9759+extern unsigned int vs_debug_limit;
9760+extern unsigned int vs_debug_cres;
9761+extern unsigned int vs_debug_dlim;
9762+extern unsigned int vs_debug_quota;
9763+extern unsigned int vs_debug_cvirt;
9764+extern unsigned int vs_debug_space;
9765+extern unsigned int vs_debug_perm;
9766+extern unsigned int vs_debug_misc;
d337f35e 9767+
d337f35e 9768+
d33d7b00
AM
9769+#define VX_LOGLEVEL "vxD: "
9770+#define VX_PROC_FMT "%p: "
9771+#define VX_PROCESS current
d337f35e 9772+
d33d7b00
AM
9773+#define vxdprintk(c, f, x...) \
9774+ do { \
9775+ if (c) \
9776+ printk(VX_LOGLEVEL VX_PROC_FMT f "\n", \
9777+ VX_PROCESS , ##x); \
9778+ } while (0)
d337f35e 9779+
d33d7b00
AM
9780+#define vxlprintk(c, f, x...) \
9781+ do { \
9782+ if (c) \
9783+ printk(VX_LOGLEVEL f " @%s:%d\n", x); \
9784+ } while (0)
d337f35e 9785+
d33d7b00
AM
9786+#define vxfprintk(c, f, x...) \
9787+ do { \
9788+ if (c) \
9789+ printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \
9790+ } while (0)
d337f35e 9791+
d337f35e 9792+
d33d7b00 9793+struct vx_info;
d337f35e 9794+
d33d7b00
AM
9795+void dump_vx_info(struct vx_info *, int);
9796+void dump_vx_info_inactive(int);
d337f35e 9797+
d33d7b00 9798+#else /* CONFIG_VSERVER_DEBUG */
d337f35e 9799+
dd5f3080 9800+#define vs_debug_switch 0
9801+#define vs_debug_xid 0
9802+#define vs_debug_nid 0
9803+#define vs_debug_tag 0
9804+#define vs_debug_net 0
9805+#define vs_debug_limit 0
9806+#define vs_debug_cres 0
9807+#define vs_debug_dlim 0
9808+#define vs_debug_quota 0
9809+#define vs_debug_cvirt 0
9810+#define vs_debug_space 0
9811+#define vs_debug_perm 0
9812+#define vs_debug_misc 0
d337f35e 9813+
d33d7b00
AM
9814+#define vxdprintk(x...) do { } while (0)
9815+#define vxlprintk(x...) do { } while (0)
9816+#define vxfprintk(x...) do { } while (0)
2380c486 9817+
d33d7b00 9818+#endif /* CONFIG_VSERVER_DEBUG */
2380c486 9819+
d337f35e 9820+
d33d7b00 9821+#ifdef CONFIG_VSERVER_WARN
d337f35e 9822+
d33d7b00
AM
9823+#define VX_WARNLEVEL KERN_WARNING "vxW: "
9824+#define VX_WARN_TASK "[" VS_Q("%s") ",%u:#%u|%u|%u] "
9825+#define VX_WARN_XID "[xid #%u] "
9826+#define VX_WARN_NID "[nid #%u] "
9827+#define VX_WARN_TAG "[tag #%u] "
d337f35e 9828+
d33d7b00
AM
9829+#define vxwprintk(c, f, x...) \
9830+ do { \
9831+ if (c) \
9832+ printk(VX_WARNLEVEL f "\n", ##x); \
9833+ } while (0)
d337f35e 9834+
d33d7b00 9835+#else /* CONFIG_VSERVER_WARN */
d337f35e 9836+
d33d7b00 9837+#define vxwprintk(x...) do { } while (0)
d337f35e 9838+
d33d7b00 9839+#endif /* CONFIG_VSERVER_WARN */
d337f35e 9840+
d33d7b00
AM
9841+#define vxwprintk_task(c, f, x...) \
9842+ vxwprintk(c, VX_WARN_TASK f, \
9843+ current->comm, current->pid, \
a4a22af8
AM
9844+ current->xid, current->nid, \
9845+ current->tag, ##x)
d33d7b00
AM
9846+#define vxwprintk_xid(c, f, x...) \
9847+ vxwprintk(c, VX_WARN_XID f, current->xid, x)
9848+#define vxwprintk_nid(c, f, x...) \
9849+ vxwprintk(c, VX_WARN_NID f, current->nid, x)
9850+#define vxwprintk_tag(c, f, x...) \
9851+ vxwprintk(c, VX_WARN_TAG f, current->tag, x)
d337f35e 9852+
d33d7b00
AM
9853+#ifdef CONFIG_VSERVER_DEBUG
9854+#define vxd_assert_lock(l) assert_spin_locked(l)
9855+#define vxd_assert(c, f, x...) vxlprintk(!(c), \
9856+ "assertion [" f "] failed.", ##x, __FILE__, __LINE__)
9857+#else
9858+#define vxd_assert_lock(l) do { } while (0)
9859+#define vxd_assert(c, f, x...) do { } while (0)
9860+#endif
d337f35e 9861+
d337f35e 9862+
4bf69007 9863+#endif /* _VSERVER_DEBUG_H */
09a55596
AM
9864diff -NurpP --minimal linux-4.9.135/include/linux/vserver/debug_cmd.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/debug_cmd.h
9865--- linux-4.9.135/include/linux/vserver/debug_cmd.h 1970-01-01 00:00:00.000000000 +0000
9866+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/debug_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
9867@@ -0,0 +1,37 @@
9868+#ifndef _VSERVER_DEBUG_CMD_H
9869+#define _VSERVER_DEBUG_CMD_H
d337f35e 9870+
4bf69007 9871+#include <uapi/vserver/debug_cmd.h>
d337f35e
JR
9872+
9873+
d33d7b00 9874+#ifdef CONFIG_COMPAT
d337f35e 9875+
d33d7b00 9876+#include <asm/compat.h>
d337f35e 9877+
d33d7b00
AM
9878+struct vcmd_read_history_v0_x32 {
9879+ uint32_t index;
9880+ uint32_t count;
9881+ compat_uptr_t data_ptr;
3bac966d 9882+};
d337f35e 9883+
d33d7b00
AM
9884+struct vcmd_read_monitor_v0_x32 {
9885+ uint32_t index;
9886+ uint32_t count;
9887+ compat_uptr_t data_ptr;
3bac966d 9888+};
d337f35e 9889+
d33d7b00 9890+#endif /* CONFIG_COMPAT */
d337f35e 9891+
d33d7b00 9892+extern int vc_dump_history(uint32_t);
d337f35e 9893+
d33d7b00
AM
9894+extern int vc_read_history(uint32_t, void __user *);
9895+extern int vc_read_monitor(uint32_t, void __user *);
d337f35e 9896+
d33d7b00 9897+#ifdef CONFIG_COMPAT
d337f35e 9898+
d33d7b00
AM
9899+extern int vc_read_history_x32(uint32_t, void __user *);
9900+extern int vc_read_monitor_x32(uint32_t, void __user *);
d337f35e 9901+
d33d7b00 9902+#endif /* CONFIG_COMPAT */
d337f35e 9903+
4bf69007 9904+#endif /* _VSERVER_DEBUG_CMD_H */
09a55596
AM
9905diff -NurpP --minimal linux-4.9.135/include/linux/vserver/device.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/device.h
9906--- linux-4.9.135/include/linux/vserver/device.h 1970-01-01 00:00:00.000000000 +0000
9907+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/device.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
9908@@ -0,0 +1,9 @@
9909+#ifndef _VSERVER_DEVICE_H
9910+#define _VSERVER_DEVICE_H
d337f35e 9911+
d337f35e 9912+
4bf69007 9913+#include <uapi/vserver/device.h>
d337f35e 9914+
4bf69007 9915+#else /* _VSERVER_DEVICE_H */
d33d7b00 9916+#warning duplicate inclusion
4bf69007 9917+#endif /* _VSERVER_DEVICE_H */
09a55596
AM
9918diff -NurpP --minimal linux-4.9.135/include/linux/vserver/device_cmd.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/device_cmd.h
9919--- linux-4.9.135/include/linux/vserver/device_cmd.h 1970-01-01 00:00:00.000000000 +0000
9920+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/device_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
9921@@ -0,0 +1,31 @@
9922+#ifndef _VSERVER_DEVICE_CMD_H
9923+#define _VSERVER_DEVICE_CMD_H
d337f35e 9924+
4bf69007 9925+#include <uapi/vserver/device_cmd.h>
d337f35e 9926+
d337f35e 9927+
d33d7b00 9928+#ifdef CONFIG_COMPAT
d337f35e 9929+
d33d7b00 9930+#include <asm/compat.h>
3bac966d 9931+
d33d7b00
AM
9932+struct vcmd_set_mapping_v0_x32 {
9933+ compat_uptr_t device_ptr;
9934+ compat_uptr_t target_ptr;
9935+ uint32_t flags;
d337f35e
JR
9936+};
9937+
d33d7b00 9938+#endif /* CONFIG_COMPAT */
d337f35e 9939+
d33d7b00 9940+#include <linux/compiler.h>
d337f35e 9941+
d33d7b00
AM
9942+extern int vc_set_mapping(struct vx_info *, void __user *);
9943+extern int vc_unset_mapping(struct vx_info *, void __user *);
d337f35e 9944+
d33d7b00 9945+#ifdef CONFIG_COMPAT
d337f35e 9946+
d33d7b00
AM
9947+extern int vc_set_mapping_x32(struct vx_info *, void __user *);
9948+extern int vc_unset_mapping_x32(struct vx_info *, void __user *);
d337f35e 9949+
d33d7b00 9950+#endif /* CONFIG_COMPAT */
d337f35e 9951+
4bf69007 9952+#endif /* _VSERVER_DEVICE_CMD_H */
09a55596
AM
9953diff -NurpP --minimal linux-4.9.135/include/linux/vserver/device_def.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/device_def.h
9954--- linux-4.9.135/include/linux/vserver/device_def.h 1970-01-01 00:00:00.000000000 +0000
9955+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/device_def.h 2018-10-20 04:58:14.000000000 +0000
d33d7b00 9956@@ -0,0 +1,17 @@
4bf69007
AM
9957+#ifndef _VSERVER_DEVICE_DEF_H
9958+#define _VSERVER_DEVICE_DEF_H
d337f35e 9959+
d33d7b00 9960+#include <linux/types.h>
d337f35e 9961+
d33d7b00
AM
9962+struct vx_dmap_target {
9963+ dev_t target;
9964+ uint32_t flags;
9965+};
d337f35e 9966+
d33d7b00
AM
9967+struct _vx_device {
9968+#ifdef CONFIG_VSERVER_DEVICE
9969+ struct vx_dmap_target targets[2];
9970+#endif
9971+};
d337f35e 9972+
4bf69007 9973+#endif /* _VSERVER_DEVICE_DEF_H */
09a55596
AM
9974diff -NurpP --minimal linux-4.9.135/include/linux/vserver/dlimit.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/dlimit.h
9975--- linux-4.9.135/include/linux/vserver/dlimit.h 1970-01-01 00:00:00.000000000 +0000
9976+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/dlimit.h 2018-10-20 04:58:14.000000000 +0000
d33d7b00 9977@@ -0,0 +1,54 @@
4bf69007
AM
9978+#ifndef _VSERVER_DLIMIT_H
9979+#define _VSERVER_DLIMIT_H
d337f35e 9980+
d33d7b00 9981+#include "switch.h"
3bac966d 9982+
d337f35e 9983+
3bac966d 9984+#ifdef __KERNEL__
d337f35e 9985+
d33d7b00 9986+/* keep in sync with CDLIM_INFINITY */
d337f35e 9987+
d33d7b00 9988+#define DLIM_INFINITY (~0ULL)
d337f35e 9989+
d33d7b00
AM
9990+#include <linux/spinlock.h>
9991+#include <linux/rcupdate.h>
d337f35e 9992+
d33d7b00 9993+struct super_block;
d337f35e 9994+
d33d7b00
AM
9995+struct dl_info {
9996+ struct hlist_node dl_hlist; /* linked list of contexts */
9997+ struct rcu_head dl_rcu; /* the rcu head */
61333608 9998+ vtag_t dl_tag; /* context tag */
d33d7b00
AM
9999+ atomic_t dl_usecnt; /* usage count */
10000+ atomic_t dl_refcnt; /* reference count */
d337f35e 10001+
d33d7b00 10002+ struct super_block *dl_sb; /* associated superblock */
d337f35e 10003+
d33d7b00 10004+ spinlock_t dl_lock; /* protect the values */
d337f35e 10005+
d33d7b00
AM
10006+ unsigned long long dl_space_used; /* used space in bytes */
10007+ unsigned long long dl_space_total; /* maximum space in bytes */
10008+ unsigned long dl_inodes_used; /* used inodes */
10009+ unsigned long dl_inodes_total; /* maximum inodes */
d337f35e 10010+
d33d7b00
AM
10011+ unsigned int dl_nrlmult; /* non root limit mult */
10012+};
d337f35e 10013+
d33d7b00 10014+struct rcu_head;
d337f35e 10015+
d33d7b00
AM
10016+extern void rcu_free_dl_info(struct rcu_head *);
10017+extern void unhash_dl_info(struct dl_info *);
d337f35e 10018+
61333608 10019+extern struct dl_info *locate_dl_info(struct super_block *, vtag_t);
d337f35e 10020+
d337f35e 10021+
d33d7b00 10022+struct kstatfs;
d337f35e 10023+
d33d7b00 10024+extern void vx_vsi_statfs(struct super_block *, struct kstatfs *);
d337f35e 10025+
d33d7b00 10026+typedef uint64_t dlsize_t;
d337f35e 10027+
d33d7b00 10028+#endif /* __KERNEL__ */
4bf69007 10029+#else /* _VSERVER_DLIMIT_H */
d33d7b00 10030+#warning duplicate inclusion
4bf69007 10031+#endif /* _VSERVER_DLIMIT_H */
09a55596
AM
10032diff -NurpP --minimal linux-4.9.135/include/linux/vserver/dlimit_cmd.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/dlimit_cmd.h
10033--- linux-4.9.135/include/linux/vserver/dlimit_cmd.h 1970-01-01 00:00:00.000000000 +0000
10034+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/dlimit_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
10035@@ -0,0 +1,46 @@
10036+#ifndef _VSERVER_DLIMIT_CMD_H
10037+#define _VSERVER_DLIMIT_CMD_H
d337f35e 10038+
4bf69007 10039+#include <uapi/vserver/dlimit_cmd.h>
d337f35e 10040+
d337f35e 10041+
4bf69007 10042+#ifdef CONFIG_COMPAT
d337f35e 10043+
4bf69007 10044+#include <asm/compat.h>
2380c486 10045+
4bf69007
AM
10046+struct vcmd_ctx_dlimit_base_v0_x32 {
10047+ compat_uptr_t name_ptr;
d33d7b00
AM
10048+ uint32_t flags;
10049+};
adc1caaa 10050+
4bf69007
AM
10051+struct vcmd_ctx_dlimit_v0_x32 {
10052+ compat_uptr_t name_ptr;
d33d7b00
AM
10053+ uint32_t space_used; /* used space in kbytes */
10054+ uint32_t space_total; /* maximum space in kbytes */
10055+ uint32_t inodes_used; /* used inodes */
10056+ uint32_t inodes_total; /* maximum inodes */
10057+ uint32_t reserved; /* reserved for root in % */
10058+ uint32_t flags;
10059+};
d337f35e 10060+
4bf69007 10061+#endif /* CONFIG_COMPAT */
d337f35e 10062+
4bf69007 10063+#include <linux/compiler.h>
d337f35e 10064+
4bf69007
AM
10065+extern int vc_add_dlimit(uint32_t, void __user *);
10066+extern int vc_rem_dlimit(uint32_t, void __user *);
d337f35e 10067+
4bf69007
AM
10068+extern int vc_set_dlimit(uint32_t, void __user *);
10069+extern int vc_get_dlimit(uint32_t, void __user *);
d337f35e 10070+
4bf69007 10071+#ifdef CONFIG_COMPAT
d337f35e 10072+
4bf69007
AM
10073+extern int vc_add_dlimit_x32(uint32_t, void __user *);
10074+extern int vc_rem_dlimit_x32(uint32_t, void __user *);
2380c486 10075+
d33d7b00
AM
10076+extern int vc_set_dlimit_x32(uint32_t, void __user *);
10077+extern int vc_get_dlimit_x32(uint32_t, void __user *);
d337f35e 10078+
d33d7b00 10079+#endif /* CONFIG_COMPAT */
d337f35e 10080+
4bf69007 10081+#endif /* _VSERVER_DLIMIT_CMD_H */
09a55596
AM
10082diff -NurpP --minimal linux-4.9.135/include/linux/vserver/global.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/global.h
10083--- linux-4.9.135/include/linux/vserver/global.h 1970-01-01 00:00:00.000000000 +0000
10084+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/global.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 10085@@ -0,0 +1,20 @@
4bf69007
AM
10086+#ifndef _VSERVER_GLOBAL_H
10087+#define _VSERVER_GLOBAL_H
d337f35e 10088+
d337f35e 10089+
d33d7b00
AM
10090+extern atomic_t vx_global_ctotal;
10091+extern atomic_t vx_global_cactive;
d337f35e 10092+
d33d7b00
AM
10093+extern atomic_t nx_global_ctotal;
10094+extern atomic_t nx_global_cactive;
d337f35e 10095+
d33d7b00
AM
10096+extern atomic_t vs_global_nsproxy;
10097+extern atomic_t vs_global_fs;
10098+extern atomic_t vs_global_mnt_ns;
10099+extern atomic_t vs_global_uts_ns;
cc23e853 10100+extern atomic_t vs_global_ipc_ns;
d33d7b00
AM
10101+extern atomic_t vs_global_user_ns;
10102+extern atomic_t vs_global_pid_ns;
d337f35e
JR
10103+
10104+
4bf69007 10105+#endif /* _VSERVER_GLOBAL_H */
09a55596
AM
10106diff -NurpP --minimal linux-4.9.135/include/linux/vserver/history.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/history.h
10107--- linux-4.9.135/include/linux/vserver/history.h 1970-01-01 00:00:00.000000000 +0000
10108+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/history.h 2018-10-20 04:58:14.000000000 +0000
d33d7b00 10109@@ -0,0 +1,197 @@
4bf69007
AM
10110+#ifndef _VSERVER_HISTORY_H
10111+#define _VSERVER_HISTORY_H
d337f35e 10112+
d337f35e 10113+
d33d7b00
AM
10114+enum {
10115+ VXH_UNUSED = 0,
10116+ VXH_THROW_OOPS = 1,
d337f35e 10117+
d33d7b00
AM
10118+ VXH_GET_VX_INFO,
10119+ VXH_PUT_VX_INFO,
10120+ VXH_INIT_VX_INFO,
10121+ VXH_SET_VX_INFO,
10122+ VXH_CLR_VX_INFO,
10123+ VXH_CLAIM_VX_INFO,
10124+ VXH_RELEASE_VX_INFO,
10125+ VXH_ALLOC_VX_INFO,
10126+ VXH_DEALLOC_VX_INFO,
10127+ VXH_HASH_VX_INFO,
10128+ VXH_UNHASH_VX_INFO,
10129+ VXH_LOC_VX_INFO,
10130+ VXH_LOOKUP_VX_INFO,
10131+ VXH_CREATE_VX_INFO,
10132+};
d337f35e 10133+
d33d7b00
AM
10134+struct _vxhe_vxi {
10135+ struct vx_info *ptr;
10136+ unsigned xid;
10137+ unsigned usecnt;
10138+ unsigned tasks;
10139+};
d337f35e 10140+
d33d7b00
AM
10141+struct _vxhe_set_clr {
10142+ void *data;
10143+};
d337f35e 10144+
d33d7b00
AM
10145+struct _vxhe_loc_lookup {
10146+ unsigned arg;
10147+};
d337f35e 10148+
d33d7b00
AM
10149+struct _vx_hist_entry {
10150+ void *loc;
10151+ unsigned short seq;
10152+ unsigned short type;
10153+ struct _vxhe_vxi vxi;
10154+ union {
10155+ struct _vxhe_set_clr sc;
10156+ struct _vxhe_loc_lookup ll;
10157+ };
3bac966d 10158+};
d337f35e 10159+
d33d7b00 10160+#ifdef CONFIG_VSERVER_HISTORY
d337f35e 10161+
d33d7b00 10162+extern unsigned volatile int vxh_active;
d337f35e 10163+
d33d7b00 10164+struct _vx_hist_entry *vxh_advance(void *loc);
d337f35e 10165+
d337f35e 10166+
d33d7b00
AM
10167+static inline
10168+void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi)
10169+{
10170+ entry->vxi.ptr = vxi;
10171+ if (vxi) {
10172+ entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt);
10173+ entry->vxi.tasks = atomic_read(&vxi->vx_tasks);
10174+ entry->vxi.xid = vxi->vx_id;
10175+ }
10176+}
d337f35e 10177+
d337f35e 10178+
d33d7b00 10179+#define __HERE__ current_text_addr()
d337f35e 10180+
d33d7b00
AM
10181+#define __VXH_BODY(__type, __data, __here) \
10182+ struct _vx_hist_entry *entry; \
10183+ \
10184+ preempt_disable(); \
10185+ entry = vxh_advance(__here); \
10186+ __data; \
10187+ entry->type = __type; \
10188+ preempt_enable();
d337f35e 10189+
d337f35e 10190+
d33d7b00 10191+ /* pass vxi only */
d337f35e 10192+
d33d7b00
AM
10193+#define __VXH_SMPL \
10194+ __vxh_copy_vxi(entry, vxi)
d337f35e 10195+
d33d7b00
AM
10196+static inline
10197+void __vxh_smpl(struct vx_info *vxi, int __type, void *__here)
10198+{
10199+ __VXH_BODY(__type, __VXH_SMPL, __here)
10200+}
d337f35e 10201+
d33d7b00 10202+ /* pass vxi and data (void *) */
d337f35e 10203+
d33d7b00
AM
10204+#define __VXH_DATA \
10205+ __vxh_copy_vxi(entry, vxi); \
10206+ entry->sc.data = data
d337f35e 10207+
d33d7b00
AM
10208+static inline
10209+void __vxh_data(struct vx_info *vxi, void *data,
10210+ int __type, void *__here)
3bac966d 10211+{
d33d7b00 10212+ __VXH_BODY(__type, __VXH_DATA, __here)
3bac966d 10213+}
d337f35e 10214+
d33d7b00 10215+ /* pass vxi and arg (long) */
d337f35e 10216+
d33d7b00
AM
10217+#define __VXH_LONG \
10218+ __vxh_copy_vxi(entry, vxi); \
10219+ entry->ll.arg = arg
d337f35e 10220+
d33d7b00
AM
10221+static inline
10222+void __vxh_long(struct vx_info *vxi, long arg,
10223+ int __type, void *__here)
10224+{
10225+ __VXH_BODY(__type, __VXH_LONG, __here)
10226+}
d337f35e 10227+
d337f35e 10228+
d33d7b00
AM
10229+static inline
10230+void __vxh_throw_oops(void *__here)
10231+{
10232+ __VXH_BODY(VXH_THROW_OOPS, {}, __here);
10233+ /* prevent further acquisition */
10234+ vxh_active = 0;
10235+}
d337f35e 10236+
d337f35e 10237+
d33d7b00 10238+#define vxh_throw_oops() __vxh_throw_oops(__HERE__);
d337f35e 10239+
d33d7b00
AM
10240+#define __vxh_get_vx_info(v, h) __vxh_smpl(v, VXH_GET_VX_INFO, h);
10241+#define __vxh_put_vx_info(v, h) __vxh_smpl(v, VXH_PUT_VX_INFO, h);
d337f35e 10242+
d33d7b00
AM
10243+#define __vxh_init_vx_info(v, d, h) \
10244+ __vxh_data(v, d, VXH_INIT_VX_INFO, h);
10245+#define __vxh_set_vx_info(v, d, h) \
10246+ __vxh_data(v, d, VXH_SET_VX_INFO, h);
10247+#define __vxh_clr_vx_info(v, d, h) \
10248+ __vxh_data(v, d, VXH_CLR_VX_INFO, h);
d337f35e 10249+
d33d7b00
AM
10250+#define __vxh_claim_vx_info(v, d, h) \
10251+ __vxh_data(v, d, VXH_CLAIM_VX_INFO, h);
10252+#define __vxh_release_vx_info(v, d, h) \
10253+ __vxh_data(v, d, VXH_RELEASE_VX_INFO, h);
d337f35e 10254+
d33d7b00
AM
10255+#define vxh_alloc_vx_info(v) \
10256+ __vxh_smpl(v, VXH_ALLOC_VX_INFO, __HERE__);
10257+#define vxh_dealloc_vx_info(v) \
10258+ __vxh_smpl(v, VXH_DEALLOC_VX_INFO, __HERE__);
d337f35e 10259+
d33d7b00
AM
10260+#define vxh_hash_vx_info(v) \
10261+ __vxh_smpl(v, VXH_HASH_VX_INFO, __HERE__);
10262+#define vxh_unhash_vx_info(v) \
10263+ __vxh_smpl(v, VXH_UNHASH_VX_INFO, __HERE__);
d337f35e 10264+
d33d7b00
AM
10265+#define vxh_loc_vx_info(v, l) \
10266+ __vxh_long(v, l, VXH_LOC_VX_INFO, __HERE__);
10267+#define vxh_lookup_vx_info(v, l) \
10268+ __vxh_long(v, l, VXH_LOOKUP_VX_INFO, __HERE__);
10269+#define vxh_create_vx_info(v, l) \
10270+ __vxh_long(v, l, VXH_CREATE_VX_INFO, __HERE__);
d337f35e 10271+
d33d7b00 10272+extern void vxh_dump_history(void);
d337f35e 10273+
d337f35e 10274+
d33d7b00 10275+#else /* CONFIG_VSERVER_HISTORY */
2380c486 10276+
d33d7b00 10277+#define __HERE__ 0
d337f35e 10278+
d33d7b00 10279+#define vxh_throw_oops() do { } while (0)
d337f35e 10280+
d33d7b00
AM
10281+#define __vxh_get_vx_info(v, h) do { } while (0)
10282+#define __vxh_put_vx_info(v, h) do { } while (0)
d337f35e 10283+
d33d7b00
AM
10284+#define __vxh_init_vx_info(v, d, h) do { } while (0)
10285+#define __vxh_set_vx_info(v, d, h) do { } while (0)
10286+#define __vxh_clr_vx_info(v, d, h) do { } while (0)
d337f35e 10287+
d33d7b00
AM
10288+#define __vxh_claim_vx_info(v, d, h) do { } while (0)
10289+#define __vxh_release_vx_info(v, d, h) do { } while (0)
3bac966d 10290+
d33d7b00
AM
10291+#define vxh_alloc_vx_info(v) do { } while (0)
10292+#define vxh_dealloc_vx_info(v) do { } while (0)
d337f35e 10293+
d33d7b00
AM
10294+#define vxh_hash_vx_info(v) do { } while (0)
10295+#define vxh_unhash_vx_info(v) do { } while (0)
d337f35e 10296+
d33d7b00
AM
10297+#define vxh_loc_vx_info(v, l) do { } while (0)
10298+#define vxh_lookup_vx_info(v, l) do { } while (0)
10299+#define vxh_create_vx_info(v, l) do { } while (0)
d337f35e 10300+
d33d7b00 10301+#define vxh_dump_history() do { } while (0)
d337f35e 10302+
d337f35e 10303+
d33d7b00 10304+#endif /* CONFIG_VSERVER_HISTORY */
d337f35e 10305+
4bf69007 10306+#endif /* _VSERVER_HISTORY_H */
09a55596
AM
10307diff -NurpP --minimal linux-4.9.135/include/linux/vserver/inode.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/inode.h
10308--- linux-4.9.135/include/linux/vserver/inode.h 1970-01-01 00:00:00.000000000 +0000
10309+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/inode.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
10310@@ -0,0 +1,19 @@
10311+#ifndef _VSERVER_INODE_H
10312+#define _VSERVER_INODE_H
d337f35e 10313+
4bf69007 10314+#include <uapi/vserver/inode.h>
d337f35e 10315+
d337f35e 10316+
d33d7b00
AM
10317+#ifdef CONFIG_VSERVER_PROC_SECURE
10318+#define IATTR_PROC_DEFAULT ( IATTR_ADMIN | IATTR_HIDE )
10319+#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
10320+#else
10321+#define IATTR_PROC_DEFAULT ( IATTR_ADMIN )
10322+#define IATTR_PROC_SYMLINK ( IATTR_ADMIN )
10323+#endif
d337f35e 10324+
d33d7b00 10325+#define vx_hide_check(c, m) (((m) & IATTR_HIDE) ? vx_check(c, m) : 1)
d337f35e 10326+
4bf69007 10327+#else /* _VSERVER_INODE_H */
3bac966d 10328+#warning duplicate inclusion
4bf69007 10329+#endif /* _VSERVER_INODE_H */
09a55596
AM
10330diff -NurpP --minimal linux-4.9.135/include/linux/vserver/inode_cmd.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/inode_cmd.h
10331--- linux-4.9.135/include/linux/vserver/inode_cmd.h 1970-01-01 00:00:00.000000000 +0000
10332+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/inode_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
10333@@ -0,0 +1,36 @@
10334+#ifndef _VSERVER_INODE_CMD_H
10335+#define _VSERVER_INODE_CMD_H
d337f35e 10336+
4bf69007 10337+#include <uapi/vserver/inode_cmd.h>
d337f35e 10338+
d337f35e
JR
10339+
10340+
d33d7b00 10341+#ifdef CONFIG_COMPAT
d337f35e 10342+
d33d7b00 10343+#include <asm/compat.h>
d337f35e 10344+
d33d7b00
AM
10345+struct vcmd_ctx_iattr_v1_x32 {
10346+ compat_uptr_t name_ptr;
10347+ uint32_t tag;
10348+ uint32_t flags;
10349+ uint32_t mask;
10350+};
d337f35e 10351+
d33d7b00 10352+#endif /* CONFIG_COMPAT */
d337f35e 10353+
d33d7b00 10354+#include <linux/compiler.h>
d337f35e 10355+
d33d7b00
AM
10356+extern int vc_get_iattr(void __user *);
10357+extern int vc_set_iattr(void __user *);
d337f35e 10358+
d33d7b00
AM
10359+extern int vc_fget_iattr(uint32_t, void __user *);
10360+extern int vc_fset_iattr(uint32_t, void __user *);
d337f35e 10361+
d33d7b00 10362+#ifdef CONFIG_COMPAT
d337f35e 10363+
d33d7b00
AM
10364+extern int vc_get_iattr_x32(void __user *);
10365+extern int vc_set_iattr_x32(void __user *);
d337f35e 10366+
d33d7b00 10367+#endif /* CONFIG_COMPAT */
d337f35e 10368+
4bf69007 10369+#endif /* _VSERVER_INODE_CMD_H */
09a55596
AM
10370diff -NurpP --minimal linux-4.9.135/include/linux/vserver/limit.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/limit.h
10371--- linux-4.9.135/include/linux/vserver/limit.h 1970-01-01 00:00:00.000000000 +0000
10372+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/limit.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 10373@@ -0,0 +1,67 @@
4bf69007
AM
10374+#ifndef _VSERVER_LIMIT_H
10375+#define _VSERVER_LIMIT_H
d337f35e 10376+
4bf69007 10377+#include <uapi/vserver/limit.h>
d337f35e 10378+
d337f35e 10379+
d33d7b00 10380+#define VLIM_NOCHECK ((1L << VLIMIT_DENTRY) | (1L << RLIMIT_RSS))
d337f35e 10381+
d33d7b00 10382+/* keep in sync with CRLIM_INFINITY */
d337f35e 10383+
d33d7b00 10384+#define VLIM_INFINITY (~0ULL)
d337f35e 10385+
d33d7b00
AM
10386+#include <asm/atomic.h>
10387+#include <asm/resource.h>
d337f35e 10388+
d33d7b00
AM
10389+#ifndef RLIM_INFINITY
10390+#warning RLIM_INFINITY is undefined
10391+#endif
d337f35e 10392+
d33d7b00 10393+#define __rlim_val(l, r, v) ((l)->res[r].v)
d337f35e 10394+
d33d7b00
AM
10395+#define __rlim_soft(l, r) __rlim_val(l, r, soft)
10396+#define __rlim_hard(l, r) __rlim_val(l, r, hard)
d337f35e 10397+
d33d7b00
AM
10398+#define __rlim_rcur(l, r) __rlim_val(l, r, rcur)
10399+#define __rlim_rmin(l, r) __rlim_val(l, r, rmin)
10400+#define __rlim_rmax(l, r) __rlim_val(l, r, rmax)
d337f35e 10401+
d33d7b00
AM
10402+#define __rlim_lhit(l, r) __rlim_val(l, r, lhit)
10403+#define __rlim_hit(l, r) atomic_inc(&__rlim_lhit(l, r))
d337f35e 10404+
d33d7b00
AM
10405+typedef atomic_long_t rlim_atomic_t;
10406+typedef unsigned long rlim_t;
d337f35e 10407+
d33d7b00
AM
10408+#define __rlim_get(l, r) atomic_long_read(&__rlim_rcur(l, r))
10409+#define __rlim_set(l, r, v) atomic_long_set(&__rlim_rcur(l, r), v)
10410+#define __rlim_inc(l, r) atomic_long_inc(&__rlim_rcur(l, r))
10411+#define __rlim_dec(l, r) atomic_long_dec(&__rlim_rcur(l, r))
10412+#define __rlim_add(l, r, v) atomic_long_add(v, &__rlim_rcur(l, r))
10413+#define __rlim_sub(l, r, v) atomic_long_sub(v, &__rlim_rcur(l, r))
d337f35e 10414+
d337f35e 10415+
d33d7b00
AM
10416+#if (RLIM_INFINITY == VLIM_INFINITY)
10417+#define VX_VLIM(r) ((long long)(long)(r))
10418+#define VX_RLIM(v) ((rlim_t)(v))
3bac966d 10419+#else
d33d7b00
AM
10420+#define VX_VLIM(r) (((r) == RLIM_INFINITY) \
10421+ ? VLIM_INFINITY : (long long)(r))
10422+#define VX_RLIM(v) (((v) == VLIM_INFINITY) \
10423+ ? RLIM_INFINITY : (rlim_t)(v))
3bac966d 10424+#endif
d337f35e 10425+
d33d7b00 10426+struct sysinfo;
d337f35e 10427+
cc23e853 10428+#ifdef CONFIG_MEMCG
d33d7b00
AM
10429+void vx_vsi_meminfo(struct sysinfo *);
10430+void vx_vsi_swapinfo(struct sysinfo *);
10431+long vx_vsi_cached(struct sysinfo *);
cc23e853
AM
10432+#else /* !CONFIG_MEMCG */
10433+#define vx_vsi_meminfo(s) do { } while (0)
10434+#define vx_vsi_swapinfo(s) do { } while (0)
10435+#define vx_vsi_cached(s) (0L)
10436+#endif /* !CONFIG_MEMCG */
d337f35e 10437+
d33d7b00 10438+#define NUM_LIMITS 24
d337f35e 10439+
4bf69007 10440+#endif /* _VSERVER_LIMIT_H */
09a55596
AM
10441diff -NurpP --minimal linux-4.9.135/include/linux/vserver/limit_cmd.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/limit_cmd.h
10442--- linux-4.9.135/include/linux/vserver/limit_cmd.h 1970-01-01 00:00:00.000000000 +0000
10443+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/limit_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
10444@@ -0,0 +1,35 @@
10445+#ifndef _VSERVER_LIMIT_CMD_H
10446+#define _VSERVER_LIMIT_CMD_H
d337f35e 10447+
4bf69007 10448+#include <uapi/vserver/limit_cmd.h>
d337f35e 10449+
d337f35e 10450+
d33d7b00 10451+#ifdef CONFIG_IA32_EMULATION
d337f35e 10452+
d33d7b00
AM
10453+struct vcmd_ctx_rlimit_v0_x32 {
10454+ uint32_t id;
10455+ uint64_t minimum;
10456+ uint64_t softlimit;
10457+ uint64_t maximum;
10458+} __attribute__ ((packed));
d337f35e 10459+
d33d7b00 10460+#endif /* CONFIG_IA32_EMULATION */
d337f35e 10461+
d33d7b00 10462+#include <linux/compiler.h>
d337f35e 10463+
d33d7b00
AM
10464+extern int vc_get_rlimit_mask(uint32_t, void __user *);
10465+extern int vc_get_rlimit(struct vx_info *, void __user *);
10466+extern int vc_set_rlimit(struct vx_info *, void __user *);
10467+extern int vc_reset_hits(struct vx_info *, void __user *);
10468+extern int vc_reset_minmax(struct vx_info *, void __user *);
d337f35e 10469+
d33d7b00 10470+extern int vc_rlimit_stat(struct vx_info *, void __user *);
d337f35e 10471+
d33d7b00 10472+#ifdef CONFIG_IA32_EMULATION
d337f35e 10473+
d33d7b00
AM
10474+extern int vc_get_rlimit_x32(struct vx_info *, void __user *);
10475+extern int vc_set_rlimit_x32(struct vx_info *, void __user *);
adc1caaa 10476+
d33d7b00 10477+#endif /* CONFIG_IA32_EMULATION */
d337f35e 10478+
4bf69007 10479+#endif /* _VSERVER_LIMIT_CMD_H */
09a55596
AM
10480diff -NurpP --minimal linux-4.9.135/include/linux/vserver/limit_def.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/limit_def.h
10481--- linux-4.9.135/include/linux/vserver/limit_def.h 1970-01-01 00:00:00.000000000 +0000
10482+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/limit_def.h 2018-10-20 04:58:14.000000000 +0000
d33d7b00 10483@@ -0,0 +1,47 @@
4bf69007
AM
10484+#ifndef _VSERVER_LIMIT_DEF_H
10485+#define _VSERVER_LIMIT_DEF_H
d337f35e 10486+
d33d7b00
AM
10487+#include <asm/atomic.h>
10488+#include <asm/resource.h>
d337f35e 10489+
d33d7b00 10490+#include "limit.h"
d337f35e 10491+
d337f35e 10492+
d33d7b00
AM
10493+struct _vx_res_limit {
10494+ rlim_t soft; /* Context soft limit */
10495+ rlim_t hard; /* Context hard limit */
d337f35e 10496+
d33d7b00
AM
10497+ rlim_atomic_t rcur; /* Current value */
10498+ rlim_t rmin; /* Context minimum */
10499+ rlim_t rmax; /* Context maximum */
d337f35e 10500+
d33d7b00
AM
10501+ atomic_t lhit; /* Limit hits */
10502+};
d337f35e 10503+
d33d7b00 10504+/* context sub struct */
2380c486 10505+
d33d7b00
AM
10506+struct _vx_limit {
10507+ struct _vx_res_limit res[NUM_LIMITS];
10508+};
adc1caaa 10509+
d33d7b00 10510+#ifdef CONFIG_VSERVER_DEBUG
adc1caaa 10511+
d33d7b00 10512+static inline void __dump_vx_limit(struct _vx_limit *limit)
3bac966d 10513+{
d33d7b00 10514+ int i;
d337f35e 10515+
d33d7b00
AM
10516+ printk("\t_vx_limit:");
10517+ for (i = 0; i < NUM_LIMITS; i++) {
10518+ printk("\t [%2d] = %8lu %8lu/%8lu, %8ld/%8ld, %8d\n",
10519+ i, (unsigned long)__rlim_get(limit, i),
10520+ (unsigned long)__rlim_rmin(limit, i),
10521+ (unsigned long)__rlim_rmax(limit, i),
10522+ (long)__rlim_soft(limit, i),
10523+ (long)__rlim_hard(limit, i),
10524+ atomic_read(&__rlim_lhit(limit, i)));
10525+ }
3bac966d 10526+}
d337f35e 10527+
d33d7b00 10528+#endif
d337f35e 10529+
4bf69007 10530+#endif /* _VSERVER_LIMIT_DEF_H */
09a55596
AM
10531diff -NurpP --minimal linux-4.9.135/include/linux/vserver/limit_int.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/limit_int.h
10532--- linux-4.9.135/include/linux/vserver/limit_int.h 1970-01-01 00:00:00.000000000 +0000
10533+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/limit_int.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
10534@@ -0,0 +1,193 @@
10535+#ifndef _VSERVER_LIMIT_INT_H
10536+#define _VSERVER_LIMIT_INT_H
d337f35e 10537+
d33d7b00
AM
10538+#define VXD_RCRES_COND(r) VXD_CBIT(cres, r)
10539+#define VXD_RLIMIT_COND(r) VXD_CBIT(limit, r)
d337f35e 10540+
d33d7b00 10541+extern const char *vlimit_name[NUM_LIMITS];
2380c486 10542+
d33d7b00
AM
10543+static inline void __vx_acc_cres(struct vx_info *vxi,
10544+ int res, int dir, void *_data, char *_file, int _line)
10545+{
10546+ if (VXD_RCRES_COND(res))
10547+ vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5ld%s (%p)",
10548+ (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10549+ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10550+ (dir > 0) ? "++" : "--", _data, _file, _line);
10551+ if (!vxi)
10552+ return;
d337f35e 10553+
d33d7b00
AM
10554+ if (dir > 0)
10555+ __rlim_inc(&vxi->limit, res);
10556+ else
10557+ __rlim_dec(&vxi->limit, res);
10558+}
d337f35e 10559+
d33d7b00
AM
10560+static inline void __vx_add_cres(struct vx_info *vxi,
10561+ int res, int amount, void *_data, char *_file, int _line)
10562+{
10563+ if (VXD_RCRES_COND(res))
10564+ vxlprintk(1, "vx_add_cres[%5d,%s,%2d]: %5ld += %5d (%p)",
10565+ (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10566+ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10567+ amount, _data, _file, _line);
10568+ if (amount == 0)
10569+ return;
10570+ if (!vxi)
10571+ return;
10572+ __rlim_add(&vxi->limit, res, amount);
10573+}
d337f35e 10574+
3bac966d 10575+static inline
d33d7b00 10576+int __vx_cres_adjust_max(struct _vx_limit *limit, int res, rlim_t value)
3bac966d 10577+{
d33d7b00 10578+ int cond = (value > __rlim_rmax(limit, res));
d337f35e 10579+
d33d7b00
AM
10580+ if (cond)
10581+ __rlim_rmax(limit, res) = value;
10582+ return cond;
3bac966d 10583+}
d337f35e 10584+
3bac966d 10585+static inline
d33d7b00 10586+int __vx_cres_adjust_min(struct _vx_limit *limit, int res, rlim_t value)
3bac966d 10587+{
d33d7b00 10588+ int cond = (value < __rlim_rmin(limit, res));
d337f35e 10589+
d33d7b00
AM
10590+ if (cond)
10591+ __rlim_rmin(limit, res) = value;
10592+ return cond;
3bac966d 10593+}
d337f35e 10594+
3bac966d 10595+static inline
d33d7b00 10596+void __vx_cres_fixup(struct _vx_limit *limit, int res, rlim_t value)
3bac966d 10597+{
d33d7b00
AM
10598+ if (!__vx_cres_adjust_max(limit, res, value))
10599+ __vx_cres_adjust_min(limit, res, value);
3bac966d 10600+}
d337f35e 10601+
2380c486 10602+
d33d7b00
AM
10603+/* return values:
10604+ +1 ... no limit hit
10605+ -1 ... over soft limit
10606+ 0 ... over hard limit */
d337f35e 10607+
d33d7b00
AM
10608+static inline int __vx_cres_avail(struct vx_info *vxi,
10609+ int res, int num, char *_file, int _line)
3bac966d 10610+{
d33d7b00
AM
10611+ struct _vx_limit *limit;
10612+ rlim_t value;
d337f35e 10613+
d33d7b00
AM
10614+ if (VXD_RLIMIT_COND(res))
10615+ vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld/%5ld > %5ld + %5d",
10616+ (vxi ? vxi->vx_id : -1), vlimit_name[res], res,
10617+ (vxi ? (long)__rlim_soft(&vxi->limit, res) : -1),
10618+ (vxi ? (long)__rlim_hard(&vxi->limit, res) : -1),
10619+ (vxi ? (long)__rlim_get(&vxi->limit, res) : 0),
10620+ num, _file, _line);
10621+ if (!vxi)
3bac966d 10622+ return 1;
d337f35e 10623+
d33d7b00
AM
10624+ limit = &vxi->limit;
10625+ value = __rlim_get(limit, res);
d337f35e 10626+
d33d7b00
AM
10627+ if (!__vx_cres_adjust_max(limit, res, value))
10628+ __vx_cres_adjust_min(limit, res, value);
d337f35e 10629+
d33d7b00 10630+ if (num == 0)
3bac966d 10631+ return 1;
d337f35e 10632+
d33d7b00
AM
10633+ if (__rlim_soft(limit, res) == RLIM_INFINITY)
10634+ return -1;
10635+ if (value + num <= __rlim_soft(limit, res))
10636+ return -1;
d337f35e 10637+
d33d7b00 10638+ if (__rlim_hard(limit, res) == RLIM_INFINITY)
3bac966d 10639+ return 1;
d33d7b00 10640+ if (value + num <= __rlim_hard(limit, res))
3bac966d 10641+ return 1;
d33d7b00
AM
10642+
10643+ __rlim_hit(limit, res);
3bac966d
AM
10644+ return 0;
10645+}
d337f35e 10646+
d337f35e 10647+
d33d7b00 10648+static const int VLA_RSS[] = { RLIMIT_RSS, VLIMIT_ANON, VLIMIT_MAPPED, 0 };
d337f35e 10649+
3bac966d 10650+static inline
d33d7b00 10651+rlim_t __vx_cres_array_sum(struct _vx_limit *limit, const int *array)
3bac966d 10652+{
d33d7b00
AM
10653+ rlim_t value, sum = 0;
10654+ int res;
d337f35e 10655+
d33d7b00
AM
10656+ while ((res = *array++)) {
10657+ value = __rlim_get(limit, res);
10658+ __vx_cres_fixup(limit, res, value);
10659+ sum += value;
10660+ }
10661+ return sum;
3bac966d 10662+}
d337f35e 10663+
3bac966d 10664+static inline
d33d7b00 10665+rlim_t __vx_cres_array_fixup(struct _vx_limit *limit, const int *array)
3bac966d 10666+{
d33d7b00
AM
10667+ rlim_t value = __vx_cres_array_sum(limit, array + 1);
10668+ int res = *array;
d337f35e 10669+
d33d7b00
AM
10670+ if (value == __rlim_get(limit, res))
10671+ return value;
10672+
10673+ __rlim_set(limit, res, value);
10674+ /* now adjust min/max */
10675+ if (!__vx_cres_adjust_max(limit, res, value))
10676+ __vx_cres_adjust_min(limit, res, value);
10677+
10678+ return value;
3bac966d 10679+}
d337f35e 10680+
d33d7b00
AM
10681+static inline int __vx_cres_array_avail(struct vx_info *vxi,
10682+ const int *array, int num, char *_file, int _line)
3bac966d 10683+{
d33d7b00
AM
10684+ struct _vx_limit *limit;
10685+ rlim_t value = 0;
10686+ int res;
10687+
10688+ if (num == 0)
3bac966d 10689+ return 1;
d33d7b00 10690+ if (!vxi)
3bac966d 10691+ return 1;
d337f35e 10692+
d33d7b00
AM
10693+ limit = &vxi->limit;
10694+ res = *array;
10695+ value = __vx_cres_array_sum(limit, array + 1);
d337f35e 10696+
d33d7b00
AM
10697+ __rlim_set(limit, res, value);
10698+ __vx_cres_fixup(limit, res, value);
10699+
10700+ return __vx_cres_avail(vxi, res, num, _file, _line);
3bac966d 10701+}
d337f35e 10702+
d337f35e 10703+
d33d7b00 10704+static inline void vx_limit_fixup(struct _vx_limit *limit, int id)
3bac966d 10705+{
d33d7b00
AM
10706+ rlim_t value;
10707+ int res;
d337f35e 10708+
d33d7b00
AM
10709+ /* complex resources first */
10710+ if ((id < 0) || (id == RLIMIT_RSS))
10711+ __vx_cres_array_fixup(limit, VLA_RSS);
d337f35e 10712+
d33d7b00
AM
10713+ for (res = 0; res < NUM_LIMITS; res++) {
10714+ if ((id > 0) && (res != id))
10715+ continue;
10716+
10717+ value = __rlim_get(limit, res);
10718+ __vx_cres_fixup(limit, res, value);
10719+
10720+ /* not supposed to happen, maybe warn? */
10721+ if (__rlim_rmax(limit, res) > __rlim_hard(limit, res))
10722+ __rlim_rmax(limit, res) = __rlim_hard(limit, res);
10723+ }
3bac966d 10724+}
d337f35e
JR
10725+
10726+
4bf69007 10727+#endif /* _VSERVER_LIMIT_INT_H */
09a55596
AM
10728diff -NurpP --minimal linux-4.9.135/include/linux/vserver/monitor.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/monitor.h
10729--- linux-4.9.135/include/linux/vserver/monitor.h 1970-01-01 00:00:00.000000000 +0000
10730+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/monitor.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
10731@@ -0,0 +1,6 @@
10732+#ifndef _VSERVER_MONITOR_H
10733+#define _VSERVER_MONITOR_H
d337f35e 10734+
4bf69007 10735+#include <uapi/vserver/monitor.h>
d337f35e 10736+
4bf69007 10737+#endif /* _VSERVER_MONITOR_H */
09a55596
AM
10738diff -NurpP --minimal linux-4.9.135/include/linux/vserver/network.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/network.h
10739--- linux-4.9.135/include/linux/vserver/network.h 1970-01-01 00:00:00.000000000 +0000
10740+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/network.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
10741@@ -0,0 +1,76 @@
10742+#ifndef _VSERVER_NETWORK_H
10743+#define _VSERVER_NETWORK_H
d337f35e 10744+
d337f35e 10745+
4bf69007
AM
10746+#include <linux/list.h>
10747+#include <linux/spinlock.h>
10748+#include <linux/rcupdate.h>
10749+#include <linux/in.h>
10750+#include <linux/in6.h>
10751+#include <asm/atomic.h>
10752+#include <uapi/vserver/network.h>
d337f35e 10753+
4bf69007
AM
10754+struct nx_addr_v4 {
10755+ struct nx_addr_v4 *next;
10756+ struct in_addr ip[2];
10757+ struct in_addr mask;
10758+ uint16_t type;
10759+ uint16_t flags;
10760+};
d337f35e 10761+
4bf69007
AM
10762+struct nx_addr_v6 {
10763+ struct nx_addr_v6 *next;
10764+ struct in6_addr ip;
10765+ struct in6_addr mask;
10766+ uint32_t prefix;
10767+ uint16_t type;
10768+ uint16_t flags;
10769+};
d337f35e 10770+
4bf69007
AM
10771+struct nx_info {
10772+ struct hlist_node nx_hlist; /* linked list of nxinfos */
61333608 10773+ vnid_t nx_id; /* vnet id */
4bf69007
AM
10774+ atomic_t nx_usecnt; /* usage count */
10775+ atomic_t nx_tasks; /* tasks count */
10776+ int nx_state; /* context state */
d337f35e 10777+
4bf69007
AM
10778+ uint64_t nx_flags; /* network flag word */
10779+ uint64_t nx_ncaps; /* network capabilities */
d337f35e 10780+
4bf69007
AM
10781+ spinlock_t addr_lock; /* protect address changes */
10782+ struct in_addr v4_lback; /* Loopback address */
10783+ struct in_addr v4_bcast; /* Broadcast address */
10784+ struct nx_addr_v4 v4; /* First/Single ipv4 address */
10785+#ifdef CONFIG_IPV6
10786+ struct nx_addr_v6 v6; /* First/Single ipv6 address */
10787+#endif
10788+ char nx_name[65]; /* network context name */
d33d7b00 10789+};
d337f35e 10790+
d337f35e 10791+
4bf69007 10792+/* status flags */
d337f35e 10793+
4bf69007
AM
10794+#define NXS_HASHED 0x0001
10795+#define NXS_SHUTDOWN 0x0100
10796+#define NXS_RELEASED 0x8000
d337f35e 10797+
4bf69007 10798+extern struct nx_info *lookup_nx_info(int);
d337f35e 10799+
4bf69007 10800+extern int get_nid_list(int, unsigned int *, int);
61333608 10801+extern int nid_is_hashed(vnid_t);
d337f35e 10802+
4bf69007 10803+extern int nx_migrate_task(struct task_struct *, struct nx_info *);
d337f35e 10804+
4bf69007 10805+extern long vs_net_change(struct nx_info *, unsigned int);
d337f35e 10806+
4bf69007 10807+struct sock;
d337f35e 10808+
d337f35e 10809+
4bf69007
AM
10810+#define NX_IPV4(n) ((n)->v4.type != NXA_TYPE_NONE)
10811+#ifdef CONFIG_IPV6
10812+#define NX_IPV6(n) ((n)->v6.type != NXA_TYPE_NONE)
10813+#else
10814+#define NX_IPV6(n) (0)
10815+#endif
d337f35e 10816+
4bf69007 10817+#endif /* _VSERVER_NETWORK_H */
09a55596
AM
10818diff -NurpP --minimal linux-4.9.135/include/linux/vserver/network_cmd.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/network_cmd.h
10819--- linux-4.9.135/include/linux/vserver/network_cmd.h 1970-01-01 00:00:00.000000000 +0000
10820+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/network_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
10821@@ -0,0 +1,37 @@
10822+#ifndef _VSERVER_NETWORK_CMD_H
10823+#define _VSERVER_NETWORK_CMD_H
d337f35e 10824+
4bf69007 10825+#include <uapi/vserver/network_cmd.h>
d337f35e 10826+
4bf69007 10827+extern int vc_task_nid(uint32_t);
d337f35e 10828+
4bf69007 10829+extern int vc_nx_info(struct nx_info *, void __user *);
d337f35e 10830+
4bf69007
AM
10831+extern int vc_net_create(uint32_t, void __user *);
10832+extern int vc_net_migrate(struct nx_info *, void __user *);
d337f35e 10833+
4bf69007
AM
10834+extern int vc_net_add(struct nx_info *, void __user *);
10835+extern int vc_net_remove(struct nx_info *, void __user *);
d337f35e 10836+
4bf69007
AM
10837+extern int vc_net_add_ipv4_v1(struct nx_info *, void __user *);
10838+extern int vc_net_add_ipv4(struct nx_info *, void __user *);
d337f35e 10839+
4bf69007
AM
10840+extern int vc_net_rem_ipv4_v1(struct nx_info *, void __user *);
10841+extern int vc_net_rem_ipv4(struct nx_info *, void __user *);
d337f35e 10842+
4bf69007
AM
10843+extern int vc_net_add_ipv6(struct nx_info *, void __user *);
10844+extern int vc_net_remove_ipv6(struct nx_info *, void __user *);
d337f35e 10845+
4bf69007
AM
10846+extern int vc_add_match_ipv4(struct nx_info *, void __user *);
10847+extern int vc_get_match_ipv4(struct nx_info *, void __user *);
d33d7b00 10848+
4bf69007
AM
10849+extern int vc_add_match_ipv6(struct nx_info *, void __user *);
10850+extern int vc_get_match_ipv6(struct nx_info *, void __user *);
d337f35e 10851+
4bf69007
AM
10852+extern int vc_get_nflags(struct nx_info *, void __user *);
10853+extern int vc_set_nflags(struct nx_info *, void __user *);
d337f35e 10854+
4bf69007
AM
10855+extern int vc_get_ncaps(struct nx_info *, void __user *);
10856+extern int vc_set_ncaps(struct nx_info *, void __user *);
d337f35e 10857+
4bf69007 10858+#endif /* _VSERVER_NETWORK_CMD_H */
09a55596
AM
10859diff -NurpP --minimal linux-4.9.135/include/linux/vserver/percpu.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/percpu.h
10860--- linux-4.9.135/include/linux/vserver/percpu.h 1970-01-01 00:00:00.000000000 +0000
10861+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/percpu.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
10862@@ -0,0 +1,14 @@
10863+#ifndef _VSERVER_PERCPU_H
10864+#define _VSERVER_PERCPU_H
d337f35e 10865+
4bf69007
AM
10866+#include "cvirt_def.h"
10867+#include "sched_def.h"
d337f35e 10868+
4bf69007
AM
10869+struct _vx_percpu {
10870+ struct _vx_cvirt_pc cvirt;
10871+ struct _vx_sched_pc sched;
10872+};
9795bf04 10873+
4bf69007 10874+#define PERCPU_PERCTX (sizeof(struct _vx_percpu))
d337f35e 10875+
4bf69007 10876+#endif /* _VSERVER_PERCPU_H */
09a55596
AM
10877diff -NurpP --minimal linux-4.9.135/include/linux/vserver/pid.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/pid.h
10878--- linux-4.9.135/include/linux/vserver/pid.h 1970-01-01 00:00:00.000000000 +0000
10879+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/pid.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
10880@@ -0,0 +1,51 @@
10881+#ifndef _VSERVER_PID_H
10882+#define _VSERVER_PID_H
d337f35e 10883+
4bf69007 10884+/* pid faking stuff */
d337f35e 10885+
4bf69007
AM
10886+#define vx_info_map_pid(v, p) \
10887+ __vx_info_map_pid((v), (p), __func__, __FILE__, __LINE__)
10888+#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p)
10889+#define vx_map_pid(p) vx_info_map_pid(current_vx_info(), p)
10890+#define vx_map_tgid(p) vx_map_pid(p)
d337f35e 10891+
4bf69007
AM
10892+static inline int __vx_info_map_pid(struct vx_info *vxi, int pid,
10893+ const char *func, const char *file, int line)
10894+{
10895+ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
10896+ vxfprintk(VXD_CBIT(cvirt, 2),
10897+ "vx_map_tgid: %p/%llx: %d -> %d",
10898+ vxi, (long long)vxi->vx_flags, pid,
10899+ (pid && pid == vxi->vx_initpid) ? 1 : pid,
10900+ func, file, line);
10901+ if (pid == 0)
10902+ return 0;
10903+ if (pid == vxi->vx_initpid)
10904+ return 1;
10905+ }
10906+ return pid;
10907+}
d337f35e 10908+
4bf69007
AM
10909+#define vx_info_rmap_pid(v, p) \
10910+ __vx_info_rmap_pid((v), (p), __func__, __FILE__, __LINE__)
10911+#define vx_rmap_pid(p) vx_info_rmap_pid(current_vx_info(), p)
10912+#define vx_rmap_tgid(p) vx_rmap_pid(p)
d337f35e 10913+
4bf69007
AM
10914+static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid,
10915+ const char *func, const char *file, int line)
10916+{
10917+ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
10918+ vxfprintk(VXD_CBIT(cvirt, 2),
10919+ "vx_rmap_tgid: %p/%llx: %d -> %d",
10920+ vxi, (long long)vxi->vx_flags, pid,
10921+ (pid == 1) ? vxi->vx_initpid : pid,
10922+ func, file, line);
10923+ if ((pid == 1) && vxi->vx_initpid)
10924+ return vxi->vx_initpid;
10925+ if (pid == vxi->vx_initpid)
10926+ return ~0U;
10927+ }
10928+ return pid;
10929+}
d337f35e 10930+
4bf69007 10931+#endif
09a55596
AM
10932diff -NurpP --minimal linux-4.9.135/include/linux/vserver/sched.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/sched.h
10933--- linux-4.9.135/include/linux/vserver/sched.h 1970-01-01 00:00:00.000000000 +0000
10934+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/sched.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
10935@@ -0,0 +1,23 @@
10936+#ifndef _VSERVER_SCHED_H
10937+#define _VSERVER_SCHED_H
d337f35e 10938+
d337f35e 10939+
d33d7b00 10940+#ifdef __KERNEL__
d337f35e 10941+
4bf69007 10942+struct timespec;
d337f35e 10943+
4bf69007 10944+void vx_vsi_uptime(struct timespec *, struct timespec *);
d337f35e
JR
10945+
10946+
4bf69007 10947+struct vx_info;
d337f35e 10948+
4bf69007 10949+void vx_update_load(struct vx_info *);
d337f35e 10950+
d337f35e 10951+
4bf69007
AM
10952+void vx_update_sched_param(struct _vx_sched *sched,
10953+ struct _vx_sched_pc *sched_pc);
d337f35e 10954+
4bf69007
AM
10955+#endif /* __KERNEL__ */
10956+#else /* _VSERVER_SCHED_H */
10957+#warning duplicate inclusion
10958+#endif /* _VSERVER_SCHED_H */
09a55596
AM
10959diff -NurpP --minimal linux-4.9.135/include/linux/vserver/sched_cmd.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/sched_cmd.h
10960--- linux-4.9.135/include/linux/vserver/sched_cmd.h 1970-01-01 00:00:00.000000000 +0000
10961+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/sched_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
10962@@ -0,0 +1,11 @@
10963+#ifndef _VSERVER_SCHED_CMD_H
10964+#define _VSERVER_SCHED_CMD_H
2380c486 10965+
2380c486 10966+
4bf69007
AM
10967+#include <linux/compiler.h>
10968+#include <uapi/vserver/sched_cmd.h>
d337f35e 10969+
4bf69007
AM
10970+extern int vc_set_prio_bias(struct vx_info *, void __user *);
10971+extern int vc_get_prio_bias(struct vx_info *, void __user *);
d337f35e 10972+
4bf69007 10973+#endif /* _VSERVER_SCHED_CMD_H */
09a55596
AM
10974diff -NurpP --minimal linux-4.9.135/include/linux/vserver/sched_def.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/sched_def.h
10975--- linux-4.9.135/include/linux/vserver/sched_def.h 1970-01-01 00:00:00.000000000 +0000
10976+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/sched_def.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
10977@@ -0,0 +1,38 @@
10978+#ifndef _VSERVER_SCHED_DEF_H
10979+#define _VSERVER_SCHED_DEF_H
d33d7b00 10980+
4bf69007
AM
10981+#include <linux/spinlock.h>
10982+#include <linux/jiffies.h>
10983+#include <linux/cpumask.h>
10984+#include <asm/atomic.h>
10985+#include <asm/param.h>
d33d7b00 10986+
d337f35e 10987+
4bf69007 10988+/* context sub struct */
d337f35e 10989+
4bf69007
AM
10990+struct _vx_sched {
10991+ int prio_bias; /* bias offset for priority */
d337f35e 10992+
4bf69007
AM
10993+ cpumask_t update; /* CPUs which should update */
10994+};
d337f35e 10995+
4bf69007
AM
10996+struct _vx_sched_pc {
10997+ int prio_bias; /* bias offset for priority */
d337f35e 10998+
4bf69007
AM
10999+ uint64_t user_ticks; /* token tick events */
11000+ uint64_t sys_ticks; /* token tick events */
11001+ uint64_t hold_ticks; /* token ticks paused */
11002+};
d337f35e 11003+
d337f35e 11004+
4bf69007 11005+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 11006+
4bf69007
AM
11007+static inline void __dump_vx_sched(struct _vx_sched *sched)
11008+{
11009+ printk("\t_vx_sched:\n");
11010+ printk("\t priority = %4d\n", sched->prio_bias);
11011+}
d337f35e 11012+
4bf69007
AM
11013+#endif
11014+
11015+#endif /* _VSERVER_SCHED_DEF_H */
09a55596
AM
11016diff -NurpP --minimal linux-4.9.135/include/linux/vserver/signal.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/signal.h
11017--- linux-4.9.135/include/linux/vserver/signal.h 1970-01-01 00:00:00.000000000 +0000
11018+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/signal.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11019@@ -0,0 +1,14 @@
11020+#ifndef _VSERVER_SIGNAL_H
11021+#define _VSERVER_SIGNAL_H
d337f35e 11022+
d337f35e 11023+
d33d7b00 11024+#ifdef __KERNEL__
4bf69007
AM
11025+
11026+struct vx_info;
11027+
11028+int vx_info_kill(struct vx_info *, int, int);
d337f35e 11029+
d33d7b00 11030+#endif /* __KERNEL__ */
4bf69007
AM
11031+#else /* _VSERVER_SIGNAL_H */
11032+#warning duplicate inclusion
11033+#endif /* _VSERVER_SIGNAL_H */
09a55596
AM
11034diff -NurpP --minimal linux-4.9.135/include/linux/vserver/signal_cmd.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/signal_cmd.h
11035--- linux-4.9.135/include/linux/vserver/signal_cmd.h 1970-01-01 00:00:00.000000000 +0000
11036+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/signal_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11037@@ -0,0 +1,14 @@
11038+#ifndef _VSERVER_SIGNAL_CMD_H
11039+#define _VSERVER_SIGNAL_CMD_H
d337f35e 11040+
4bf69007 11041+#include <uapi/vserver/signal_cmd.h>
d337f35e 11042+
d337f35e 11043+
4bf69007
AM
11044+extern int vc_ctx_kill(struct vx_info *, void __user *);
11045+extern int vc_wait_exit(struct vx_info *, void __user *);
d337f35e
JR
11046+
11047+
4bf69007
AM
11048+extern int vc_get_pflags(uint32_t pid, void __user *);
11049+extern int vc_set_pflags(uint32_t pid, void __user *);
adc1caaa 11050+
4bf69007 11051+#endif /* _VSERVER_SIGNAL_CMD_H */
09a55596
AM
11052diff -NurpP --minimal linux-4.9.135/include/linux/vserver/space.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/space.h
11053--- linux-4.9.135/include/linux/vserver/space.h 1970-01-01 00:00:00.000000000 +0000
11054+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/space.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11055@@ -0,0 +1,12 @@
11056+#ifndef _VSERVER_SPACE_H
11057+#define _VSERVER_SPACE_H
d337f35e 11058+
4bf69007 11059+#include <linux/types.h>
d337f35e 11060+
4bf69007 11061+struct vx_info;
d337f35e 11062+
4bf69007 11063+int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index);
9f7054f1 11064+
4bf69007
AM
11065+#else /* _VSERVER_SPACE_H */
11066+#warning duplicate inclusion
11067+#endif /* _VSERVER_SPACE_H */
09a55596
AM
11068diff -NurpP --minimal linux-4.9.135/include/linux/vserver/space_cmd.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/space_cmd.h
11069--- linux-4.9.135/include/linux/vserver/space_cmd.h 1970-01-01 00:00:00.000000000 +0000
11070+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/space_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11071@@ -0,0 +1,13 @@
11072+#ifndef _VSERVER_SPACE_CMD_H
11073+#define _VSERVER_SPACE_CMD_H
9f7054f1 11074+
4bf69007 11075+#include <uapi/vserver/space_cmd.h>
d337f35e 11076+
d337f35e 11077+
4bf69007
AM
11078+extern int vc_enter_space_v1(struct vx_info *, void __user *);
11079+extern int vc_set_space_v1(struct vx_info *, void __user *);
11080+extern int vc_enter_space(struct vx_info *, void __user *);
11081+extern int vc_set_space(struct vx_info *, void __user *);
11082+extern int vc_get_space_mask(void __user *, int);
d337f35e 11083+
4bf69007 11084+#endif /* _VSERVER_SPACE_CMD_H */
09a55596
AM
11085diff -NurpP --minimal linux-4.9.135/include/linux/vserver/switch.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/switch.h
11086--- linux-4.9.135/include/linux/vserver/switch.h 1970-01-01 00:00:00.000000000 +0000
11087+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/switch.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11088@@ -0,0 +1,8 @@
11089+#ifndef _VSERVER_SWITCH_H
11090+#define _VSERVER_SWITCH_H
d337f35e 11091+
d337f35e 11092+
4bf69007
AM
11093+#include <linux/errno.h>
11094+#include <uapi/vserver/switch.h>
2380c486 11095+
4bf69007 11096+#endif /* _VSERVER_SWITCH_H */
09a55596
AM
11097diff -NurpP --minimal linux-4.9.135/include/linux/vserver/tag.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/tag.h
11098--- linux-4.9.135/include/linux/vserver/tag.h 1970-01-01 00:00:00.000000000 +0000
11099+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/tag.h 2018-10-20 04:58:14.000000000 +0000
a4a22af8 11100@@ -0,0 +1,160 @@
4bf69007
AM
11101+#ifndef _DX_TAG_H
11102+#define _DX_TAG_H
d337f35e 11103+
4bf69007 11104+#include <linux/types.h>
a4a22af8 11105+#include <linux/uidgid.h>
d337f35e 11106+
d337f35e 11107+
4bf69007 11108+#define DX_TAG(in) (IS_TAGGED(in))
9f7054f1 11109+
d337f35e 11110+
4bf69007
AM
11111+#ifdef CONFIG_TAG_NFSD
11112+#define DX_TAG_NFSD 1
11113+#else
11114+#define DX_TAG_NFSD 0
11115+#endif
2380c486 11116+
2380c486 11117+
4bf69007 11118+#ifdef CONFIG_TAGGING_NONE
d337f35e 11119+
4bf69007
AM
11120+#define MAX_UID 0xFFFFFFFF
11121+#define MAX_GID 0xFFFFFFFF
d337f35e 11122+
4bf69007 11123+#define INOTAG_TAG(cond, uid, gid, tag) (0)
d337f35e 11124+
4bf69007
AM
11125+#define TAGINO_UID(cond, uid, tag) (uid)
11126+#define TAGINO_GID(cond, gid, tag) (gid)
d337f35e 11127+
4bf69007 11128+#endif
d337f35e 11129+
d337f35e 11130+
4bf69007 11131+#ifdef CONFIG_TAGGING_GID16
d337f35e 11132+
4bf69007
AM
11133+#define MAX_UID 0xFFFFFFFF
11134+#define MAX_GID 0x0000FFFF
d337f35e 11135+
4bf69007
AM
11136+#define INOTAG_TAG(cond, uid, gid, tag) \
11137+ ((cond) ? (((gid) >> 16) & 0xFFFF) : 0)
d337f35e 11138+
4bf69007
AM
11139+#define TAGINO_UID(cond, uid, tag) (uid)
11140+#define TAGINO_GID(cond, gid, tag) \
11141+ ((cond) ? (((gid) & 0xFFFF) | ((tag) << 16)) : (gid))
d337f35e 11142+
4bf69007 11143+#endif
d337f35e 11144+
d337f35e 11145+
4bf69007 11146+#ifdef CONFIG_TAGGING_ID24
d337f35e 11147+
4bf69007
AM
11148+#define MAX_UID 0x00FFFFFF
11149+#define MAX_GID 0x00FFFFFF
d337f35e 11150+
4bf69007
AM
11151+#define INOTAG_TAG(cond, uid, gid, tag) \
11152+ ((cond) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0)
d337f35e 11153+
4bf69007
AM
11154+#define TAGINO_UID(cond, uid, tag) \
11155+ ((cond) ? (((uid) & 0xFFFFFF) | (((tag) & 0xFF00) << 16)) : (uid))
11156+#define TAGINO_GID(cond, gid, tag) \
11157+ ((cond) ? (((gid) & 0xFFFFFF) | (((tag) & 0x00FF) << 24)) : (gid))
d337f35e 11158+
4bf69007 11159+#endif
d337f35e 11160+
d337f35e 11161+
4bf69007 11162+#ifdef CONFIG_TAGGING_UID16
d337f35e 11163+
4bf69007
AM
11164+#define MAX_UID 0x0000FFFF
11165+#define MAX_GID 0xFFFFFFFF
3bac966d 11166+
4bf69007
AM
11167+#define INOTAG_TAG(cond, uid, gid, tag) \
11168+ ((cond) ? (((uid) >> 16) & 0xFFFF) : 0)
d337f35e 11169+
4bf69007
AM
11170+#define TAGINO_UID(cond, uid, tag) \
11171+ ((cond) ? (((uid) & 0xFFFF) | ((tag) << 16)) : (uid))
11172+#define TAGINO_GID(cond, gid, tag) (gid)
d337f35e 11173+
d33d7b00 11174+#endif
d337f35e
JR
11175+
11176+
4bf69007 11177+#ifdef CONFIG_TAGGING_INTERN
d337f35e 11178+
4bf69007
AM
11179+#define MAX_UID 0xFFFFFFFF
11180+#define MAX_GID 0xFFFFFFFF
d337f35e 11181+
4bf69007
AM
11182+#define INOTAG_TAG(cond, uid, gid, tag) \
11183+ ((cond) ? (tag) : 0)
d337f35e 11184+
4bf69007
AM
11185+#define TAGINO_UID(cond, uid, tag) (uid)
11186+#define TAGINO_GID(cond, gid, tag) (gid)
d337f35e 11187+
4bf69007 11188+#endif
d337f35e 11189+
d337f35e 11190+
4bf69007
AM
11191+#ifndef CONFIG_TAGGING_NONE
11192+#define dx_current_fstag(sb) \
11193+ ((sb)->s_flags & MS_TAGGED ? dx_current_tag() : 0)
11194+#else
11195+#define dx_current_fstag(sb) (0)
11196+#endif
d337f35e 11197+
4bf69007
AM
11198+#ifndef CONFIG_TAGGING_INTERN
11199+#define TAGINO_TAG(cond, tag) (0)
11200+#else
11201+#define TAGINO_TAG(cond, tag) ((cond) ? (tag) : 0)
11202+#endif
d337f35e 11203+
a4a22af8
AM
11204+#define TAGINO_KUID(cond, kuid, ktag) \
11205+ KUIDT_INIT(TAGINO_UID(cond, __kuid_val(kuid), __ktag_val(ktag)))
11206+#define TAGINO_KGID(cond, kgid, ktag) \
11207+ KGIDT_INIT(TAGINO_GID(cond, __kgid_val(kgid), __ktag_val(ktag)))
11208+#define TAGINO_KTAG(cond, ktag) \
11209+ KTAGT_INIT(TAGINO_TAG(cond, __ktag_val(ktag)))
11210+
11211+
4bf69007
AM
11212+#define INOTAG_UID(cond, uid, gid) \
11213+ ((cond) ? ((uid) & MAX_UID) : (uid))
11214+#define INOTAG_GID(cond, uid, gid) \
11215+ ((cond) ? ((gid) & MAX_GID) : (gid))
d337f35e 11216+
a4a22af8
AM
11217+#define INOTAG_KUID(cond, kuid, kgid) \
11218+ KUIDT_INIT(INOTAG_UID(cond, __kuid_val(kuid), __kgid_val(kgid)))
11219+#define INOTAG_KGID(cond, kuid, kgid) \
11220+ KGIDT_INIT(INOTAG_GID(cond, __kuid_val(kuid), __kgid_val(kgid)))
11221+#define INOTAG_KTAG(cond, kuid, kgid, ktag) \
11222+ KTAGT_INIT(INOTAG_TAG(cond, \
11223+ __kuid_val(kuid), __kgid_val(kgid), __ktag_val(ktag)))
11224+
d337f35e 11225+
4bf69007 11226+static inline uid_t dx_map_uid(uid_t uid)
3bac966d 11227+{
4bf69007
AM
11228+ if ((uid > MAX_UID) && (uid != -1))
11229+ uid = -2;
11230+ return (uid & MAX_UID);
d33d7b00 11231+}
d337f35e 11232+
4bf69007
AM
11233+static inline gid_t dx_map_gid(gid_t gid)
11234+{
11235+ if ((gid > MAX_GID) && (gid != -1))
11236+ gid = -2;
11237+ return (gid & MAX_GID);
11238+}
d337f35e 11239+
4bf69007
AM
11240+struct peer_tag {
11241+ int32_t xid;
11242+ int32_t nid;
d33d7b00 11243+};
d337f35e 11244+
4bf69007 11245+#define dx_notagcheck(sb) ((sb) && ((sb)->s_flags & MS_NOTAGCHECK))
2380c486 11246+
61333608 11247+int dx_parse_tag(char *string, vtag_t *tag, int remove, int *mnt_flags,
4bf69007 11248+ unsigned long *flags);
d337f35e 11249+
4bf69007 11250+#ifdef CONFIG_PROPAGATE
d337f35e 11251+
4bf69007 11252+void __dx_propagate_tag(struct nameidata *nd, struct inode *inode);
d337f35e 11253+
4bf69007 11254+#define dx_propagate_tag(n, i) __dx_propagate_tag(n, i)
d337f35e 11255+
4bf69007
AM
11256+#else
11257+#define dx_propagate_tag(n, i) do { } while (0)
11258+#endif
d337f35e 11259+
4bf69007 11260+#endif /* _DX_TAG_H */
09a55596
AM
11261diff -NurpP --minimal linux-4.9.135/include/linux/vserver/tag_cmd.h linux-4.9.135-vs2.3.9.8/include/linux/vserver/tag_cmd.h
11262--- linux-4.9.135/include/linux/vserver/tag_cmd.h 1970-01-01 00:00:00.000000000 +0000
11263+++ linux-4.9.135-vs2.3.9.8/include/linux/vserver/tag_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11264@@ -0,0 +1,10 @@
11265+#ifndef _VSERVER_TAG_CMD_H
11266+#define _VSERVER_TAG_CMD_H
d337f35e 11267+
4bf69007 11268+#include <uapi/vserver/tag_cmd.h>
d337f35e 11269+
4bf69007 11270+extern int vc_task_tag(uint32_t);
3bac966d 11271+
4bf69007 11272+extern int vc_tag_migrate(uint32_t);
3bac966d 11273+
4bf69007 11274+#endif /* _VSERVER_TAG_CMD_H */
09a55596
AM
11275diff -NurpP --minimal linux-4.9.135/include/net/addrconf.h linux-4.9.135-vs2.3.9.8/include/net/addrconf.h
11276--- linux-4.9.135/include/net/addrconf.h 2018-10-20 10:39:20.000000000 +0000
11277+++ linux-4.9.135-vs2.3.9.8/include/net/addrconf.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 11278@@ -85,7 +85,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(str
c2e5f7c8
JR
11279
11280 int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
11281 const struct in6_addr *daddr, unsigned int srcprefs,
11282- struct in6_addr *saddr);
11283+ struct in6_addr *saddr, struct nx_info *nxi);
11284 int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
bb20add7 11285 u32 banned_flags);
c2e5f7c8 11286 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
09a55596
AM
11287diff -NurpP --minimal linux-4.9.135/include/net/af_unix.h linux-4.9.135-vs2.3.9.8/include/net/af_unix.h
11288--- linux-4.9.135/include/net/af_unix.h 2016-12-11 19:17:54.000000000 +0000
11289+++ linux-4.9.135-vs2.3.9.8/include/net/af_unix.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11290@@ -4,6 +4,7 @@
11291 #include <linux/socket.h>
11292 #include <linux/un.h>
11293 #include <linux/mutex.h>
cc23e853 11294+// #include <linux/vs_base.h>
4bf69007
AM
11295 #include <net/sock.h>
11296
cc23e853 11297 void unix_inflight(struct user_struct *user, struct file *fp);
09a55596
AM
11298diff -NurpP --minimal linux-4.9.135/include/net/inet_timewait_sock.h linux-4.9.135-vs2.3.9.8/include/net/inet_timewait_sock.h
11299--- linux-4.9.135/include/net/inet_timewait_sock.h 2018-10-20 10:39:20.000000000 +0000
11300+++ linux-4.9.135-vs2.3.9.8/include/net/inet_timewait_sock.h 2018-10-20 04:58:14.000000000 +0000
11301@@ -72,6 +72,10 @@ struct inet_timewait_sock {
b00e13aa 11302 #define tw_num __tw_common.skc_num
cc23e853
AM
11303 #define tw_cookie __tw_common.skc_cookie
11304 #define tw_dr __tw_common.skc_tw_dr
4bf69007
AM
11305+#define tw_xid __tw_common.skc_xid
11306+#define tw_vx_info __tw_common.skc_vx_info
11307+#define tw_nid __tw_common.skc_nid
11308+#define tw_nx_info __tw_common.skc_nx_info
b00e13aa 11309
4bf69007
AM
11310 int tw_timeout;
11311 volatile unsigned char tw_substate;
09a55596
AM
11312diff -NurpP --minimal linux-4.9.135/include/net/ip6_route.h linux-4.9.135-vs2.3.9.8/include/net/ip6_route.h
11313--- linux-4.9.135/include/net/ip6_route.h 2018-10-20 10:39:20.000000000 +0000
11314+++ linux-4.9.135-vs2.3.9.8/include/net/ip6_route.h 2018-10-20 04:58:14.000000000 +0000
cc23e853
AM
11315@@ -26,6 +26,7 @@ struct route_info {
11316 #include <linux/ip.h>
11317 #include <linux/ipv6.h>
11318 #include <linux/route.h>
11319+#include <linux/vs_inet6.h>
11320
11321 #define RT6_LOOKUP_F_IFACE 0x00000001
11322 #define RT6_LOOKUP_F_REACHABLE 0x00000002
11323@@ -98,17 +99,19 @@ int ip6_del_rt(struct rt6_info *);
11324 static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
11325 const struct in6_addr *daddr,
11326 unsigned int prefs,
11327- struct in6_addr *saddr)
11328+ struct in6_addr *saddr,
11329+ struct nx_info *nxi)
11330 {
11331 struct inet6_dev *idev =
11332 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
11333 int err = 0;
11334
11335- if (rt && rt->rt6i_prefsrc.plen)
11336+ if (rt && rt->rt6i_prefsrc.plen && (!nxi ||
11337+ v6_addr_in_nx_info(nxi, &rt->rt6i_prefsrc.addr, NXA_TYPE_ADDR)))
11338 *saddr = rt->rt6i_prefsrc.addr;
11339 else
11340 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
11341- daddr, prefs, saddr);
11342+ daddr, prefs, saddr, nxi);
11343
11344 return err;
11345 }
09a55596
AM
11346diff -NurpP --minimal linux-4.9.135/include/net/route.h linux-4.9.135-vs2.3.9.8/include/net/route.h
11347--- linux-4.9.135/include/net/route.h 2018-10-20 10:39:20.000000000 +0000
11348+++ linux-4.9.135-vs2.3.9.8/include/net/route.h 2018-10-20 04:58:14.000000000 +0000
11349@@ -226,6 +226,9 @@ static inline void ip_rt_put(struct rtab
b00e13aa 11350 dst_release(&rt->dst);
4bf69007
AM
11351 }
11352
11353+#include <linux/vs_base.h>
11354+#include <linux/vs_inet.h>
d337f35e 11355+
4bf69007
AM
11356 #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3)
11357
11358 extern const __u8 ip_tos2prio[16];
09a55596 11359@@ -273,6 +276,9 @@ static inline void ip_route_connect_init
4bf69007
AM
11360 protocol, flow_flags, dst, src, dport, sport);
11361 }
11362
11363+extern struct rtable *ip_v4_find_src(struct net *net, struct nx_info *,
11364+ struct flowi4 *);
d337f35e 11365+
4bf69007
AM
11366 static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
11367 __be32 dst, __be32 src, u32 tos,
11368 int oif, u8 protocol,
09a55596 11369@@ -281,11 +287,25 @@ static inline struct rtable *ip_route_co
4bf69007
AM
11370 {
11371 struct net *net = sock_net(sk);
11372 struct rtable *rt;
11373+ struct nx_info *nx_info = current_nx_info();
11374
11375 ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
f15949f2 11376 sport, dport, sk);
4bf69007
AM
11377
11378- if (!dst || !src) {
11379+ if (sk)
11380+ nx_info = sk->sk_nx_info;
d337f35e 11381+
4bf69007
AM
11382+ vxdprintk(VXD_CBIT(net, 4),
11383+ "ip_route_connect(%p) %p,%p;%lx",
11384+ sk, nx_info, sk->sk_socket,
11385+ (sk->sk_socket?sk->sk_socket->flags:0));
d337f35e 11386+
4bf69007
AM
11387+ rt = ip_v4_find_src(net, nx_info, fl4);
11388+ if (IS_ERR(rt))
11389+ return rt;
11390+ ip_rt_put(rt);
d337f35e 11391+
4bf69007
AM
11392+ if (!fl4->daddr || !fl4->saddr) {
11393 rt = __ip_route_output_key(net, fl4);
11394 if (IS_ERR(rt))
11395 return rt;
09a55596
AM
11396diff -NurpP --minimal linux-4.9.135/include/net/sock.h linux-4.9.135-vs2.3.9.8/include/net/sock.h
11397--- linux-4.9.135/include/net/sock.h 2018-10-20 10:39:20.000000000 +0000
11398+++ linux-4.9.135-vs2.3.9.8/include/net/sock.h 2018-10-20 04:58:14.000000000 +0000
cc23e853
AM
11399@@ -187,6 +187,10 @@ struct sock_common {
11400 struct in6_addr skc_v6_daddr;
11401 struct in6_addr skc_v6_rcv_saddr;
4bf69007 11402 #endif
61333608 11403+ vxid_t skc_xid;
4bf69007 11404+ struct vx_info *skc_vx_info;
61333608 11405+ vnid_t skc_nid;
4bf69007 11406+ struct nx_info *skc_nx_info;
c2e5f7c8 11407
cc23e853
AM
11408 atomic64_t skc_cookie;
11409
11410@@ -336,8 +340,12 @@ struct sock {
4bf69007
AM
11411 #define sk_prot __sk_common.skc_prot
11412 #define sk_net __sk_common.skc_net
c2e5f7c8
JR
11413 #define sk_v6_daddr __sk_common.skc_v6_daddr
11414-#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
11415+#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
cc23e853 11416 #define sk_cookie __sk_common.skc_cookie
4bf69007
AM
11417+#define sk_xid __sk_common.skc_xid
11418+#define sk_vx_info __sk_common.skc_vx_info
11419+#define sk_nid __sk_common.skc_nid
11420+#define sk_nx_info __sk_common.skc_nx_info
cc23e853
AM
11421 #define sk_incoming_cpu __sk_common.skc_incoming_cpu
11422 #define sk_flags __sk_common.skc_flags
11423 #define sk_rxhash __sk_common.skc_rxhash
09a55596
AM
11424diff -NurpP --minimal linux-4.9.135/include/uapi/Kbuild linux-4.9.135-vs2.3.9.8/include/uapi/Kbuild
11425--- linux-4.9.135/include/uapi/Kbuild 2016-12-11 19:17:54.000000000 +0000
11426+++ linux-4.9.135-vs2.3.9.8/include/uapi/Kbuild 2018-10-20 04:58:14.000000000 +0000
bb20add7 11427@@ -13,3 +13,4 @@ header-y += drm/
4bf69007
AM
11428 header-y += xen/
11429 header-y += scsi/
bb20add7 11430 header-y += misc/
4bf69007 11431+header-y += vserver/
09a55596
AM
11432diff -NurpP --minimal linux-4.9.135/include/uapi/linux/btrfs_tree.h linux-4.9.135-vs2.3.9.8/include/uapi/linux/btrfs_tree.h
11433--- linux-4.9.135/include/uapi/linux/btrfs_tree.h 2018-10-20 10:39:20.000000000 +0000
11434+++ linux-4.9.135-vs2.3.9.8/include/uapi/linux/btrfs_tree.h 2018-10-20 04:58:14.000000000 +0000
11435@@ -564,11 +564,14 @@ struct btrfs_inode_item {
cc23e853
AM
11436 /* modification sequence number for NFS */
11437 __le64 sequence;
11438
11439+ __le16 tag;
11440 /*
11441 * a little future expansion, for more than this we can
11442 * just grow the inode item and version it
11443 */
11444- __le64 reserved[4];
11445+ __le16 reserved16;
11446+ __le32 reserved32;
11447+ __le64 reserved[3];
11448 struct btrfs_timespec atime;
11449 struct btrfs_timespec ctime;
11450 struct btrfs_timespec mtime;
09a55596
AM
11451diff -NurpP --minimal linux-4.9.135/include/uapi/linux/capability.h linux-4.9.135-vs2.3.9.8/include/uapi/linux/capability.h
11452--- linux-4.9.135/include/uapi/linux/capability.h 2016-12-11 19:17:54.000000000 +0000
11453+++ linux-4.9.135-vs2.3.9.8/include/uapi/linux/capability.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 11454@@ -257,6 +257,7 @@ struct vfs_cap_data {
4bf69007
AM
11455 arbitrary SCSI commands */
11456 /* Allow setting encryption key on loopback filesystem */
11457 /* Allow setting zone reclaim policy */
11458+/* Allow the selection of a security context */
11459
11460 #define CAP_SYS_ADMIN 21
11461
cc23e853 11462@@ -352,7 +353,12 @@ struct vfs_cap_data {
4bf69007 11463
bb20add7 11464 #define CAP_LAST_CAP CAP_AUDIT_READ
4bf69007
AM
11465
11466-#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
11467+/* Allow context manipulations */
11468+/* Allow changing context info on files */
d337f35e 11469+
4bf69007 11470+#define CAP_CONTEXT 63
d337f35e 11471+
4bf69007
AM
11472+#define cap_valid(x) ((x) >= 0 && ((x) <= CAP_LAST_CAP || (x) == CAP_CONTEXT))
11473
11474 /*
11475 * Bit location of each capability (used by user-space library and kernel)
09a55596
AM
11476diff -NurpP --minimal linux-4.9.135/include/uapi/linux/fs.h linux-4.9.135-vs2.3.9.8/include/uapi/linux/fs.h
11477--- linux-4.9.135/include/uapi/linux/fs.h 2018-10-20 10:39:20.000000000 +0000
11478+++ linux-4.9.135-vs2.3.9.8/include/uapi/linux/fs.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 11479@@ -130,6 +130,9 @@ struct inodes_stat_t {
4bf69007
AM
11480 #define MS_I_VERSION (1<<23) /* Update inode I_version field */
11481 #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
cc23e853 11482 #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
b00e13aa
AM
11483+#define MS_TAGGED (1<<8) /* use generic inode tagging */
11484+#define MS_NOTAGCHECK (1<<9) /* don't check tags */
cc23e853 11485+#define MS_TAGID (1<<26) /* use specific tag for this mount */
b00e13aa
AM
11486
11487 /* These sb flags are internal to the kernel */
cc23e853
AM
11488 #define MS_SUBMOUNT (1<<26)
11489@@ -313,13 +316,16 @@ struct fscrypt_policy {
11490 #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
11491 #define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
4bf69007
AM
11492 #define FS_NOCOW_FL 0x00800000 /* Do not cow file */
11493+#define FS_IXUNLINK_FL 0x08000000 /* Immutable invert on unlink */
cc23e853
AM
11494 #define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
11495 #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
4bf69007
AM
11496 #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
11497
11498-#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
11499-#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
11500+#define FS_BARRIER_FL 0x04000000 /* Barrier for chroot() */
11501+#define FS_COW_FL 0x20000000 /* Copy on Write marker */
11502
11503+#define FS_FL_USER_VISIBLE 0x0103DFFF /* User visible flags */
11504+#define FS_FL_USER_MODIFIABLE 0x010380FF /* User modifiable flags */
11505
11506 #define SYNC_FILE_RANGE_WAIT_BEFORE 1
11507 #define SYNC_FILE_RANGE_WRITE 2
09a55596
AM
11508diff -NurpP --minimal linux-4.9.135/include/uapi/linux/gfs2_ondisk.h linux-4.9.135-vs2.3.9.8/include/uapi/linux/gfs2_ondisk.h
11509--- linux-4.9.135/include/uapi/linux/gfs2_ondisk.h 2016-12-11 19:17:54.000000000 +0000
11510+++ linux-4.9.135-vs2.3.9.8/include/uapi/linux/gfs2_ondisk.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11511@@ -225,6 +225,9 @@ enum {
11512 gfs2fl_Sync = 8,
11513 gfs2fl_System = 9,
11514 gfs2fl_TopLevel = 10,
11515+ gfs2fl_IXUnlink = 16,
11516+ gfs2fl_Barrier = 17,
11517+ gfs2fl_Cow = 18,
11518 gfs2fl_TruncInProg = 29,
11519 gfs2fl_InheritDirectio = 30,
11520 gfs2fl_InheritJdata = 31,
11521@@ -242,6 +245,9 @@ enum {
11522 #define GFS2_DIF_SYNC 0x00000100
11523 #define GFS2_DIF_SYSTEM 0x00000200 /* New in gfs2 */
11524 #define GFS2_DIF_TOPDIR 0x00000400 /* New in gfs2 */
11525+#define GFS2_DIF_IXUNLINK 0x00010000
11526+#define GFS2_DIF_BARRIER 0x00020000
11527+#define GFS2_DIF_COW 0x00040000
11528 #define GFS2_DIF_TRUNC_IN_PROG 0x20000000 /* New in gfs2 */
11529 #define GFS2_DIF_INHERIT_DIRECTIO 0x40000000 /* only in gfs1 */
11530 #define GFS2_DIF_INHERIT_JDATA 0x80000000
09a55596
AM
11531diff -NurpP --minimal linux-4.9.135/include/uapi/linux/if_tun.h linux-4.9.135-vs2.3.9.8/include/uapi/linux/if_tun.h
11532--- linux-4.9.135/include/uapi/linux/if_tun.h 2016-12-11 19:17:54.000000000 +0000
11533+++ linux-4.9.135-vs2.3.9.8/include/uapi/linux/if_tun.h 2018-10-20 04:58:14.000000000 +0000
cc23e853
AM
11534@@ -56,6 +56,7 @@
11535 */
11536 #define TUNSETVNETBE _IOW('T', 222, int)
11537 #define TUNGETVNETBE _IOR('T', 223, int)
11538+#define TUNSETNID _IOW('T', 224, int)
4bf69007
AM
11539
11540 /* TUNSETIFF ifr flags */
11541 #define IFF_TUN 0x0001
09a55596
AM
11542diff -NurpP --minimal linux-4.9.135/include/uapi/linux/major.h linux-4.9.135-vs2.3.9.8/include/uapi/linux/major.h
11543--- linux-4.9.135/include/uapi/linux/major.h 2016-12-11 19:17:54.000000000 +0000
11544+++ linux-4.9.135-vs2.3.9.8/include/uapi/linux/major.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11545@@ -15,6 +15,7 @@
11546 #define HD_MAJOR IDE0_MAJOR
11547 #define PTY_SLAVE_MAJOR 3
11548 #define TTY_MAJOR 4
11549+#define VROOT_MAJOR 4 /* NOTE(review): same number as TTY_MAJOR above - presumably a separate (block vs. char) major space; confirm */
11550 #define TTYAUX_MAJOR 5
11551 #define LP_MAJOR 6
11552 #define VCS_MAJOR 7
09a55596
AM
11553diff -NurpP --minimal linux-4.9.135/include/uapi/linux/nfs_mount.h linux-4.9.135-vs2.3.9.8/include/uapi/linux/nfs_mount.h
11554--- linux-4.9.135/include/uapi/linux/nfs_mount.h 2016-12-11 19:17:54.000000000 +0000
11555+++ linux-4.9.135-vs2.3.9.8/include/uapi/linux/nfs_mount.h 2018-10-20 04:58:14.000000000 +0000
4bf69007 11556@@ -63,7 +63,8 @@ struct nfs_mount_data {
c2e5f7c8 11557 #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 non-text parsed mount data only */
4bf69007
AM
11558 #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */
11559 #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */
11560-#define NFS_MOUNT_FLAGMASK 0xFFFF
11561+#define NFS_MOUNT_TAGGED 0x10000 /* context tagging; NOTE(review): same value as the internal NFS_MOUNT_LOOKUP_CACHE_NONEG below - confirm */
11562+#define NFS_MOUNT_FLAGMASK 0x1FFFF /* widened from 0xFFFF so the mask admits NFS_MOUNT_TAGGED */
11563
11564 /* The following are for internal use only */
11565 #define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
09a55596
AM
11566diff -NurpP --minimal linux-4.9.135/include/uapi/linux/reboot.h linux-4.9.135-vs2.3.9.8/include/uapi/linux/reboot.h
11567--- linux-4.9.135/include/uapi/linux/reboot.h 2016-12-11 19:17:54.000000000 +0000
11568+++ linux-4.9.135-vs2.3.9.8/include/uapi/linux/reboot.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11569@@ -33,7 +33,7 @@
11570 #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4
11571 #define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2
11572 #define LINUX_REBOOT_CMD_KEXEC 0x45584543
11573-
11574+#define LINUX_REBOOT_CMD_OOM 0xDEADBEEF
11575
11576
11577 #endif /* _UAPI_LINUX_REBOOT_H */
09a55596
AM
11578diff -NurpP --minimal linux-4.9.135/include/uapi/linux/sysctl.h linux-4.9.135-vs2.3.9.8/include/uapi/linux/sysctl.h
11579--- linux-4.9.135/include/uapi/linux/sysctl.h 2016-12-11 19:17:54.000000000 +0000
11580+++ linux-4.9.135-vs2.3.9.8/include/uapi/linux/sysctl.h 2018-10-20 04:58:14.000000000 +0000
cc23e853 11581@@ -58,6 +58,7 @@ enum
4bf69007
AM
11582 CTL_ABI=9, /* Binary emulation */
11583 CTL_CPU=10, /* CPU stuff (speed scaling, etc) */
11584 CTL_ARLAN=254, /* arlan wireless driver */
11585+ CTL_VSERVER=4242, /* Linux-VServer debug */
11586 CTL_S390DBF=5677, /* s390 debug */
11587 CTL_SUNRPC=7249, /* sunrpc debug */
11588 CTL_PM=9899, /* frv power management */
cc23e853 11589@@ -92,6 +93,7 @@ enum
4bf69007
AM
11590
11591 KERN_PANIC=15, /* int: panic timeout */
11592 KERN_REALROOTDEV=16, /* real root device to mount after initrd */
11593+ KERN_VSHELPER=17, /* string: path to vshelper policy agent */
11594
11595 KERN_SPARC_REBOOT=21, /* reboot command on Sparc */
11596 KERN_CTLALTDEL=22, /* int: allow ctl-alt-del to reboot */
09a55596
AM
11597diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/Kbuild linux-4.9.135-vs2.3.9.8/include/uapi/vserver/Kbuild
11598--- linux-4.9.135/include/uapi/vserver/Kbuild 1970-01-01 00:00:00.000000000 +0000
11599+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/Kbuild 2018-10-20 04:58:14.000000000 +0000
4bf69007 11600@@ -0,0 +1,9 @@
d337f35e 11601+
4bf69007
AM
11602+header-y += context_cmd.h network_cmd.h space_cmd.h \
11603+ cacct_cmd.h cvirt_cmd.h limit_cmd.h dlimit_cmd.h \
11604+ inode_cmd.h tag_cmd.h sched_cmd.h signal_cmd.h \
11605+ debug_cmd.h device_cmd.h
2380c486 11606+
4bf69007
AM
11607+header-y += switch.h context.h network.h monitor.h \
11608+ limit.h inode.h device.h
2380c486 11609+
09a55596
AM
11610diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/cacct_cmd.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/cacct_cmd.h
11611--- linux-4.9.135/include/uapi/vserver/cacct_cmd.h 1970-01-01 00:00:00.000000000 +0000
11612+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/cacct_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11613@@ -0,0 +1,15 @@
11614+#ifndef _UAPI_VS_CACCT_CMD_H
11615+#define _UAPI_VS_CACCT_CMD_H
d337f35e
JR
11616+
11617+
4bf69007 11618+/* socket accounting statistics command */
d337f35e 11619+
4bf69007 11620+#define VCMD_sock_stat VC_CMD(VSTAT, 5, 0)
d337f35e 11621+
4bf69007
AM
11622+struct vcmd_sock_stat_v0 {
11623+ uint32_t field;
11624+ uint32_t count[3];
11625+ uint64_t total[3];
11626+};
d337f35e 11627+
4bf69007 11628+#endif /* _UAPI_VS_CACCT_CMD_H */
09a55596
AM
11629diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/context.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/context.h
11630--- linux-4.9.135/include/uapi/vserver/context.h 1970-01-01 00:00:00.000000000 +0000
11631+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/context.h 2018-10-20 04:58:14.000000000 +0000
b00e13aa 11632@@ -0,0 +1,81 @@
4bf69007
AM
11633+#ifndef _UAPI_VS_CONTEXT_H
11634+#define _UAPI_VS_CONTEXT_H
d337f35e 11635+
4bf69007
AM
11636+#include <linux/types.h>
11637+#include <linux/capability.h>
d337f35e
JR
11638+
11639+
4bf69007 11640+/* context flags */
d337f35e 11641+
4bf69007
AM
11642+#define VXF_INFO_SCHED 0x00000002
11643+#define VXF_INFO_NPROC 0x00000004
11644+#define VXF_INFO_PRIVATE 0x00000008
d337f35e 11645+
4bf69007
AM
11646+#define VXF_INFO_INIT 0x00000010
11647+#define VXF_INFO_HIDE 0x00000020
11648+#define VXF_INFO_ULIMIT 0x00000040
11649+#define VXF_INFO_NSPACE 0x00000080
d337f35e 11650+
4bf69007
AM
11651+#define VXF_SCHED_HARD 0x00000100
11652+#define VXF_SCHED_PRIO 0x00000200
11653+#define VXF_SCHED_PAUSE 0x00000400
2380c486 11654+
4bf69007
AM
11655+#define VXF_VIRT_MEM 0x00010000
11656+#define VXF_VIRT_UPTIME 0x00020000
11657+#define VXF_VIRT_CPU 0x00040000
11658+#define VXF_VIRT_LOAD 0x00080000
11659+#define VXF_VIRT_TIME 0x00100000
d337f35e 11660+
4bf69007
AM
11661+#define VXF_HIDE_MOUNT 0x01000000
11662+/* was VXF_HIDE_NETIF 0x02000000 */
11663+#define VXF_HIDE_VINFO 0x04000000
d337f35e 11664+
4bf69007
AM
11665+#define VXF_STATE_SETUP (1ULL << 32)
11666+#define VXF_STATE_INIT (1ULL << 33)
11667+#define VXF_STATE_ADMIN (1ULL << 34)
d337f35e 11668+
4bf69007
AM
11669+#define VXF_SC_HELPER (1ULL << 36)
11670+#define VXF_REBOOT_KILL (1ULL << 37)
11671+#define VXF_PERSISTENT (1ULL << 38)
d337f35e 11672+
4bf69007
AM
11673+#define VXF_FORK_RSS (1ULL << 48)
11674+#define VXF_PROLIFIC (1ULL << 49)
d337f35e 11675+
4bf69007 11676+#define VXF_IGNEG_NICE (1ULL << 52)
d337f35e 11677+
4bf69007 11678+#define VXF_ONE_TIME (0x0007ULL << 32)
d337f35e 11679+
4bf69007 11680+#define VXF_INIT_SET (VXF_STATE_SETUP | VXF_STATE_INIT | VXF_STATE_ADMIN)
d337f35e
JR
11681+
11682+
4bf69007 11683+/* context migration */
d337f35e 11684+
4bf69007
AM
11685+#define VXM_SET_INIT 0x00000001
11686+#define VXM_SET_REAPER 0x00000002
d337f35e 11687+
4bf69007 11688+/* context caps */
d337f35e 11689+
4bf69007
AM
11690+#define VXC_SET_UTSNAME 0x00000001
11691+#define VXC_SET_RLIMIT 0x00000002
11692+#define VXC_FS_SECURITY 0x00000004
11693+#define VXC_FS_TRUSTED 0x00000008
11694+#define VXC_TIOCSTI 0x00000010
2380c486 11695+
4bf69007
AM
11696+/* was VXC_RAW_ICMP 0x00000100 */
11697+#define VXC_SYSLOG 0x00001000
11698+#define VXC_OOM_ADJUST 0x00002000
11699+#define VXC_AUDIT_CONTROL 0x00004000
d337f35e 11700+
c2e5f7c8
JR
11701+#define VXC_SECURE_MOUNT 0x00010000
11702+/* #define VXC_SECURE_REMOUNT 0x00020000 */
4bf69007 11703+#define VXC_BINARY_MOUNT 0x00040000
b00e13aa 11704+#define VXC_DEV_MOUNT 0x00080000
d337f35e 11705+
4bf69007
AM
11706+#define VXC_QUOTA_CTL 0x00100000
11707+#define VXC_ADMIN_MAPPER 0x00200000
11708+#define VXC_ADMIN_CLOOP 0x00400000
d337f35e 11709+
4bf69007
AM
11710+#define VXC_KTHREAD 0x01000000
11711+#define VXC_NAMESPACE 0x02000000
d337f35e 11712+
4bf69007 11713+#endif /* _UAPI_VS_CONTEXT_H */
09a55596
AM
11714diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/context_cmd.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/context_cmd.h
11715--- linux-4.9.135/include/uapi/vserver/context_cmd.h 1970-01-01 00:00:00.000000000 +0000
11716+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/context_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11717@@ -0,0 +1,115 @@
11718+#ifndef _UAPI_VS_CONTEXT_CMD_H
11719+#define _UAPI_VS_CONTEXT_CMD_H
d33d7b00
AM
11720+
11721+
4bf69007 11722+/* vinfo commands */
3bac966d 11723+
4bf69007 11724+#define VCMD_task_xid VC_CMD(VINFO, 1, 0)
3bac966d 11725+
3bac966d 11726+
4bf69007 11727+#define VCMD_vx_info VC_CMD(VINFO, 5, 0)
3bac966d 11728+
4bf69007
AM
11729+struct vcmd_vx_info_v0 {
11730+ uint32_t xid;
11731+ uint32_t initpid;
11732+ /* more to come */
11733+};
3bac966d
AM
11734+
11735+
4bf69007 11736+#define VCMD_ctx_stat VC_CMD(VSTAT, 0, 0)
3bac966d 11737+
4bf69007
AM
11738+struct vcmd_ctx_stat_v0 {
11739+ uint32_t usecnt;
11740+ uint32_t tasks;
11741+ /* more to come */
11742+};
3bac966d 11743+
3bac966d 11744+
4bf69007 11745+/* context commands */
3bac966d 11746+
4bf69007
AM
11747+#define VCMD_ctx_create_v0 VC_CMD(VPROC, 1, 0)
11748+#define VCMD_ctx_create VC_CMD(VPROC, 1, 1)
3bac966d 11749+
4bf69007
AM
11750+struct vcmd_ctx_create {
11751+ uint64_t flagword;
11752+};
3bac966d 11753+
4bf69007
AM
11754+#define VCMD_ctx_migrate_v0 VC_CMD(PROCMIG, 1, 0)
11755+#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 1)
3bac966d 11756+
4bf69007
AM
11757+struct vcmd_ctx_migrate {
11758+ uint64_t flagword;
11759+};
3bac966d 11760+
d33d7b00 11761+
d33d7b00 11762+
4bf69007 11763+/* flag commands */
d33d7b00 11764+
4bf69007
AM
11765+#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0)
11766+#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0)
d33d7b00 11767+
4bf69007
AM
11768+struct vcmd_ctx_flags_v0 {
11769+ uint64_t flagword;
11770+ uint64_t mask;
11771+};
3bac966d
AM
11772+
11773+
3bac966d 11774+
4bf69007 11775+/* context caps commands */
3bac966d 11776+
4bf69007
AM
11777+#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 1)
11778+#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 1)
d33d7b00 11779+
4bf69007
AM
11780+struct vcmd_ctx_caps_v1 {
11781+ uint64_t ccaps;
11782+ uint64_t cmask;
11783+};
d33d7b00 11784+
d33d7b00
AM
11785+
11786+
4bf69007 11787+/* bcaps commands */
d33d7b00 11788+
4bf69007
AM
11789+#define VCMD_get_bcaps VC_CMD(FLAGS, 9, 0)
11790+#define VCMD_set_bcaps VC_CMD(FLAGS, 10, 0)
d33d7b00 11791+
4bf69007
AM
11792+struct vcmd_bcaps {
11793+ uint64_t bcaps;
11794+ uint64_t bmask;
11795+};
3bac966d 11796+
d33d7b00 11797+
d33d7b00 11798+
4bf69007 11799+/* umask commands */
d33d7b00 11800+
4bf69007
AM
11801+#define VCMD_get_umask VC_CMD(FLAGS, 13, 0)
11802+#define VCMD_set_umask VC_CMD(FLAGS, 14, 0)
3bac966d 11803+
4bf69007
AM
11804+struct vcmd_umask {
11805+ uint64_t umask;
11806+ uint64_t mask;
11807+};
d33d7b00 11808+
d33d7b00
AM
11809+
11810+
4bf69007 11811+/* wmask commands */
d33d7b00 11812+
4bf69007
AM
11813+#define VCMD_get_wmask VC_CMD(FLAGS, 15, 0)
11814+#define VCMD_set_wmask VC_CMD(FLAGS, 16, 0)
d33d7b00 11815+
4bf69007
AM
11816+struct vcmd_wmask {
11817+ uint64_t wmask;
11818+ uint64_t mask;
d33d7b00
AM
11819+};
11820+
d33d7b00 11821+
d33d7b00 11822+
4bf69007 11823+/* OOM badness */
d33d7b00 11824+
4bf69007
AM
11825+#define VCMD_get_badness VC_CMD(MEMCTRL, 5, 0)
11826+#define VCMD_set_badness VC_CMD(MEMCTRL, 6, 0)
d33d7b00 11827+
4bf69007
AM
11828+struct vcmd_badness_v0 {
11829+ int64_t bias;
11830+};
d33d7b00 11831+
4bf69007 11832+#endif /* _UAPI_VS_CONTEXT_CMD_H */
09a55596
AM
11833diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/cvirt_cmd.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/cvirt_cmd.h
11834--- linux-4.9.135/include/uapi/vserver/cvirt_cmd.h 1970-01-01 00:00:00.000000000 +0000
11835+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/cvirt_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11836@@ -0,0 +1,41 @@
11837+#ifndef _UAPI_VS_CVIRT_CMD_H
11838+#define _UAPI_VS_CVIRT_CMD_H
d33d7b00 11839+
d33d7b00 11840+
4bf69007 11841+/* virtual host info name commands */
d33d7b00 11842+
4bf69007
AM
11843+#define VCMD_set_vhi_name VC_CMD(VHOST, 1, 0)
11844+#define VCMD_get_vhi_name VC_CMD(VHOST, 2, 0)
d33d7b00 11845+
4bf69007
AM
11846+struct vcmd_vhi_name_v0 {
11847+ uint32_t field;
11848+ char name[65];
11849+};
d33d7b00 11850+
d33d7b00 11851+
4bf69007
AM
11852+enum vhi_name_field {
11853+ VHIN_CONTEXT = 0,
11854+ VHIN_SYSNAME,
11855+ VHIN_NODENAME,
11856+ VHIN_RELEASE,
11857+ VHIN_VERSION,
11858+ VHIN_MACHINE,
11859+ VHIN_DOMAINNAME,
11860+};
d33d7b00 11861+
d33d7b00 11862+
d33d7b00 11863+
4bf69007 11864+#define VCMD_virt_stat VC_CMD(VSTAT, 3, 0)
d33d7b00 11865+
4bf69007
AM
11866+struct vcmd_virt_stat_v0 {
11867+ uint64_t offset;
11868+ uint64_t uptime;
11869+ uint32_t nr_threads;
11870+ uint32_t nr_running;
11871+ uint32_t nr_uninterruptible;
11872+ uint32_t nr_onhold;
11873+ uint32_t nr_forks;
11874+ uint32_t load[3];
11875+};
2380c486 11876+
4bf69007 11877+#endif /* _UAPI_VS_CVIRT_CMD_H */
09a55596
AM
11878diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/debug_cmd.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/debug_cmd.h
11879--- linux-4.9.135/include/uapi/vserver/debug_cmd.h 1970-01-01 00:00:00.000000000 +0000
11880+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/debug_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11881@@ -0,0 +1,24 @@
11882+#ifndef _UAPI_VS_DEBUG_CMD_H
11883+#define _UAPI_VS_DEBUG_CMD_H
537831f9 11884+
537831f9 11885+
4bf69007 11886+/* debug commands */
537831f9 11887+
4bf69007 11888+#define VCMD_dump_history VC_CMD(DEBUG, 1, 0)
537831f9 11889+
4bf69007
AM
11890+#define VCMD_read_history VC_CMD(DEBUG, 5, 0)
11891+#define VCMD_read_monitor VC_CMD(DEBUG, 6, 0)
537831f9 11892+
4bf69007
AM
11893+struct vcmd_read_history_v0 {
11894+ uint32_t index;
11895+ uint32_t count;
11896+ char __user *data;
11897+};
537831f9 11898+
4bf69007
AM
11899+struct vcmd_read_monitor_v0 {
11900+ uint32_t index;
11901+ uint32_t count;
11902+ char __user *data;
11903+};
537831f9 11904+
4bf69007 11905+#endif /* _UAPI_VS_DEBUG_CMD_H */
09a55596
AM
11906diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/device.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/device.h
11907--- linux-4.9.135/include/uapi/vserver/device.h 1970-01-01 00:00:00.000000000 +0000
11908+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/device.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11909@@ -0,0 +1,12 @@
11910+#ifndef _UAPI_VS_DEVICE_H
11911+#define _UAPI_VS_DEVICE_H
d337f35e 11912+
d337f35e 11913+
4bf69007
AM
11914+#define DATTR_CREATE 0x00000001
11915+#define DATTR_OPEN 0x00000002
d337f35e 11916+
4bf69007 11917+#define DATTR_REMAP 0x00000010
d337f35e 11918+
4bf69007 11919+#define DATTR_MASK 0x00000013
ec22aa5c 11920+
4bf69007 11921+#endif /* _UAPI_VS_DEVICE_H */
09a55596
AM
11922diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/device_cmd.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/device_cmd.h
11923--- linux-4.9.135/include/uapi/vserver/device_cmd.h 1970-01-01 00:00:00.000000000 +0000
11924+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/device_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11925@@ -0,0 +1,16 @@
11926+#ifndef _UAPI_VS_DEVICE_CMD_H
11927+#define _UAPI_VS_DEVICE_CMD_H
2380c486 11928+
1163e6ab 11929+
4bf69007 11930+/* device vserver commands */
1163e6ab 11931+
4bf69007
AM
11932+#define VCMD_set_mapping VC_CMD(DEVICE, 1, 0)
11933+#define VCMD_unset_mapping VC_CMD(DEVICE, 2, 0)
e915af4e 11934+
4bf69007
AM
11935+struct vcmd_set_mapping_v0 {
11936+ const char __user *device;
11937+ const char __user *target;
11938+ uint32_t flags;
11939+};
e915af4e 11940+
4bf69007 11941+#endif /* _UAPI_VS_DEVICE_CMD_H */
09a55596
AM
11942diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/dlimit_cmd.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/dlimit_cmd.h
11943--- linux-4.9.135/include/uapi/vserver/dlimit_cmd.h 1970-01-01 00:00:00.000000000 +0000
11944+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/dlimit_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
11945@@ -0,0 +1,67 @@
11946+#ifndef _UAPI_VS_DLIMIT_CMD_H
11947+#define _UAPI_VS_DLIMIT_CMD_H
e915af4e 11948+
42bc425c 11949+
4bf69007 11950+/* dlimit vserver commands */
d337f35e 11951+
4bf69007
AM
11952+#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0)
11953+#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0)
d337f35e 11954+
4bf69007
AM
11955+#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0)
11956+#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0)
d337f35e 11957+
4bf69007
AM
11958+struct vcmd_ctx_dlimit_base_v0 {
11959+ const char __user *name;
11960+ uint32_t flags;
11961+};
11962+
11963+struct vcmd_ctx_dlimit_v0 {
11964+ const char __user *name;
11965+ uint32_t space_used; /* used space in kbytes */
11966+ uint32_t space_total; /* maximum space in kbytes */
11967+ uint32_t inodes_used; /* used inodes */
11968+ uint32_t inodes_total; /* maximum inodes */
11969+ uint32_t reserved; /* reserved for root in % */
11970+ uint32_t flags;
11971+};
11972+
11973+#define CDLIM_UNSET ((uint32_t)0UL)
11974+#define CDLIM_INFINITY ((uint32_t)~0UL)
11975+#define CDLIM_KEEP ((uint32_t)~1UL)
11976+
11977+#define DLIME_UNIT 0
11978+#define DLIME_KILO 1
11979+#define DLIME_MEGA 2
11980+#define DLIME_GIGA 3
11981+
11982+#define DLIMF_SHIFT 0x10
11983+
11984+#define DLIMS_USED 0
11985+#define DLIMS_TOTAL 2
11986+
11987+static inline /* Expand a 32-bit dlimit space value to 64 bits using the unit exponent carried in flags. */
11988+uint64_t dlimit_space_32to64(uint32_t val, uint32_t flags, int shift)
2380c486 11989+{
4bf69007
AM
11990+ int exp = (flags & DLIMF_SHIFT) ? /* with DLIMF_SHIFT: exponent is the field at bit offset 'shift', masked by DLIME_GIGA */
11991+ (flags >> shift) & DLIME_GIGA : DLIME_KILO; /* without it: fixed exponent DLIME_KILO */
11992+ return ((uint64_t)val) << (10 * exp); /* multiply by 1024^exp */
2380c486
JR
11993+}
11994+
4bf69007
AM
11995+static inline /* Compress a 64-bit dlimit space value into 32 bits, recording the unit exponent in *flags. */
11996+uint32_t dlimit_space_64to32(uint64_t val, uint32_t *flags, int shift)
2380c486 11997+{
4bf69007 11998+ int exp = 0;
ec22aa5c 11999+
4bf69007
AM
12000+ if (*flags & DLIMF_SHIFT) { /* caller accepts a scaled result */
12001+ while (val >= (1ULL << 32) && (exp < 3)) { /* >=: the value 1 << 32 itself does not fit in 32 bits */
12002+ val >>= 10; /* divide by 1024 per step */
12003+ exp++;
12004+ }
12005+ *flags &= ~(DLIME_GIGA << shift); /* clear the exponent field at 'shift' */
12006+ *flags |= exp << shift; /* store the exponent used (0..3) */
12007+ } else
12008+ val >>= 10; /* no DLIMF_SHIFT: always divide by 1024 */
12009+ return val; /* truncated to 32 bits if still too large after at most 3 steps */
2380c486
JR
12010+}
12011+
4bf69007 12012+#endif /* _UAPI_VS_DLIMIT_CMD_H */
09a55596
AM
12013diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/inode.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/inode.h
12014--- linux-4.9.135/include/uapi/vserver/inode.h 1970-01-01 00:00:00.000000000 +0000
12015+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/inode.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12016@@ -0,0 +1,23 @@
12017+#ifndef _UAPI_VS_INODE_H
12018+#define _UAPI_VS_INODE_H
2380c486 12019+
d337f35e 12020+
4bf69007 12021+#define IATTR_TAG 0x01000000
2380c486 12022+
4bf69007
AM
12023+#define IATTR_ADMIN 0x00000001
12024+#define IATTR_WATCH 0x00000002
12025+#define IATTR_HIDE 0x00000004
12026+#define IATTR_FLAGS 0x00000007
2380c486 12027+
4bf69007
AM
12028+#define IATTR_BARRIER 0x00010000
12029+#define IATTR_IXUNLINK 0x00020000
12030+#define IATTR_IMMUTABLE 0x00040000
12031+#define IATTR_COW 0x00080000
d337f35e 12032+
ec22aa5c 12033+
4bf69007 12034+/* inode ioctls */
ec22aa5c 12035+
4bf69007
AM
12036+#define FIOC_GETXFLG _IOR('x', 5, long)
12037+#define FIOC_SETXFLG _IOW('x', 6, long)
d337f35e 12038+
4bf69007 12039+#endif /* _UAPI_VS_INODE_H */
09a55596
AM
12040diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/inode_cmd.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/inode_cmd.h
12041--- linux-4.9.135/include/uapi/vserver/inode_cmd.h 1970-01-01 00:00:00.000000000 +0000
12042+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/inode_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12043@@ -0,0 +1,26 @@
12044+#ifndef _UAPI_VS_INODE_CMD_H
12045+#define _UAPI_VS_INODE_CMD_H
d337f35e 12046+
db55b927 12047+
4bf69007 12048+/* inode vserver commands */
2c8c5bc5 12049+
4bf69007
AM
12050+#define VCMD_get_iattr VC_CMD(INODE, 1, 1)
12051+#define VCMD_set_iattr VC_CMD(INODE, 2, 1)
2bf5ad28 12052+
4bf69007
AM
12053+#define VCMD_fget_iattr VC_CMD(INODE, 3, 0)
12054+#define VCMD_fset_iattr VC_CMD(INODE, 4, 0)
4a036bed 12055+
4bf69007
AM
12056+struct vcmd_ctx_iattr_v1 {
12057+ const char __user *name;
12058+ uint32_t tag;
12059+ uint32_t flags;
12060+ uint32_t mask;
12061+};
4a036bed 12062+
4bf69007
AM
12063+struct vcmd_ctx_fiattr_v0 {
12064+ uint32_t tag;
12065+ uint32_t flags;
12066+ uint32_t mask;
12067+};
4a036bed 12068+
4bf69007 12069+#endif /* _UAPI_VS_INODE_CMD_H */
09a55596
AM
12070diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/limit.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/limit.h
12071--- linux-4.9.135/include/uapi/vserver/limit.h 1970-01-01 00:00:00.000000000 +0000
12072+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/limit.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12073@@ -0,0 +1,14 @@
12074+#ifndef _UAPI_VS_LIMIT_H
12075+#define _UAPI_VS_LIMIT_H
4a036bed 12076+
42bc425c 12077+
4bf69007
AM
12078+#define VLIMIT_NSOCK 16
12079+#define VLIMIT_OPENFD 17
12080+#define VLIMIT_ANON 18
12081+#define VLIMIT_SHMEM 19
12082+#define VLIMIT_SEMARY 20
12083+#define VLIMIT_NSEMS 21
12084+#define VLIMIT_DENTRY 22
12085+#define VLIMIT_MAPPED 23
adc1caaa 12086+
4bf69007 12087+#endif /* _UAPI_VS_LIMIT_H */
09a55596
AM
12088diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/limit_cmd.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/limit_cmd.h
12089--- linux-4.9.135/include/uapi/vserver/limit_cmd.h 1970-01-01 00:00:00.000000000 +0000
12090+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/limit_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12091@@ -0,0 +1,40 @@
12092+#ifndef _UAPI_VS_LIMIT_CMD_H
12093+#define _UAPI_VS_LIMIT_CMD_H
adc1caaa 12094+
adc1caaa 12095+
4bf69007 12096+/* rlimit vserver commands */
adc1caaa 12097+
4bf69007
AM
12098+#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0)
12099+#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0)
12100+#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0)
12101+#define VCMD_reset_hits VC_CMD(RLIMIT, 7, 0)
12102+#define VCMD_reset_minmax VC_CMD(RLIMIT, 9, 0)
adc1caaa 12103+
4bf69007
AM
12104+struct vcmd_ctx_rlimit_v0 {
12105+ uint32_t id;
12106+ uint64_t minimum;
12107+ uint64_t softlimit;
12108+ uint64_t maximum;
12109+};
d33d7b00 12110+
4bf69007
AM
12111+struct vcmd_ctx_rlimit_mask_v0 {
12112+ uint32_t minimum;
12113+ uint32_t softlimit;
12114+ uint32_t maximum;
12115+};
d33d7b00 12116+
4bf69007 12117+#define VCMD_rlimit_stat VC_CMD(VSTAT, 1, 0)
d33d7b00 12118+
4bf69007
AM
12119+struct vcmd_rlimit_stat_v0 {
12120+ uint32_t id;
12121+ uint32_t hits;
12122+ uint64_t value;
12123+ uint64_t minimum;
12124+ uint64_t maximum;
12125+};
d33d7b00 12126+
4bf69007
AM
12127+#define CRLIM_UNSET (0ULL)
12128+#define CRLIM_INFINITY (~0ULL)
12129+#define CRLIM_KEEP (~1ULL)
d33d7b00 12130+
4bf69007 12131+#endif /* _UAPI_VS_LIMIT_CMD_H */
09a55596
AM
12132diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/monitor.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/monitor.h
12133--- linux-4.9.135/include/uapi/vserver/monitor.h 1970-01-01 00:00:00.000000000 +0000
12134+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/monitor.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12135@@ -0,0 +1,96 @@
12136+#ifndef _UAPI_VS_MONITOR_H
12137+#define _UAPI_VS_MONITOR_H
d33d7b00 12138+
4bf69007 12139+#include <linux/types.h>
d33d7b00 12140+
d33d7b00 12141+
4bf69007
AM
12142+enum {
12143+ VXM_UNUSED = 0,
d33d7b00 12144+
4bf69007 12145+ VXM_SYNC = 0x10,
d33d7b00 12146+
4bf69007
AM
12147+ VXM_UPDATE = 0x20,
12148+ VXM_UPDATE_1,
12149+ VXM_UPDATE_2,
d33d7b00 12150+
4bf69007
AM
12151+ VXM_RQINFO_1 = 0x24,
12152+ VXM_RQINFO_2,
d33d7b00 12153+
4bf69007
AM
12154+ VXM_ACTIVATE = 0x40,
12155+ VXM_DEACTIVATE,
12156+ VXM_IDLE,
d33d7b00 12157+
4bf69007
AM
12158+ VXM_HOLD = 0x44,
12159+ VXM_UNHOLD,
d33d7b00 12160+
4bf69007
AM
12161+ VXM_MIGRATE = 0x48,
12162+ VXM_RESCHED,
d33d7b00 12163+
4bf69007
AM
12164+ /* all other bits are flags */
12165+ VXM_SCHED = 0x80,
12166+};
d33d7b00 12167+
4bf69007
AM
12168+struct _vxm_update_1 {
12169+ uint32_t tokens_max;
12170+ uint32_t fill_rate;
12171+ uint32_t interval;
12172+};
d33d7b00 12173+
4bf69007
AM
12174+struct _vxm_update_2 {
12175+ uint32_t tokens_min;
12176+ uint32_t fill_rate;
12177+ uint32_t interval;
12178+};
d33d7b00 12179+
4bf69007
AM
12180+struct _vxm_rqinfo_1 {
12181+ uint16_t running;
12182+ uint16_t onhold;
12183+ uint16_t iowait;
12184+ uint16_t uintr;
12185+ uint32_t idle_tokens;
12186+};
d33d7b00 12187+
4bf69007
AM
12188+struct _vxm_rqinfo_2 {
12189+ uint32_t norm_time;
12190+ uint32_t idle_time;
12191+ uint32_t idle_skip;
12192+};
d33d7b00 12193+
4bf69007
AM
12194+struct _vxm_sched {
12195+ uint32_t tokens;
12196+ uint32_t norm_time;
12197+ uint32_t idle_time;
12198+};
d33d7b00 12199+
4bf69007
AM
12200+struct _vxm_task {
12201+ uint16_t pid;
12202+ uint16_t state;
12203+};
d33d7b00 12204+
4bf69007
AM
12205+struct _vxm_event {
12206+ uint32_t jif;
12207+ union {
12208+ uint32_t seq;
12209+ uint32_t sec;
12210+ };
12211+ union {
12212+ uint32_t tokens;
12213+ uint32_t nsec;
12214+ struct _vxm_task tsk;
12215+ };
12216+};
61b0c03f 12217+
4bf69007
AM
12218+struct _vx_mon_entry {
12219+ uint16_t type;
12220+ uint16_t xid;
12221+ union {
12222+ struct _vxm_event ev;
12223+ struct _vxm_sched sd;
12224+ struct _vxm_update_1 u1;
12225+ struct _vxm_update_2 u2;
12226+ struct _vxm_rqinfo_1 q1;
12227+ struct _vxm_rqinfo_2 q2;
12228+ };
12229+};
d33d7b00 12230+
4bf69007 12231+#endif /* _UAPI_VS_MONITOR_H */
09a55596
AM
12232diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/network.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/network.h
12233--- linux-4.9.135/include/uapi/vserver/network.h 1970-01-01 00:00:00.000000000 +0000
12234+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/network.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12235@@ -0,0 +1,76 @@
12236+#ifndef _UAPI_VS_NETWORK_H
12237+#define _UAPI_VS_NETWORK_H
d33d7b00 12238+
4bf69007 12239+#include <linux/types.h>
d33d7b00 12240+
d33d7b00 12241+
4bf69007 12242+#define MAX_N_CONTEXT 65535 /* Arbitrary limit */
d33d7b00 12243+
d33d7b00 12244+
4bf69007 12245+/* network flags */
d33d7b00 12246+
4bf69007 12247+#define NXF_INFO_PRIVATE 0x00000008
d33d7b00 12248+
4bf69007
AM
12249+#define NXF_SINGLE_IP 0x00000100
12250+#define NXF_LBACK_REMAP 0x00000200
12251+#define NXF_LBACK_ALLOW 0x00000400
d33d7b00 12252+
4bf69007
AM
12253+#define NXF_HIDE_NETIF 0x02000000
12254+#define NXF_HIDE_LBACK 0x04000000
265d6dcc 12255+
4bf69007
AM
12256+#define NXF_STATE_SETUP (1ULL << 32)
12257+#define NXF_STATE_ADMIN (1ULL << 34)
d33d7b00 12258+
4bf69007
AM
12259+#define NXF_SC_HELPER (1ULL << 36)
12260+#define NXF_PERSISTENT (1ULL << 38)
d33d7b00 12261+
4bf69007 12262+#define NXF_ONE_TIME (0x0005ULL << 32)
d33d7b00 12263+
d33d7b00 12264+
4bf69007 12265+#define NXF_INIT_SET (__nxf_init_set())
d33d7b00 12266+
4bf69007
AM
12267+static inline uint64_t __nxf_init_set(void) { /* Build the NXF_INIT_SET flag word; NOTE(review): depends on CONFIG_* symbols in a uapi header - confirm userspace sees the intended value */
12268+ return NXF_STATE_ADMIN /* always included */
12269+#ifdef CONFIG_VSERVER_AUTO_LBACK
12270+ | NXF_LBACK_REMAP /* both loopback flags only with CONFIG_VSERVER_AUTO_LBACK */
12271+ | NXF_HIDE_LBACK
12272+#endif
12273+#ifdef CONFIG_VSERVER_AUTO_SINGLE
12274+ | NXF_SINGLE_IP /* only with CONFIG_VSERVER_AUTO_SINGLE */
12275+#endif
12276+ | NXF_HIDE_NETIF; /* always included */
12277+}
d33d7b00 12278+
d33d7b00 12279+
4bf69007 12280+/* network caps */
d33d7b00 12281+
4bf69007 12282+#define NXC_TUN_CREATE 0x00000001
d33d7b00 12283+
4bf69007 12284+#define NXC_RAW_ICMP 0x00000100
d33d7b00 12285+
4bf69007 12286+#define NXC_MULTICAST 0x00001000
d33d7b00 12287+
adc1caaa 12288+
4bf69007 12289+/* address types */
adc1caaa 12290+
4bf69007
AM
12291+#define NXA_TYPE_IPV4 0x0001
12292+#define NXA_TYPE_IPV6 0x0002
adc1caaa 12293+
4bf69007
AM
12294+#define NXA_TYPE_NONE 0x0000
12295+#define NXA_TYPE_ANY 0x00FF
adc1caaa 12296+
4bf69007
AM
12297+#define NXA_TYPE_ADDR 0x0010
12298+#define NXA_TYPE_MASK 0x0020
12299+#define NXA_TYPE_RANGE 0x0040
adc1caaa 12300+
4bf69007 12301+#define NXA_MASK_ALL (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE)
adc1caaa 12302+
4bf69007
AM
12303+#define NXA_MOD_BCAST 0x0100
12304+#define NXA_MOD_LBACK 0x0200
adc1caaa 12305+
4bf69007 12306+#define NXA_LOOPBACK 0x1000
2380c486 12307+
4bf69007
AM
12308+#define NXA_MASK_BIND (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK)
12309+#define NXA_MASK_SHOW (NXA_MASK_ALL | NXA_LOOPBACK)
2380c486 12310+
4bf69007 12311+#endif /* _UAPI_VS_NETWORK_H */
09a55596
AM
12312diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/network_cmd.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/network_cmd.h
12313--- linux-4.9.135/include/uapi/vserver/network_cmd.h 1970-01-01 00:00:00.000000000 +0000
12314+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/network_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12315@@ -0,0 +1,123 @@
12316+#ifndef _UAPI_VS_NETWORK_CMD_H
12317+#define _UAPI_VS_NETWORK_CMD_H
2380c486 12318+
2380c486 12319+
4bf69007 12320+/* vinfo commands */
2380c486 12321+
4bf69007 12322+#define VCMD_task_nid VC_CMD(VINFO, 2, 0)
2380c486 12323+
2380c486 12324+
4bf69007 12325+#define VCMD_nx_info VC_CMD(VINFO, 6, 0)
2380c486 12326+
4bf69007
AM
12327+struct vcmd_nx_info_v0 {
12328+ uint32_t nid;
12329+ /* more to come */
12330+};
2380c486 12331+
2380c486 12332+
4bf69007
AM
12333+#include <linux/in.h>
12334+#include <linux/in6.h>
2380c486 12335+
4bf69007
AM
12336+#define VCMD_net_create_v0 VC_CMD(VNET, 1, 0)
12337+#define VCMD_net_create VC_CMD(VNET, 1, 1)
2380c486 12338+
4bf69007
AM
12339+struct vcmd_net_create {
12340+ uint64_t flagword;
12341+};
2380c486 12342+
4bf69007 12343+#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0)
2380c486 12344+
4bf69007
AM
12345+#define VCMD_net_add VC_CMD(NETALT, 1, 0)
12346+#define VCMD_net_remove VC_CMD(NETALT, 2, 0)
2380c486 12347+
4bf69007
AM
12348+struct vcmd_net_addr_v0 {
12349+ uint16_t type;
12350+ uint16_t count;
12351+ struct in_addr ip[4];
12352+ struct in_addr mask[4];
12353+};
2380c486 12354+
4bf69007
AM
12355+#define VCMD_net_add_ipv4_v1 VC_CMD(NETALT, 1, 1)
12356+#define VCMD_net_rem_ipv4_v1 VC_CMD(NETALT, 2, 1)
2380c486 12357+
4bf69007
AM
12358+struct vcmd_net_addr_ipv4_v1 {
12359+ uint16_t type;
12360+ uint16_t flags;
12361+ struct in_addr ip;
12362+ struct in_addr mask;
12363+};
2380c486 12364+
4bf69007
AM
12365+#define VCMD_net_add_ipv4 VC_CMD(NETALT, 1, 2)
12366+#define VCMD_net_rem_ipv4 VC_CMD(NETALT, 2, 2)
2380c486 12367+
4bf69007
AM
12368+struct vcmd_net_addr_ipv4_v2 {
12369+ uint16_t type;
12370+ uint16_t flags;
12371+ struct in_addr ip;
12372+ struct in_addr ip2;
12373+ struct in_addr mask;
12374+};
2380c486 12375+
4bf69007
AM
12376+#define VCMD_net_add_ipv6 VC_CMD(NETALT, 3, 1)
12377+#define VCMD_net_remove_ipv6 VC_CMD(NETALT, 4, 1)
2380c486 12378+
4bf69007
AM
12379+struct vcmd_net_addr_ipv6_v1 {
12380+ uint16_t type;
12381+ uint16_t flags;
12382+ uint32_t prefix;
12383+ struct in6_addr ip;
12384+ struct in6_addr mask;
12385+};
2380c486 12386+
4bf69007
AM
12387+#define VCMD_add_match_ipv4 VC_CMD(NETALT, 5, 0)
12388+#define VCMD_get_match_ipv4 VC_CMD(NETALT, 6, 0)
2380c486 12389+
4bf69007
AM
12390+struct vcmd_match_ipv4_v0 {
12391+ uint16_t type;
12392+ uint16_t flags;
12393+ uint16_t parent;
12394+ uint16_t prefix;
12395+ struct in_addr ip;
12396+ struct in_addr ip2;
12397+ struct in_addr mask;
12398+};
2380c486 12399+
4bf69007
AM
12400+#define VCMD_add_match_ipv6 VC_CMD(NETALT, 7, 0)
12401+#define VCMD_get_match_ipv6 VC_CMD(NETALT, 8, 0)
2380c486 12402+
4bf69007
AM
12403+struct vcmd_match_ipv6_v0 {
12404+ uint16_t type;
12405+ uint16_t flags;
12406+ uint16_t parent;
12407+ uint16_t prefix;
12408+ struct in6_addr ip;
12409+ struct in6_addr ip2;
12410+ struct in6_addr mask;
12411+};
2380c486 12412+
2380c486 12413+
2380c486 12414+
2380c486 12415+
4bf69007 12416+/* flag commands */
2380c486 12417+
4bf69007
AM
12418+#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0)
12419+#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0)
2380c486 12420+
4bf69007
AM
12421+struct vcmd_net_flags_v0 {
12422+ uint64_t flagword;
12423+ uint64_t mask;
12424+};
2380c486 12425+
2380c486 12426+
ab30d09f 12427+
4bf69007 12428+/* network caps commands */
ab30d09f 12429+
4bf69007
AM
12430+#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0)
12431+#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0)
ec22aa5c 12432+
4bf69007
AM
12433+struct vcmd_net_caps_v0 {
12434+ uint64_t ncaps;
12435+ uint64_t cmask;
12436+};
3bac966d 12437+
4bf69007 12438+#endif /* _UAPI_VS_NETWORK_CMD_H */
09a55596
AM
12439diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/sched_cmd.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/sched_cmd.h
12440--- linux-4.9.135/include/uapi/vserver/sched_cmd.h 1970-01-01 00:00:00.000000000 +0000
12441+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/sched_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12442@@ -0,0 +1,13 @@
12443+#ifndef _UAPI_VS_SCHED_CMD_H
12444+#define _UAPI_VS_SCHED_CMD_H
d337f35e 12445+
d337f35e 12446+
4bf69007
AM
12447+struct vcmd_prio_bias {
12448+ int32_t cpu_id;
12449+ int32_t prio_bias;
12450+};
2380c486 12451+
4bf69007
AM
12452+#define VCMD_set_prio_bias VC_CMD(SCHED, 4, 0)
12453+#define VCMD_get_prio_bias VC_CMD(SCHED, 5, 0)
d337f35e 12454+
4bf69007 12455+#endif /* _UAPI_VS_SCHED_CMD_H */
09a55596
AM
12456diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/signal_cmd.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/signal_cmd.h
12457--- linux-4.9.135/include/uapi/vserver/signal_cmd.h 1970-01-01 00:00:00.000000000 +0000
12458+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/signal_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12459@@ -0,0 +1,31 @@
12460+#ifndef _UAPI_VS_SIGNAL_CMD_H
12461+#define _UAPI_VS_SIGNAL_CMD_H
d337f35e 12462+
d337f35e 12463+
4bf69007 12464+/* signalling vserver commands */
d337f35e 12465+
4bf69007
AM
12466+#define VCMD_ctx_kill VC_CMD(PROCTRL, 1, 0)
12467+#define VCMD_wait_exit VC_CMD(EVENT, 99, 0)
d337f35e 12468+
4bf69007
AM
12469+struct vcmd_ctx_kill_v0 {
12470+ int32_t pid;
12471+ int32_t sig;
12472+};
d337f35e 12473+
4bf69007
AM
12474+struct vcmd_wait_exit_v0 {
12475+ int32_t reboot_cmd;
12476+ int32_t exit_code;
12477+};
d337f35e 12478+
d337f35e 12479+
4bf69007 12480+/* process alteration commands */
ab30d09f 12481+
4bf69007
AM
12482+#define VCMD_get_pflags VC_CMD(PROCALT, 5, 0)
12483+#define VCMD_set_pflags VC_CMD(PROCALT, 6, 0)
d337f35e 12484+
4bf69007
AM
12485+struct vcmd_pflags_v0 {
12486+ uint32_t flagword;
12487+ uint32_t mask;
12488+};
3bac966d 12489+
4bf69007 12490+#endif /* _UAPI_VS_SIGNAL_CMD_H */
09a55596
AM
12491diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/space_cmd.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/space_cmd.h
12492--- linux-4.9.135/include/uapi/vserver/space_cmd.h 1970-01-01 00:00:00.000000000 +0000
12493+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/space_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12494@@ -0,0 +1,28 @@
12495+#ifndef _UAPI_VS_SPACE_CMD_H
12496+#define _UAPI_VS_SPACE_CMD_H
d337f35e 12497+
d337f35e 12498+
4bf69007
AM
12499+#define VCMD_enter_space_v0 VC_CMD(PROCALT, 1, 0)
12500+#define VCMD_enter_space_v1 VC_CMD(PROCALT, 1, 1)
12501+#define VCMD_enter_space VC_CMD(PROCALT, 1, 2)
2380c486 12502+
4bf69007
AM
12503+#define VCMD_set_space_v0 VC_CMD(PROCALT, 3, 0)
12504+#define VCMD_set_space_v1 VC_CMD(PROCALT, 3, 1)
12505+#define VCMD_set_space VC_CMD(PROCALT, 3, 2)
d337f35e 12506+
4bf69007 12507+#define VCMD_get_space_mask_v0 VC_CMD(PROCALT, 4, 0)
d337f35e 12508+
4bf69007
AM
12509+#define VCMD_get_space_mask VC_CMD(VSPACE, 0, 1)
12510+#define VCMD_get_space_default VC_CMD(VSPACE, 1, 0)
d337f35e 12511+
d337f35e 12512+
4bf69007
AM
12513+struct vcmd_space_mask_v1 {
12514+ uint64_t mask;
12515+};
d337f35e 12516+
4bf69007
AM
12517+struct vcmd_space_mask_v2 {
12518+ uint64_t mask;
12519+ uint32_t index;
12520+};
d337f35e 12521+
4bf69007 12522+#endif /* _UAPI_VS_SPACE_CMD_H */
09a55596
AM
12523diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/switch.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/switch.h
12524--- linux-4.9.135/include/uapi/vserver/switch.h 1970-01-01 00:00:00.000000000 +0000
12525+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/switch.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12526@@ -0,0 +1,90 @@
12527+#ifndef _UAPI_VS_SWITCH_H
12528+#define _UAPI_VS_SWITCH_H
d337f35e 12529+
4bf69007 12530+#include <linux/types.h>
d337f35e 12531+
d337f35e 12532+
4bf69007
AM
12533+#define VC_CATEGORY(c) (((c) >> 24) & 0x3F)
12534+#define VC_COMMAND(c) (((c) >> 16) & 0xFF)
12535+#define VC_VERSION(c) ((c) & 0xFFF)
d337f35e 12536+
4bf69007
AM
12537+#define VC_CMD(c, i, v) ((((VC_CAT_ ## c) & 0x3F) << 24) \
12538+ | (((i) & 0xFF) << 16) | ((v) & 0xFFF))
d337f35e 12539+
4bf69007 12540+/*
d337f35e 12541+
4bf69007 12542+ Syscall Matrix V2.8
d337f35e 12543+
4bf69007
AM
12544+ |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
12545+ |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | |
12546+ |INFO |SETUP | |MOVE | | | | | |
12547+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12548+ SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICE | |
12549+ HOST | 00| 01| 02| 03| 04| 05| | 06| 07|
12550+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12551+ CPU | |VPROC |PROCALT|PROCMIG|PROCTRL| | |SCHED. | |
12552+ PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15|
12553+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12554+ MEMORY | | | | |MEMCTRL| | |SWAP | |
12555+ | 16| 17| 18| 19| 20| 21| | 22| 23|
12556+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12557+ NETWORK| |VNET |NETALT |NETMIG |NETCTL | | |SERIAL | |
12558+ | 24| 25| 26| 27| 28| 29| | 30| 31|
12559+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12560+ DISK | | | |TAGMIG |DLIMIT | | |INODE | |
12561+ VFS | 32| 33| 34| 35| 36| 37| | 38| 39|
12562+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12563+ OTHER |VSTAT | | | | | | |VINFO | |
12564+ | 40| 41| 42| 43| 44| 45| | 46| 47|
12565+ =======+=======+=======+=======+=======+=======+=======+ +=======+=======+
12566+ SPECIAL|EVENT | | | |FLAGS | | |VSPACE | |
12567+ | 48| 49| 50| 51| 52| 53| | 54| 55|
12568+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
12569+ SPECIAL|DEBUG | | | |RLIMIT |SYSCALL| | |COMPAT |
12570+ | 56| 57| 58| 59| 60|TEST 61| | 62| 63|
12571+ -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
d337f35e 12572+
4bf69007 12573+*/
d337f35e 12574+
4bf69007 12575+#define VC_CAT_VERSION 0
d337f35e 12576+
4bf69007
AM
12577+#define VC_CAT_VSETUP 1
12578+#define VC_CAT_VHOST 2
d337f35e 12579+
4bf69007 12580+#define VC_CAT_DEVICE 6
d337f35e 12581+
4bf69007
AM
12582+#define VC_CAT_VPROC 9
12583+#define VC_CAT_PROCALT 10
12584+#define VC_CAT_PROCMIG 11
12585+#define VC_CAT_PROCTRL 12
d337f35e 12586+
4bf69007
AM
12587+#define VC_CAT_SCHED 14
12588+#define VC_CAT_MEMCTRL 20
d337f35e 12589+
4bf69007
AM
12590+#define VC_CAT_VNET 25
12591+#define VC_CAT_NETALT 26
12592+#define VC_CAT_NETMIG 27
12593+#define VC_CAT_NETCTRL 28
d337f35e 12594+
4bf69007
AM
12595+#define VC_CAT_TAGMIG 35
12596+#define VC_CAT_DLIMIT 36
12597+#define VC_CAT_INODE 38
d337f35e 12598+
4bf69007
AM
12599+#define VC_CAT_VSTAT 40
12600+#define VC_CAT_VINFO 46
12601+#define VC_CAT_EVENT 48
d337f35e 12602+
4bf69007
AM
12603+#define VC_CAT_FLAGS 52
12604+#define VC_CAT_VSPACE 54
12605+#define VC_CAT_DEBUG 56
12606+#define VC_CAT_RLIMIT 60
d337f35e 12607+
4bf69007
AM
12608+#define VC_CAT_SYSTEST 61
12609+#define VC_CAT_COMPAT 63
d337f35e 12610+
4bf69007 12611+/* query version */
d337f35e 12612+
4bf69007
AM
12613+#define VCMD_get_version VC_CMD(VERSION, 0, 0)
12614+#define VCMD_get_vci VC_CMD(VERSION, 1, 0)
2380c486 12615+
4bf69007 12616+#endif /* _UAPI_VS_SWITCH_H */
09a55596
AM
12617diff -NurpP --minimal linux-4.9.135/include/uapi/vserver/tag_cmd.h linux-4.9.135-vs2.3.9.8/include/uapi/vserver/tag_cmd.h
12618--- linux-4.9.135/include/uapi/vserver/tag_cmd.h 1970-01-01 00:00:00.000000000 +0000
12619+++ linux-4.9.135-vs2.3.9.8/include/uapi/vserver/tag_cmd.h 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12620@@ -0,0 +1,14 @@
12621+#ifndef _UAPI_VS_TAG_CMD_H
12622+#define _UAPI_VS_TAG_CMD_H
d337f35e 12623+
d337f35e 12624+
4bf69007 12625+/* vinfo commands */
d337f35e 12626+
4bf69007 12627+#define VCMD_task_tag VC_CMD(VINFO, 3, 0)
d337f35e
JR
12628+
12629+
4bf69007 12630+/* context commands */
d337f35e 12631+
4bf69007 12632+#define VCMD_tag_migrate VC_CMD(TAGMIG, 1, 0)
2380c486 12633+
4bf69007 12634+#endif /* _UAPI_VS_TAG_CMD_H */
09a55596
AM
12635diff -NurpP --minimal linux-4.9.135/init/Kconfig linux-4.9.135-vs2.3.9.8/init/Kconfig
12636--- linux-4.9.135/init/Kconfig 2018-10-20 10:39:20.000000000 +0000
12637+++ linux-4.9.135-vs2.3.9.8/init/Kconfig 2018-10-20 04:58:14.000000000 +0000
cc23e853 12638@@ -958,6 +958,7 @@ config NUMA_BALANCING_DEFAULT_ENABLED
4bf69007 12639 menuconfig CGROUPS
cc23e853 12640 bool "Control Group support"
265de2f7 12641 select KERNFS
4bf69007
AM
12642+ default y
12643 help
12644 This option adds support for grouping sets of processes together, for
12645 use with process control subsystems such as Cpusets, CFS, memory
09a55596
AM
12646diff -NurpP --minimal linux-4.9.135/init/main.c linux-4.9.135-vs2.3.9.8/init/main.c
12647--- linux-4.9.135/init/main.c 2018-10-20 10:39:20.000000000 +0000
12648+++ linux-4.9.135-vs2.3.9.8/init/main.c 2018-10-20 05:55:43.000000000 +0000
12649@@ -82,6 +82,7 @@
cc23e853
AM
12650 #include <linux/io.h>
12651 #include <linux/kaiser.h>
9b2a4327 12652 #include <linux/cache.h>
4bf69007
AM
12653+#include <linux/vserver/percpu.h>
12654
12655 #include <asm/io.h>
12656 #include <asm/bugs.h>
09a55596
AM
12657diff -NurpP --minimal linux-4.9.135/ipc/mqueue.c linux-4.9.135-vs2.3.9.8/ipc/mqueue.c
12658--- linux-4.9.135/ipc/mqueue.c 2018-10-20 10:39:20.000000000 +0000
12659+++ linux-4.9.135-vs2.3.9.8/ipc/mqueue.c 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12660@@ -35,6 +35,8 @@
12661 #include <linux/ipc_namespace.h>
12662 #include <linux/user_namespace.h>
12663 #include <linux/slab.h>
12664+#include <linux/vs_context.h>
12665+#include <linux/vs_limit.h>
12666
12667 #include <net/sock.h>
12668 #include "util.h"
cc23e853 12669@@ -75,6 +77,7 @@ struct mqueue_inode_info {
bb20add7 12670 struct pid *notify_owner;
4bf69007
AM
12671 struct user_namespace *notify_user_ns;
12672 struct user_struct *user; /* user who created, for accounting */
12673+ struct vx_info *vxi;
12674 struct sock *notify_sock;
12675 struct sk_buff *notify_cookie;
12676
cc23e853 12677@@ -230,6 +233,7 @@ static struct inode *mqueue_get_inode(st
4bf69007
AM
12678 if (S_ISREG(mode)) {
12679 struct mqueue_inode_info *info;
12680 unsigned long mq_bytes, mq_treesize;
12681+ struct vx_info *vxi = current_vx_info();
12682
12683 inode->i_fop = &mqueue_file_operations;
12684 inode->i_size = FILENT_SIZE;
cc23e853 12685@@ -243,6 +247,7 @@ static struct inode *mqueue_get_inode(st
4bf69007
AM
12686 info->notify_user_ns = NULL;
12687 info->qsize = 0;
12688 info->user = NULL; /* set when all is ok */
12689+ info->vxi = NULL;
12690 info->msg_tree = RB_ROOT;
12691 info->node_cache = NULL;
12692 memset(&info->attr, 0, sizeof(info->attr));
cc23e853 12693@@ -276,17 +281,20 @@ static struct inode *mqueue_get_inode(st
4bf69007
AM
12694
12695 spin_lock(&mq_lock);
12696 if (u->mq_bytes + mq_bytes < u->mq_bytes ||
12697- u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
12698+ u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE) ||
12699+ !vx_ipcmsg_avail(vxi, mq_bytes)) {
12700 spin_unlock(&mq_lock);
12701 /* mqueue_evict_inode() releases info->messages */
12702 ret = -EMFILE;
12703 goto out_inode;
12704 }
12705 u->mq_bytes += mq_bytes;
12706+ vx_ipcmsg_add(vxi, u, mq_bytes);
12707 spin_unlock(&mq_lock);
12708
12709 /* all is ok */
12710 info->user = get_uid(u);
12711+ info->vxi = get_vx_info(vxi);
12712 } else if (S_ISDIR(mode)) {
12713 inc_nlink(inode);
12714 /* Some things misbehave if size == 0 on a directory */
d42736e7 12715@@ -402,6 +402,7 @@ static void mqueue_evict_inode(struct in
4bf69007
AM
12716 user = info->user;
12717 if (user) {
d42736e7 12718 unsigned long mq_bytes, mq_treesize;
4bf69007 12719+ struct vx_info *vxi = info->vxi;
d42736e7
AM
12720
12721 /* Total amount of bytes accounted for the mqueue */
12722 mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
12723@@ -413,6 +414,7 @@ static void mqueue_evict_inode(struct in
12724
4bf69007
AM
12725 spin_lock(&mq_lock);
12726 user->mq_bytes -= mq_bytes;
12727+ vx_ipcmsg_sub(vxi, user, mq_bytes);
12728 /*
12729 * get_ns_from_inode() ensures that the
12730 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
cc23e853 12731@@ -407,6 +418,7 @@ static void mqueue_evict_inode(struct in
4bf69007
AM
12732 if (ipc_ns)
12733 ipc_ns->mq_queues_count--;
12734 spin_unlock(&mq_lock);
12735+ put_vx_info(vxi);
12736 free_uid(user);
12737 }
12738 if (ipc_ns)
09a55596
AM
12739diff -NurpP --minimal linux-4.9.135/ipc/msg.c linux-4.9.135-vs2.3.9.8/ipc/msg.c
12740--- linux-4.9.135/ipc/msg.c 2018-10-20 10:39:20.000000000 +0000
12741+++ linux-4.9.135-vs2.3.9.8/ipc/msg.c 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12742@@ -37,6 +37,7 @@
12743 #include <linux/rwsem.h>
12744 #include <linux/nsproxy.h>
12745 #include <linux/ipc_namespace.h>
12746+#include <linux/vs_base.h>
12747
12748 #include <asm/current.h>
bb20add7 12749 #include <linux/uaccess.h>
cc23e853 12750@@ -124,6 +125,7 @@ static int newque(struct ipc_namespace *
4bf69007
AM
12751
12752 msq->q_perm.mode = msgflg & S_IRWXUGO;
12753 msq->q_perm.key = key;
12754+ msq->q_perm.xid = vx_current_xid();
12755
12756 msq->q_perm.security = NULL;
12757 retval = security_msg_queue_alloc(msq);
09a55596
AM
12758diff -NurpP --minimal linux-4.9.135/ipc/namespace.c linux-4.9.135-vs2.3.9.8/ipc/namespace.c
12759--- linux-4.9.135/ipc/namespace.c 2016-12-11 19:17:54.000000000 +0000
12760+++ linux-4.9.135-vs2.3.9.8/ipc/namespace.c 2018-10-20 04:58:14.000000000 +0000
cc23e853
AM
12761@@ -13,6 +13,7 @@
12762 #include <linux/mount.h>
12763 #include <linux/user_namespace.h>
12764 #include <linux/proc_ns.h>
12765+#include <linux/vserver/global.h>
12766
12767 #include "util.h"
12768
12769@@ -59,6 +60,7 @@ static struct ipc_namespace *create_ipc_
12770 sem_init_ns(ns);
12771 msg_init_ns(ns);
12772 shm_init_ns(ns);
12773+ atomic_inc(&vs_global_ipc_ns);
12774
12775 return ns;
12776
12777@@ -121,6 +123,7 @@ static void free_ipc_ns(struct ipc_names
12778 dec_ipc_namespaces(ns->ucounts);
12779 put_user_ns(ns->user_ns);
12780 ns_free_inum(&ns->ns);
12781+ atomic_dec(&vs_global_ipc_ns);
12782 kfree(ns);
12783 }
12784
09a55596
AM
12785diff -NurpP --minimal linux-4.9.135/ipc/sem.c linux-4.9.135-vs2.3.9.8/ipc/sem.c
12786--- linux-4.9.135/ipc/sem.c 2016-12-11 19:17:54.000000000 +0000
12787+++ linux-4.9.135-vs2.3.9.8/ipc/sem.c 2018-10-20 04:58:14.000000000 +0000
bb20add7 12788@@ -85,6 +85,8 @@
4bf69007
AM
12789 #include <linux/rwsem.h>
12790 #include <linux/nsproxy.h>
12791 #include <linux/ipc_namespace.h>
12792+#include <linux/vs_base.h>
12793+#include <linux/vs_limit.h>
12794
bb20add7 12795 #include <linux/uaccess.h>
4bf69007 12796 #include "util.h"
cc23e853 12797@@ -537,6 +539,7 @@ static int newary(struct ipc_namespace *
4bf69007
AM
12798
12799 sma->sem_perm.mode = (semflg & S_IRWXUGO);
12800 sma->sem_perm.key = key;
12801+ sma->sem_perm.xid = vx_current_xid();
12802
12803 sma->sem_perm.security = NULL;
12804 retval = security_sem_alloc(sma);
cc23e853 12805@@ -567,6 +570,9 @@ static int newary(struct ipc_namespace *
4bf69007
AM
12806 return id;
12807 }
12808 ns->used_sems += nsems;
12809+ /* FIXME: obsoleted? */
12810+ vx_semary_inc(sma);
12811+ vx_nsems_add(sma, nsems);
12812
bb20add7
AM
12813 sem_unlock(sma, -1);
12814 rcu_read_unlock();
cc23e853 12815@@ -1155,6 +1161,9 @@ static void freeary(struct ipc_namespace
4bf69007
AM
12816
12817 wake_up_sem_queue_do(&tasks);
12818 ns->used_sems -= sma->sem_nsems;
12819+ /* FIXME: obsoleted? */
12820+ vx_nsems_sub(sma, sma->sem_nsems);
12821+ vx_semary_dec(sma);
926e38e0 12822 ipc_rcu_putref(sma, sem_rcu_free);
4bf69007 12823 }
926e38e0 12824
09a55596
AM
12825diff -NurpP --minimal linux-4.9.135/ipc/shm.c linux-4.9.135-vs2.3.9.8/ipc/shm.c
12826--- linux-4.9.135/ipc/shm.c 2018-10-20 10:39:20.000000000 +0000
12827+++ linux-4.9.135-vs2.3.9.8/ipc/shm.c 2018-10-20 04:58:14.000000000 +0000
c2e5f7c8 12828@@ -42,6 +42,8 @@
4bf69007
AM
12829 #include <linux/nsproxy.h>
12830 #include <linux/mount.h>
12831 #include <linux/ipc_namespace.h>
12832+#include <linux/vs_context.h>
12833+#include <linux/vs_limit.h>
12834
bb20add7 12835 #include <linux/uaccess.h>
4bf69007 12836
09a55596 12837@@ -234,10 +236,14 @@ static void shm_open(struct vm_area_stru
4bf69007
AM
12838 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
12839 {
c2e5f7c8 12840 struct file *shm_file;
4bf69007
AM
12841+ struct vx_info *vxi = lookup_vx_info(shp->shm_perm.xid);
12842+ int numpages = (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
c2e5f7c8
JR
12843
12844 shm_file = shp->shm_file;
12845 shp->shm_file = NULL;
12846- ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
4bf69007
AM
12847+ vx_ipcshm_sub(vxi, shp, numpages);
12848+ ns->shm_tot -= numpages;
d337f35e 12849+
4bf69007
AM
12850 shm_rmid(ns, shp);
12851 shm_unlock(shp);
c2e5f7c8 12852 if (!is_file_hugepages(shm_file))
09a55596 12853@@ -246,6 +252,7 @@ static void shm_destroy(struct ipc_names
cc23e853
AM
12854 user_shm_unlock(i_size_read(file_inode(shm_file)),
12855 shp->mlock_user);
c2e5f7c8 12856 fput(shm_file);
4bf69007 12857+ put_vx_info(vxi);
926e38e0 12858 ipc_rcu_putref(shp, shm_rcu_free);
4bf69007
AM
12859 }
12860
09a55596 12861@@ -559,11 +566,15 @@ static int newseg(struct ipc_namespace *
bb20add7 12862 ns->shm_tot + numpages > ns->shm_ctlall)
4bf69007
AM
12863 return -ENOSPC;
12864
12865+ if (!vx_ipcshm_avail(current_vx_info(), numpages))
12866+ return -ENOSPC;
d337f35e 12867+
4bf69007
AM
12868 shp = ipc_rcu_alloc(sizeof(*shp));
12869 if (!shp)
12870 return -ENOMEM;
12871
12872 shp->shm_perm.key = key;
12873+ shp->shm_perm.xid = vx_current_xid();
12874 shp->shm_perm.mode = (shmflg & S_IRWXUGO);
12875 shp->mlock_user = NULL;
12876
09a55596 12877@@ -634,6 +645,7 @@ static int newseg(struct ipc_namespace *
926e38e0
JR
12878
12879 ipc_unlock_object(&shp->shm_perm);
12880 rcu_read_unlock();
4bf69007
AM
12881+ vx_ipcshm_add(current_vx_info(), key, numpages);
12882 return error;
12883
12884 no_id:
09a55596
AM
12885diff -NurpP --minimal linux-4.9.135/kernel/Makefile linux-4.9.135-vs2.3.9.8/kernel/Makefile
12886--- linux-4.9.135/kernel/Makefile 2018-10-20 10:39:20.000000000 +0000
12887+++ linux-4.9.135-vs2.3.9.8/kernel/Makefile 2018-10-20 04:58:14.000000000 +0000
cc23e853 12888@@ -39,6 +39,7 @@ obj-y += printk/
c2e5f7c8
JR
12889 obj-y += irq/
12890 obj-y += rcu/
cc23e853 12891 obj-y += livepatch/
4bf69007
AM
12892+obj-y += vserver/
12893
b00e13aa
AM
12894 obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
12895 obj-$(CONFIG_FREEZER) += freezer.o
09a55596
AM
12896diff -NurpP --minimal linux-4.9.135/kernel/auditsc.c linux-4.9.135-vs2.3.9.8/kernel/auditsc.c
12897--- linux-4.9.135/kernel/auditsc.c 2018-10-20 10:39:21.000000000 +0000
12898+++ linux-4.9.135-vs2.3.9.8/kernel/auditsc.c 2018-10-20 05:55:43.000000000 +0000
12899@@ -1967,7 +1967,7 @@ static int audit_set_loginuid_perm(kuid_
c2e5f7c8 12900 if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
4bf69007 12901 return -EPERM;
c2e5f7c8 12902 /* it is set, you need permission */
4bf69007
AM
12903- if (!capable(CAP_AUDIT_CONTROL))
12904+ if (!vx_capable(CAP_AUDIT_CONTROL, VXC_AUDIT_CONTROL))
12905 return -EPERM;
c2e5f7c8
JR
12906 /* reject if this is not an unset and we don't allow that */
12907 if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid))
09a55596
AM
12908diff -NurpP --minimal linux-4.9.135/kernel/capability.c linux-4.9.135-vs2.3.9.8/kernel/capability.c
12909--- linux-4.9.135/kernel/capability.c 2018-10-20 10:39:21.000000000 +0000
12910+++ linux-4.9.135-vs2.3.9.8/kernel/capability.c 2018-10-20 04:58:14.000000000 +0000
bb20add7 12911@@ -17,6 +17,7 @@
4bf69007
AM
12912 #include <linux/syscalls.h>
12913 #include <linux/pid_namespace.h>
12914 #include <linux/user_namespace.h>
12915+#include <linux/vs_context.h>
12916 #include <asm/uaccess.h>
12917
12918 /*
cc23e853 12919@@ -107,6 +108,7 @@ static int cap_validate_magic(cap_user_h
4bf69007
AM
12920 return 0;
12921 }
12922
2380c486 12923+
4bf69007
AM
12924 /*
12925 * The only thing that can change the capabilities of the current
12926 * process is the current process. As such, we can't be in this code
cc23e853 12927@@ -344,6 +346,8 @@ bool has_ns_capability_noaudit(struct ta
4bf69007
AM
12928 return (ret == 0);
12929 }
12930
12931+#include <linux/vserver/base.h>
d337f35e 12932+
4bf69007
AM
12933 /**
12934 * has_capability_noaudit - Does a task have a capability (unaudited) in the
12935 * initial user ns
09a55596
AM
12936diff -NurpP --minimal linux-4.9.135/kernel/compat.c linux-4.9.135-vs2.3.9.8/kernel/compat.c
12937--- linux-4.9.135/kernel/compat.c 2016-12-11 19:17:54.000000000 +0000
12938+++ linux-4.9.135-vs2.3.9.8/kernel/compat.c 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
12939@@ -27,6 +27,7 @@
12940 #include <linux/times.h>
12941 #include <linux/ptrace.h>
12942 #include <linux/gfp.h>
12943+#include <linux/vs_time.h>
12944
12945 #include <asm/uaccess.h>
12946
cc23e853 12947@@ -1059,7 +1060,7 @@ COMPAT_SYSCALL_DEFINE1(stime, compat_tim
4bf69007
AM
12948 if (err)
12949 return err;
12950
12951- do_settimeofday(&tv);
12952+ vx_settimeofday(&tv);
12953 return 0;
12954 }
12955
09a55596
AM
12956diff -NurpP --minimal linux-4.9.135/kernel/cred.c linux-4.9.135-vs2.3.9.8/kernel/cred.c
12957--- linux-4.9.135/kernel/cred.c 2016-12-11 19:17:54.000000000 +0000
12958+++ linux-4.9.135-vs2.3.9.8/kernel/cred.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 12959@@ -64,31 +64,6 @@ struct cred init_cred = {
b00e13aa 12960 .group_info = &init_groups,
4bf69007
AM
12961 };
12962
12963-static inline void set_cred_subscribers(struct cred *cred, int n)
12964-{
12965-#ifdef CONFIG_DEBUG_CREDENTIALS
12966- atomic_set(&cred->subscribers, n);
12967-#endif
12968-}
12969-
12970-static inline int read_cred_subscribers(const struct cred *cred)
12971-{
12972-#ifdef CONFIG_DEBUG_CREDENTIALS
12973- return atomic_read(&cred->subscribers);
12974-#else
12975- return 0;
12976-#endif
12977-}
12978-
12979-static inline void alter_cred_subscribers(const struct cred *_cred, int n)
12980-{
12981-#ifdef CONFIG_DEBUG_CREDENTIALS
12982- struct cred *cred = (struct cred *) _cred;
12983-
12984- atomic_add(n, &cred->subscribers);
12985-#endif
12986-}
12987-
12988 /*
b00e13aa 12989 * The RCU callback to actually dispose of a set of credentials
4bf69007 12990 */
cc23e853 12991@@ -240,21 +215,16 @@ error:
4bf69007
AM
12992 *
12993 * Call commit_creds() or abort_creds() to clean up.
12994 */
12995-struct cred *prepare_creds(void)
12996+struct cred *__prepare_creds(const struct cred *old)
12997 {
12998- struct task_struct *task = current;
12999- const struct cred *old;
13000 struct cred *new;
13001
13002- validate_process_creds();
13003-
13004 new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
13005 if (!new)
13006 return NULL;
13007
13008 kdebug("prepare_creds() alloc %p", new);
13009
13010- old = task->cred;
13011 memcpy(new, old, sizeof(struct cred));
13012
13013 atomic_set(&new->usage, 1);
cc23e853 13014@@ -283,6 +253,13 @@ error:
4bf69007
AM
13015 abort_creds(new);
13016 return NULL;
13017 }
d337f35e 13018+
4bf69007 13019+struct cred *prepare_creds(void)
2380c486 13020+{
4bf69007 13021+ validate_process_creds();
d337f35e 13022+
4bf69007 13023+ return __prepare_creds(current->cred);
2380c486 13024+}
4bf69007
AM
13025 EXPORT_SYMBOL(prepare_creds);
13026
13027 /*
09a55596
AM
13028diff -NurpP --minimal linux-4.9.135/kernel/exit.c linux-4.9.135-vs2.3.9.8/kernel/exit.c
13029--- linux-4.9.135/kernel/exit.c 2018-10-20 10:39:21.000000000 +0000
13030+++ linux-4.9.135-vs2.3.9.8/kernel/exit.c 2018-10-20 04:58:14.000000000 +0000
4bf69007
AM
13031@@ -48,6 +48,10 @@
13032 #include <linux/fs_struct.h>
13033 #include <linux/init_task.h>
13034 #include <linux/perf_event.h>
13035+#include <linux/vs_limit.h>
13036+#include <linux/vs_context.h>
13037+#include <linux/vs_network.h>
13038+#include <linux/vs_pid.h>
13039 #include <trace/events/sched.h>
13040 #include <linux/hw_breakpoint.h>
13041 #include <linux/oom.h>
cc23e853 13042@@ -531,14 +535,24 @@ static struct task_struct *find_child_re
4bf69007 13043 struct pid_namespace *pid_ns = task_active_pid_ns(father);
cc23e853 13044 struct task_struct *reaper = pid_ns->child_reaper;
bc641624 13045 struct task_struct *p, *n;
4bf69007 13046+ struct vx_info *vxi = task_get_vx_info(father);
d337f35e 13047+
4bf69007
AM
13048+ if (vxi) {
13049+ BUG_ON(!vxi->vx_reaper);
13050+ if (vxi->vx_reaper != init_pid_ns.child_reaper &&
cc23e853 13051+ vxi->vx_reaper != father) {
4bf69007 13052+ reaper = vxi->vx_reaper;
cc23e853
AM
13053+ goto out_put;
13054+ }
13055+ }
4bf69007 13056
cc23e853
AM
13057 if (likely(reaper != father))
13058- return reaper;
13059+ goto out_put;
13060
13061 reaper = find_alive_thread(father);
13062 if (reaper) {
13063 pid_ns->child_reaper = reaper;
13064- return reaper;
13065+ goto out_put;
4bf69007
AM
13066 }
13067
cc23e853
AM
13068 write_unlock_irq(&tasklist_lock);
13069@@ -549,7 +563,10 @@ static struct task_struct *find_child_re
13070 zap_pid_ns_processes(pid_ns);
13071 write_lock_irq(&tasklist_lock);
13072
13073- return father;
13074+ reaper = father;
4bf69007
AM
13075+out_put:
13076+ put_vx_info(vxi);
13077+ return reaper;
13078 }
13079
13080 /*
cc23e853
AM
13081@@ -637,9 +654,13 @@ static void forget_original_parent(struc
13082 return;
bb20add7 13083
cc23e853
AM
13084 reaper = find_new_reaper(father, reaper);
13085- list_for_each_entry(p, &father->children, sibling) {
13086+ for (p = list_first_entry(&father->children, struct task_struct, sibling);
13087+ &p->sibling != &father->children; ) {
13088+ struct task_struct *next, *this_reaper = reaper;
13089+ if (p == reaper)
13090+ this_reaper = task_active_pid_ns(reaper)->child_reaper;
13091 for_each_thread(p, t) {
4bf69007 13092- t->real_parent = reaper;
cc23e853
AM
13093+ t->real_parent = this_reaper;
13094 BUG_ON((!t->ptrace) != (t->parent == father));
13095 if (likely(!t->ptrace))
13096 t->parent = t->real_parent;
13097@@ -651,10 +672,13 @@ static void forget_original_parent(struc
13098 * If this is a threaded reparent there is no need to
13099 * notify anyone anything has happened.
13100 */
13101- if (!same_thread_group(reaper, father))
13102+ if (!same_thread_group(this_reaper, father))
13103 reparent_leader(father, p, dead);
13104+ next = list_next_entry(p, sibling);
13105+ list_add(&p->sibling, &this_reaper->children);
13106+ p = next;
13107 }
13108- list_splice_tail_init(&father->children, &reaper->children);
13109+ INIT_LIST_HEAD(&father->children);
13110 }
13111
13112 /*
13113@@ -844,6 +868,9 @@ void __noreturn do_exit(long code)
4bf69007 13114 */
c2e5f7c8 13115 flush_ptrace_hw_breakpoint(tsk);
4bf69007
AM
13116
13117+ /* needs to stay before exit_notify() */
13118+ exit_vx_info_early(tsk, code);
d337f35e 13119+
cc23e853 13120 TASKS_RCU(preempt_disable());
bb20add7 13121 TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
cc23e853 13122 TASKS_RCU(preempt_enable());
5ba7a31c 13123@@ -876,6 +903,10 @@ void __noreturn do_exit(long code)
4bf69007 13124
5ba7a31c
AM
13125 validate_creds_for_do_exit(tsk);
13126
13127+ /* needs to stay after exit_notify() and before preempt_disable() */
4bf69007
AM
13128+ exit_vx_info(tsk, code);
13129+ exit_nx_info(tsk);
d337f35e 13130+
5ba7a31c
AM
13131 check_stack_usage();
13132 preempt_disable();
13133 if (tsk->nr_dirtied)
09a55596
AM
13134diff -NurpP --minimal linux-4.9.135/kernel/fork.c linux-4.9.135-vs2.3.9.8/kernel/fork.c
13135--- linux-4.9.135/kernel/fork.c 2018-10-20 10:39:21.000000000 +0000
13136+++ linux-4.9.135-vs2.3.9.8/kernel/fork.c 2018-10-20 05:55:43.000000000 +0000
cc23e853 13137@@ -77,6 +77,9 @@
265de2f7 13138 #include <linux/compiler.h>
cc23e853
AM
13139 #include <linux/sysctl.h>
13140 #include <linux/kcov.h>
4bf69007
AM
13141+#include <linux/vs_context.h>
13142+#include <linux/vs_network.h>
13143+#include <linux/vs_limit.h>
13144
13145 #include <asm/pgtable.h>
13146 #include <asm/pgalloc.h>
09a55596 13147@@ -356,6 +359,8 @@ void free_task(struct task_struct *tsk)
cc23e853
AM
13148 WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
13149 #endif
4bf69007
AM
13150 rt_mutex_debug_task_free(tsk);
13151+ clr_vx_info(&tsk->vx_info);
13152+ clr_nx_info(&tsk->nx_info);
13153 ftrace_graph_exit_task(tsk);
13154 put_seccomp_filter(tsk);
13155 arch_release_task_struct(tsk);
09a55596 13156@@ -1480,6 +1485,8 @@ static __latent_entropy struct task_stru
8d50a2ea 13157 {
4bf69007
AM
13158 int retval;
13159 struct task_struct *p;
4bf69007
AM
13160+ struct vx_info *vxi;
13161+ struct nx_info *nxi;
13162
13163 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
13164 return ERR_PTR(-EINVAL);
09a55596 13165@@ -1552,7 +1559,12 @@ static __latent_entropy struct task_stru
4bf69007
AM
13166 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
13167 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
13168 #endif
13169+ init_vx_info(&p->vx_info, current_vx_info());
13170+ init_nx_info(&p->nx_info, current_nx_info());
13171+
13172 retval = -EAGAIN;
13173+ if (!vx_nproc_avail(1))
13174+ goto bad_fork_free;
13175 if (atomic_read(&p->real_cred->user->processes) >=
13176 task_rlimit(p, RLIMIT_NPROC)) {
c2e5f7c8 13177 if (p->real_cred->user != INIT_USER &&
09a55596 13178@@ -1844,6 +1856,18 @@ static __latent_entropy struct task_stru
4bf69007
AM
13179 total_forks++;
13180 spin_unlock(&current->sighand->siglock);
bb20add7 13181 syscall_tracepoint_update(p);
4bf69007
AM
13182+
13183+ /* p is copy of current */
13184+ vxi = p->vx_info;
13185+ if (vxi) {
13186+ claim_vx_info(vxi, p);
13187+ atomic_inc(&vxi->cvirt.nr_threads);
13188+ atomic_inc(&vxi->cvirt.total_forks);
13189+ vx_nproc_inc(p);
2380c486 13190+ }
4bf69007
AM
13191+ nxi = p->nx_info;
13192+ if (nxi)
13193+ claim_nx_info(nxi, p);
13194 write_unlock_irq(&tasklist_lock);
bb20add7 13195
4bf69007 13196 proc_fork_connector(p);
09a55596
AM
13197diff -NurpP --minimal linux-4.9.135/kernel/kthread.c linux-4.9.135-vs2.3.9.8/kernel/kthread.c
13198--- linux-4.9.135/kernel/kthread.c 2018-10-20 10:39:21.000000000 +0000
13199+++ linux-4.9.135-vs2.3.9.8/kernel/kthread.c 2018-10-20 05:55:43.000000000 +0000
cc23e853 13200@@ -19,6 +19,7 @@
4bf69007 13201 #include <linux/ptrace.h>
09be7631 13202 #include <linux/uaccess.h>
cc23e853 13203 #include <linux/cgroup.h>
4bf69007
AM
13204+#include <linux/vs_pid.h>
13205 #include <trace/events/sched.h>
13206
13207 static DEFINE_SPINLOCK(kthread_create_lock);
09a55596
AM
13208diff -NurpP --minimal linux-4.9.135/kernel/nsproxy.c linux-4.9.135-vs2.3.9.8/kernel/nsproxy.c
13209--- linux-4.9.135/kernel/nsproxy.c 2016-12-11 19:17:54.000000000 +0000
13210+++ linux-4.9.135-vs2.3.9.8/kernel/nsproxy.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 13211@@ -20,12 +20,15 @@
4bf69007
AM
13212 #include <linux/mnt_namespace.h>
13213 #include <linux/utsname.h>
13214 #include <linux/pid_namespace.h>
13215+#include <linux/vserver/global.h>
13216+#include <linux/vserver/debug.h>
13217 #include <net/net_namespace.h>
13218 #include <linux/ipc_namespace.h>
09be7631 13219 #include <linux/proc_ns.h>
4bf69007
AM
13220 #include <linux/file.h>
13221 #include <linux/syscalls.h>
cc23e853 13222 #include <linux/cgroup.h>
4bf69007
AM
13223+#include "../fs/mount.h"
13224
13225 static struct kmem_cache *nsproxy_cachep;
13226
cc23e853 13227@@ -50,8 +53,11 @@ static inline struct nsproxy *create_nsp
4bf69007
AM
13228 struct nsproxy *nsproxy;
13229
13230 nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
13231- if (nsproxy)
13232+ if (nsproxy) {
13233 atomic_set(&nsproxy->count, 1);
13234+ atomic_inc(&vs_global_nsproxy);
13235+ }
13236+ vxdprintk(VXD_CBIT(space, 2), "create_nsproxy = %p[1]", nsproxy);
13237 return nsproxy;
13238 }
13239
cc23e853 13240@@ -60,9 +66,12 @@ static inline struct nsproxy *create_nsp
4bf69007
AM
13241 * Return the newly created nsproxy. Do not attach this to the task,
13242 * leave it to the caller to do proper locking and attach it to task.
13243 */
13244-static struct nsproxy *create_new_namespaces(unsigned long flags,
b00e13aa
AM
13245- struct task_struct *tsk, struct user_namespace *user_ns,
13246- struct fs_struct *new_fs)
13247+static struct nsproxy *unshare_namespaces(
13248+ unsigned long flags,
13249+ struct nsproxy *orig,
13250+ struct fs_struct *new_fs,
13251+ struct user_namespace *new_user,
13252+ struct pid_namespace *new_pid)
4bf69007
AM
13253 {
13254 struct nsproxy *new_nsp;
13255 int err;
cc23e853 13256@@ -71,39 +80,37 @@ static struct nsproxy *create_new_namesp
4bf69007
AM
13257 if (!new_nsp)
13258 return ERR_PTR(-ENOMEM);
13259
b00e13aa
AM
13260- new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
13261+ new_nsp->mnt_ns = copy_mnt_ns(flags, orig->mnt_ns, new_user, new_fs);
4bf69007
AM
13262 if (IS_ERR(new_nsp->mnt_ns)) {
13263 err = PTR_ERR(new_nsp->mnt_ns);
13264 goto out_ns;
13265 }
13266
b00e13aa
AM
13267- new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
13268+ new_nsp->uts_ns = copy_utsname(flags, new_user, orig->uts_ns);
4bf69007
AM
13269 if (IS_ERR(new_nsp->uts_ns)) {
13270 err = PTR_ERR(new_nsp->uts_ns);
13271 goto out_uts;
13272 }
13273
b00e13aa
AM
13274- new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
13275+ new_nsp->ipc_ns = copy_ipcs(flags, new_user, orig->ipc_ns);
4bf69007
AM
13276 if (IS_ERR(new_nsp->ipc_ns)) {
13277 err = PTR_ERR(new_nsp->ipc_ns);
13278 goto out_ipc;
13279 }
13280
c2e5f7c8
JR
13281- new_nsp->pid_ns_for_children =
13282- copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);
13283+ new_nsp->pid_ns_for_children = copy_pid_ns(flags, new_user, new_pid);
13284 if (IS_ERR(new_nsp->pid_ns_for_children)) {
13285 err = PTR_ERR(new_nsp->pid_ns_for_children);
4bf69007
AM
13286 goto out_pid;
13287 }
13288
cc23e853
AM
13289- new_nsp->cgroup_ns = copy_cgroup_ns(flags, user_ns,
13290- tsk->nsproxy->cgroup_ns);
13291+ new_nsp->cgroup_ns = copy_cgroup_ns(flags, new_user, orig->cgroup_ns);
13292 if (IS_ERR(new_nsp->cgroup_ns)) {
13293 err = PTR_ERR(new_nsp->cgroup_ns);
13294 goto out_cgroup;
13295 }
13296
b00e13aa
AM
13297- new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
13298+ new_nsp->net_ns = copy_net_ns(flags, new_user, orig->net_ns);
4bf69007
AM
13299 if (IS_ERR(new_nsp->net_ns)) {
13300 err = PTR_ERR(new_nsp->net_ns);
13301 goto out_net;
cc23e853 13302@@ -130,6 +137,43 @@ out_ns:
4bf69007
AM
13303 return ERR_PTR(err);
13304 }
13305
13306+static struct nsproxy *create_new_namespaces(unsigned long flags,
b00e13aa
AM
13307+ struct task_struct *tsk, struct user_namespace *user_ns,
13308+ struct fs_struct *new_fs)
13309+
4bf69007
AM
13310+{
13311+ return unshare_namespaces(flags, tsk->nsproxy,
b00e13aa 13312+ new_fs, user_ns, task_active_pid_ns(tsk));
2380c486 13313+}
d337f35e 13314+
4bf69007
AM
13315+/*
13316+ * copies the nsproxy, setting refcount to 1, and grabbing a
13317+ * reference to all contained namespaces.
13318+ */
13319+struct nsproxy *copy_nsproxy(struct nsproxy *orig)
2380c486 13320+{
4bf69007 13321+ struct nsproxy *ns = create_nsproxy();
d337f35e 13322+
4bf69007
AM
13323+ if (ns) {
13324+ memcpy(ns, orig, sizeof(struct nsproxy));
13325+ atomic_set(&ns->count, 1);
d337f35e 13326+
4bf69007
AM
13327+ if (ns->mnt_ns)
13328+ get_mnt_ns(ns->mnt_ns);
13329+ if (ns->uts_ns)
13330+ get_uts_ns(ns->uts_ns);
13331+ if (ns->ipc_ns)
13332+ get_ipc_ns(ns->ipc_ns);
c2e5f7c8
JR
13333+ if (ns->pid_ns_for_children)
13334+ get_pid_ns(ns->pid_ns_for_children);
4bf69007
AM
13335+ if (ns->net_ns)
13336+ get_net(ns->net_ns);
cc23e853
AM
13337+ if (ns->cgroup_ns)
13338+ get_cgroup_ns(ns->cgroup_ns);
4bf69007
AM
13339+ }
13340+ return ns;
13341+}
d337f35e 13342+
4bf69007
AM
13343 /*
13344 * called from clone. This now handles copy for nsproxy and all
13345 * namespaces therein.
cc23e853 13346@@ -138,7 +182,10 @@ int copy_namespaces(unsigned long flags,
4bf69007
AM
13347 {
13348 struct nsproxy *old_ns = tsk->nsproxy;
b00e13aa 13349 struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
4bf69007
AM
13350- struct nsproxy *new_ns;
13351+ struct nsproxy *new_ns = NULL;
c2e5f7c8 13352+
4bf69007
AM
13353+ vxdprintk(VXD_CBIT(space, 7), "copy_namespaces(0x%08lx,%p[%p])",
13354+ flags, tsk, old_ns);
4bf69007 13355
c2e5f7c8 13356 if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
cc23e853
AM
13357 CLONE_NEWPID | CLONE_NEWNET |
13358@@ -147,7 +194,7 @@ int copy_namespaces(unsigned long flags,
4bf69007 13359 return 0;
4bf69007 13360 }
4bf69007 13361
c2e5f7c8
JR
13362- if (!ns_capable(user_ns, CAP_SYS_ADMIN))
13363+ if (!vx_ns_can_unshare(user_ns, CAP_SYS_ADMIN, flags))
13364 return -EPERM;
13365
13366 /*
cc23e853 13367@@ -166,6 +213,9 @@ int copy_namespaces(unsigned long flags,
c2e5f7c8
JR
13368 return PTR_ERR(new_ns);
13369
13370 tsk->nsproxy = new_ns;
4bf69007 13371+ vxdprintk(VXD_CBIT(space, 3),
c2e5f7c8
JR
13372+ "copy_namespaces(0x%08lx,%p[%p]) = [%p]",
13373+ flags, tsk, old_ns, new_ns);
13374 return 0;
4bf69007
AM
13375 }
13376
cc23e853 13377@@ -179,8 +229,10 @@ void free_nsproxy(struct nsproxy *ns)
4bf69007 13378 put_ipc_ns(ns->ipc_ns);
c2e5f7c8
JR
13379 if (ns->pid_ns_for_children)
13380 put_pid_ns(ns->pid_ns_for_children);
4bf69007
AM
13381+ if (ns->net_ns)
13382+ put_net(ns->net_ns);
cc23e853
AM
13383 put_cgroup_ns(ns->cgroup_ns);
13384- put_net(ns->net_ns);
4bf69007
AM
13385+ atomic_dec(&vs_global_nsproxy);
13386 kmem_cache_free(nsproxy_cachep, ns);
13387 }
13388
cc23e853 13389@@ -194,12 +246,16 @@ int unshare_nsproxy_namespaces(unsigned
b00e13aa 13390 struct user_namespace *user_ns;
4bf69007
AM
13391 int err = 0;
13392
13393+ vxdprintk(VXD_CBIT(space, 4),
13394+ "unshare_nsproxy_namespaces(0x%08lx,[%p])",
13395+ unshare_flags, current->nsproxy);
d337f35e 13396+
4bf69007 13397 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
cc23e853 13398 CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP)))
4bf69007
AM
13399 return 0;
13400
b00e13aa
AM
13401 user_ns = new_cred ? new_cred->user_ns : current_user_ns();
13402- if (!ns_capable(user_ns, CAP_SYS_ADMIN))
13403+ if (!vx_ns_can_unshare(user_ns, CAP_SYS_ADMIN, unshare_flags))
4bf69007
AM
13404 return -EPERM;
13405
b00e13aa 13406 *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
09a55596
AM
13407diff -NurpP --minimal linux-4.9.135/kernel/pid.c linux-4.9.135-vs2.3.9.8/kernel/pid.c
13408--- linux-4.9.135/kernel/pid.c 2018-10-20 10:39:21.000000000 +0000
13409+++ linux-4.9.135-vs2.3.9.8/kernel/pid.c 2018-10-20 04:58:14.000000000 +0000
09be7631 13410@@ -38,6 +38,7 @@
4bf69007 13411 #include <linux/syscalls.h>
09be7631 13412 #include <linux/proc_ns.h>
b00e13aa 13413 #include <linux/proc_fs.h>
4bf69007
AM
13414+#include <linux/vs_pid.h>
13415
13416 #define pid_hashfn(nr, ns) \
13417 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
09a55596 13418@@ -381,7 +382,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns);
4bf69007
AM
13419
13420 struct pid *find_vpid(int nr)
13421 {
b00e13aa
AM
13422- return find_pid_ns(nr, task_active_pid_ns(current));
13423+ return find_pid_ns(vx_rmap_pid(nr), task_active_pid_ns(current));
4bf69007
AM
13424 }
13425 EXPORT_SYMBOL_GPL(find_vpid);
13426
09a55596 13427@@ -437,6 +438,9 @@ void transfer_pid(struct task_struct *ol
4bf69007
AM
13428 struct task_struct *pid_task(struct pid *pid, enum pid_type type)
13429 {
13430 struct task_struct *result = NULL;
d337f35e 13431+
cc23e853 13432+ if (type == __PIDTYPE_REALPID)
4bf69007
AM
13433+ type = PIDTYPE_PID;
13434 if (pid) {
13435 struct hlist_node *first;
13436 first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
09a55596 13437@@ -455,7 +459,7 @@ struct task_struct *find_task_by_pid_ns(
cc23e853
AM
13438 {
13439 RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
13440 "find_task_by_pid_ns() needs rcu_read_lock() protection");
4bf69007
AM
13441- return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
13442+ return pid_task(find_pid_ns(vx_rmap_pid(nr), ns), PIDTYPE_PID);
13443 }
13444
13445 struct task_struct *find_task_by_vpid(pid_t vnr)
09a55596 13446@@ -499,7 +503,7 @@ struct pid *find_get_pid(pid_t nr)
4bf69007
AM
13447 }
13448 EXPORT_SYMBOL_GPL(find_get_pid);
13449
13450-pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
13451+pid_t pid_unmapped_nr_ns(struct pid *pid, struct pid_namespace *ns)
13452 {
13453 struct upid *upid;
13454 pid_t nr = 0;
09a55596 13455@@ -513,6 +517,11 @@ pid_t pid_nr_ns(struct pid *pid, struct
4bf69007
AM
13456 }
13457 EXPORT_SYMBOL_GPL(pid_nr_ns);
13458
13459+pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
2380c486 13460+{
4bf69007
AM
13461+ return vx_map_pid(pid_unmapped_nr_ns(pid, ns));
13462+}
d337f35e 13463+
4bf69007
AM
13464 pid_t pid_vnr(struct pid *pid)
13465 {
b00e13aa 13466 return pid_nr_ns(pid, task_active_pid_ns(current));
09a55596
AM
13467diff -NurpP --minimal linux-4.9.135/kernel/pid_namespace.c linux-4.9.135-vs2.3.9.8/kernel/pid_namespace.c
13468--- linux-4.9.135/kernel/pid_namespace.c 2018-10-20 10:39:21.000000000 +0000
13469+++ linux-4.9.135-vs2.3.9.8/kernel/pid_namespace.c 2018-10-20 04:58:14.000000000 +0000
b00e13aa 13470@@ -18,6 +18,7 @@
09be7631 13471 #include <linux/proc_ns.h>
4bf69007
AM
13472 #include <linux/reboot.h>
13473 #include <linux/export.h>
13474+#include <linux/vserver/global.h>
13475
09be7631
JR
13476 struct pid_cache {
13477 int nr_ids;
cc23e853
AM
13478@@ -124,6 +125,7 @@ static struct pid_namespace *create_pid_
13479 ns->ns.ops = &pidns_operations;
4bf69007
AM
13480
13481 kref_init(&ns->kref);
13482+ atomic_inc(&vs_global_pid_ns);
13483 ns->level = level;
13484 ns->parent = get_pid_ns(parent_pid_ns);
b00e13aa 13485 ns->user_ns = get_user_ns(user_ns);
cc23e853 13486@@ -142,6 +144,7 @@ static struct pid_namespace *create_pid_
c2e5f7c8
JR
13487 out_free_map:
13488 kfree(ns->pidmap[0].page);
13489 out_free:
4bf69007
AM
13490+ atomic_dec(&vs_global_pid_ns);
13491 kmem_cache_free(pid_ns_cachep, ns);
cc23e853
AM
13492 out_dec:
13493 dec_pid_namespaces(ucounts);
09a55596
AM
13494diff -NurpP --minimal linux-4.9.135/kernel/printk/printk.c linux-4.9.135-vs2.3.9.8/kernel/printk/printk.c
13495--- linux-4.9.135/kernel/printk/printk.c 2018-10-20 10:39:21.000000000 +0000
13496+++ linux-4.9.135-vs2.3.9.8/kernel/printk/printk.c 2018-10-20 04:58:14.000000000 +0000
cc23e853 13497@@ -45,6 +45,7 @@
09be7631 13498 #include <linux/utsname.h>
bb20add7 13499 #include <linux/ctype.h>
cc23e853 13500 #include <linux/uio.h>
4bf69007
AM
13501+#include <linux/vs_cvirt.h>
13502
13503 #include <asm/uaccess.h>
cc23e853
AM
13504 #include <asm/sections.h>
13505@@ -612,7 +613,7 @@ int check_syslog_permissions(int type, i
13506 goto ok;
4bf69007
AM
13507
13508 if (syslog_action_restricted(type)) {
13509- if (capable(CAP_SYSLOG))
13510+ if (vx_capable(CAP_SYSLOG, VXC_SYSLOG))
cc23e853 13511 goto ok;
092a4f51
JR
13512 /*
13513 * For historical reasons, accept CAP_SYS_ADMIN too, with
cc23e853 13514@@ -1432,12 +1433,9 @@ int do_syslog(int type, char __user *buf
4bf69007 13515 if (error)
cc23e853 13516 goto out;
4bf69007
AM
13517
13518- switch (type) {
13519- case SYSLOG_ACTION_CLOSE: /* Close log */
13520- break;
13521- case SYSLOG_ACTION_OPEN: /* Open log */
13522- break;
13523- case SYSLOG_ACTION_READ: /* Read from log */
13524+ if ((type == SYSLOG_ACTION_READ) ||
13525+ (type == SYSLOG_ACTION_READ_ALL) ||
13526+ (type == SYSLOG_ACTION_READ_CLEAR)) {
13527 error = -EINVAL;
13528 if (!buf || len < 0)
13529 goto out;
cc23e853 13530@@ -1448,6 +1446,16 @@ int do_syslog(int type, char __user *buf
4bf69007
AM
13531 error = -EFAULT;
13532 goto out;
13533 }
13534+ }
13535+ if (!vx_check(0, VS_ADMIN|VS_WATCH))
13536+ return vx_do_syslog(type, buf, len);
d337f35e 13537+
4bf69007
AM
13538+ switch (type) {
13539+ case SYSLOG_ACTION_CLOSE: /* Close log */
13540+ break;
13541+ case SYSLOG_ACTION_OPEN: /* Open log */
13542+ break;
13543+ case SYSLOG_ACTION_READ: /* Read from log */
13544 error = wait_event_interruptible(log_wait,
13545 syslog_seq != log_next_seq);
13546 if (error)
cc23e853 13547@@ -1460,16 +1468,6 @@ int do_syslog(int type, char __user *buf
4bf69007
AM
13548 /* FALL THRU */
13549 /* Read last kernel messages */
13550 case SYSLOG_ACTION_READ_ALL:
13551- error = -EINVAL;
13552- if (!buf || len < 0)
13553- goto out;
13554- error = 0;
13555- if (!len)
13556- goto out;
13557- if (!access_ok(VERIFY_WRITE, buf, len)) {
13558- error = -EFAULT;
13559- goto out;
13560- }
13561 error = syslog_print_all(buf, len, clear);
13562 break;
13563 /* Clear ring buffer */
09a55596
AM
13564diff -NurpP --minimal linux-4.9.135/kernel/ptrace.c linux-4.9.135-vs2.3.9.8/kernel/ptrace.c
13565--- linux-4.9.135/kernel/ptrace.c 2018-10-20 10:39:21.000000000 +0000
13566+++ linux-4.9.135-vs2.3.9.8/kernel/ptrace.c 2018-10-20 04:58:15.000000000 +0000
09be7631 13567@@ -23,6 +23,7 @@
4bf69007
AM
13568 #include <linux/syscalls.h>
13569 #include <linux/uaccess.h>
13570 #include <linux/regset.h>
13571+#include <linux/vs_context.h>
13572 #include <linux/hw_breakpoint.h>
13573 #include <linux/cn_proc.h>
09be7631 13574 #include <linux/compat.h>
325b8c0a 13575@@ -331,6 +331,12 @@ ok:
b00e13aa 13576
325b8c0a
AM
13577 if (mode & PTRACE_MODE_SCHED)
13578 return 0;
13579+
4bf69007
AM
13580+ if (!vx_check(task->xid, VS_ADMIN_P|VS_WATCH_P|VS_IDENT))
13581+ return -EPERM;
13582+ if (!vx_check(task->xid, VS_IDENT) &&
325b8c0a 13583+ !task_vx_flags(task, VXF_STATE_ADMIN, 0))
4bf69007 13584+ return -EACCES;
4bf69007
AM
13585 return security_ptrace_access_check(task, mode);
13586 }
b00e13aa 13587
09a55596
AM
13588diff -NurpP --minimal linux-4.9.135/kernel/reboot.c linux-4.9.135-vs2.3.9.8/kernel/reboot.c
13589--- linux-4.9.135/kernel/reboot.c 2016-12-11 19:17:54.000000000 +0000
13590+++ linux-4.9.135-vs2.3.9.8/kernel/reboot.c 2018-10-20 04:58:15.000000000 +0000
c2e5f7c8
JR
13591@@ -16,6 +16,7 @@
13592 #include <linux/syscalls.h>
13593 #include <linux/syscore_ops.h>
13594 #include <linux/uaccess.h>
13595+#include <linux/vs_pid.h>
13596
13597 /*
13598 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
bb20add7 13599@@ -269,6 +270,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off);
c2e5f7c8
JR
13600
13601 static DEFINE_MUTEX(reboot_mutex);
13602
13603+long vs_reboot(unsigned int, void __user *);
13604+
13605 /*
13606 * Reboot system call: for obvious reasons only root may call it,
13607 * and even root needs to set up some magic numbers in the registers
bb20add7 13608@@ -311,6 +314,9 @@ SYSCALL_DEFINE4(reboot, int, magic1, int
c2e5f7c8
JR
13609 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
13610 cmd = LINUX_REBOOT_CMD_HALT;
13611
13612+ if (!vx_check(0, VS_ADMIN|VS_WATCH))
13613+ return vs_reboot(cmd, arg);
13614+
13615 mutex_lock(&reboot_mutex);
13616 switch (cmd) {
13617 case LINUX_REBOOT_CMD_RESTART:
09a55596
AM
13618diff -NurpP --minimal linux-4.9.135/kernel/sched/core.c linux-4.9.135-vs2.3.9.8/kernel/sched/core.c
13619--- linux-4.9.135/kernel/sched/core.c 2018-10-20 10:39:21.000000000 +0000
13620+++ linux-4.9.135-vs2.3.9.8/kernel/sched/core.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 13621@@ -75,6 +75,8 @@
265de2f7 13622 #include <linux/compiler.h>
cc23e853
AM
13623 #include <linux/frame.h>
13624 #include <linux/prefetch.h>
4bf69007
AM
13625+#include <linux/vs_sched.h>
13626+#include <linux/vs_cvirt.h>
13627
13628 #include <asm/switch_to.h>
13629 #include <asm/tlb.h>
09a55596 13630@@ -3431,6 +3433,7 @@ void __noreturn do_task_dead(void)
cc23e853
AM
13631 __set_current_state(TASK_DEAD);
13632 current->flags |= PF_NOFREEZE; /* tell freezer to ignore us */
13633 __schedule(false);
13634+ printk("bad task: %p [%lx]\n", current, current->state);
13635 BUG();
13636 /* Avoid "noreturn function does return". */
13637 for (;;)
09a55596 13638@@ -3824,7 +3827,7 @@ SYSCALL_DEFINE1(nice, int, increment)
4bf69007 13639
bb20add7 13640 nice = clamp_val(nice, MIN_NICE, MAX_NICE);
4bf69007
AM
13641 if (increment < 0 && !can_nice(current, nice))
13642- return -EPERM;
13643+ return vx_flags(VXF_IGNEG_NICE, 0) ? 0 : -EPERM;
13644
13645 retval = security_task_setnice(current, nice);
13646 if (retval)
09a55596
AM
13647diff -NurpP --minimal linux-4.9.135/kernel/sched/cputime.c linux-4.9.135-vs2.3.9.8/kernel/sched/cputime.c
13648--- linux-4.9.135/kernel/sched/cputime.c 2018-10-20 10:39:21.000000000 +0000
13649+++ linux-4.9.135-vs2.3.9.8/kernel/sched/cputime.c 2018-10-20 11:46:17.000000000 +0000
b00e13aa 13650@@ -4,6 +4,7 @@
4bf69007
AM
13651 #include <linux/kernel_stat.h>
13652 #include <linux/static_key.h>
b00e13aa 13653 #include <linux/context_tracking.h>
4bf69007
AM
13654+#include <linux/vs_sched.h>
13655 #include "sched.h"
cc23e853
AM
13656 #ifdef CONFIG_PARAVIRT
13657 #include <asm/paravirt.h>
09a55596 13658@@ -125,14 +126,17 @@ static inline void task_group_account_fi
4bf69007
AM
13659 void account_user_time(struct task_struct *p, cputime_t cputime,
13660 cputime_t cputime_scaled)
13661 {
13662+ struct vx_info *vxi = p->vx_info; /* p is _always_ current */
ca5d134c 13663+ int nice = (task_nice(p) > 0);
4bf69007
AM
13664 int index;
13665
13666 /* Add user time to process. */
13667 p->utime += cputime;
13668 p->utimescaled += cputime_scaled;
13669+ vx_account_user(vxi, cputime, nice);
13670 account_group_user_time(p, cputime);
13671
ca5d134c 13672- index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
4bf69007
AM
13673+ index = (nice) ? CPUTIME_NICE : CPUTIME_USER;
13674
13675 /* Add user time to cpustat. */
09a55596
AM
13676 task_group_account_field(p, index, cputime_to_nsecs(cputime));
13677@@ -179,9 +183,12 @@ static inline
ca5d134c
JR
13678 void __account_system_time(struct task_struct *p, cputime_t cputime,
13679 cputime_t cputime_scaled, int index)
13680 {
13681+ struct vx_info *vxi = p->vx_info; /* p is _always_ current */
13682+
13683 /* Add system time to process. */
13684 p->stime += cputime;
13685 p->stimescaled += cputime_scaled;
13686+ vx_account_system(vxi, cputime, 0 /* do we have idle time? */);
13687 account_group_system_time(p, cputime);
13688
13689 /* Add system time to cpustat. */
09a55596
AM
13690diff -NurpP --minimal linux-4.9.135/kernel/sched/fair.c linux-4.9.135-vs2.3.9.8/kernel/sched/fair.c
13691--- linux-4.9.135/kernel/sched/fair.c 2018-10-20 10:39:21.000000000 +0000
13692+++ linux-4.9.135-vs2.3.9.8/kernel/sched/fair.c 2018-10-20 05:55:43.000000000 +0000
bb20add7 13693@@ -30,6 +30,7 @@
b00e13aa
AM
13694 #include <linux/mempolicy.h>
13695 #include <linux/migrate.h>
13696 #include <linux/task_work.h>
4bf69007
AM
13697+#include <linux/vs_cvirt.h>
13698
13699 #include <trace/events/sched.h>
13700
09a55596 13701@@ -3411,6 +3412,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, st
4bf69007
AM
13702 __enqueue_entity(cfs_rq, se);
13703 se->on_rq = 1;
13704
13705+ if (entity_is_task(se))
13706+ vx_activate_task(task_of(se));
13707 if (cfs_rq->nr_running == 1) {
13708 list_add_leaf_cfs_rq(cfs_rq);
13709 check_enqueue_throttle(cfs_rq);
09a55596 13710@@ -3480,6 +3483,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
4bf69007
AM
13711 if (se != cfs_rq->curr)
13712 __dequeue_entity(cfs_rq, se);
13713 se->on_rq = 0;
13714+ if (entity_is_task(se))
13715+ vx_deactivate_task(task_of(se));
4bf69007
AM
13716 account_entity_dequeue(cfs_rq, se);
13717
b00e13aa 13718 /*
09a55596
AM
13719diff -NurpP --minimal linux-4.9.135/kernel/sched/loadavg.c linux-4.9.135-vs2.3.9.8/kernel/sched/loadavg.c
13720--- linux-4.9.135/kernel/sched/loadavg.c 2018-10-20 10:39:21.000000000 +0000
13721+++ linux-4.9.135-vs2.3.9.8/kernel/sched/loadavg.c 2018-10-20 04:58:15.000000000 +0000
5ba7a31c
AM
13722@@ -73,9 +73,16 @@ EXPORT_SYMBOL(avenrun); /* should be rem
13723 */
13724 void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
13725 {
13726- loads[0] = (avenrun[0] + offset) << shift;
13727- loads[1] = (avenrun[1] + offset) << shift;
13728- loads[2] = (avenrun[2] + offset) << shift;
13729+ if (vx_flags(VXF_VIRT_LOAD, 0)) {
13730+ struct vx_info *vxi = current_vx_info();
13731+ loads[0] = (vxi->cvirt.load[0] + offset) << shift;
13732+ loads[1] = (vxi->cvirt.load[1] + offset) << shift;
13733+ loads[2] = (vxi->cvirt.load[2] + offset) << shift;
13734+ } else {
13735+ loads[0] = (avenrun[0] + offset) << shift;
13736+ loads[1] = (avenrun[1] + offset) << shift;
13737+ loads[2] = (avenrun[2] + offset) << shift;
13738+ }
13739 }
13740
13741 long calc_load_fold_active(struct rq *this_rq, long adjust)
09a55596
AM
13742diff -NurpP --minimal linux-4.9.135/kernel/signal.c linux-4.9.135-vs2.3.9.8/kernel/signal.c
13743--- linux-4.9.135/kernel/signal.c 2018-10-20 10:39:21.000000000 +0000
13744+++ linux-4.9.135-vs2.3.9.8/kernel/signal.c 2018-10-20 04:58:15.000000000 +0000
bb20add7 13745@@ -34,6 +34,8 @@
b00e13aa 13746 #include <linux/compat.h>
09be7631 13747 #include <linux/cn_proc.h>
265de2f7 13748 #include <linux/compiler.h>
4bf69007
AM
13749+#include <linux/vs_context.h>
13750+#include <linux/vs_pid.h>
265de2f7 13751
4bf69007
AM
13752 #define CREATE_TRACE_POINTS
13753 #include <trace/events/signal.h>
cc23e853 13754@@ -726,9 +728,18 @@ static int check_kill_permission(int sig
4bf69007
AM
13755 struct pid *sid;
13756 int error;
13757
13758+ vxdprintk(VXD_CBIT(misc, 7),
13759+ "check_kill_permission(%d,%p,%p[#%u,%u])",
13760+ sig, info, t, vx_task_xid(t), t->pid);
d337f35e 13761+
4bf69007
AM
13762 if (!valid_signal(sig))
13763 return -EINVAL;
13764
13765+/* FIXME: needed? if so, why?
13766+ if ((info != SEND_SIG_NOINFO) &&
13767+ (is_si_special(info) || !si_fromuser(info)))
13768+ goto skip; */
d337f35e 13769+
4bf69007
AM
13770 if (!si_fromuser(info))
13771 return 0;
13772
cc23e853 13773@@ -752,6 +763,20 @@ static int check_kill_permission(int sig
4bf69007
AM
13774 }
13775 }
13776
13777+ error = -EPERM;
13778+ if (t->pid == 1 && current->xid)
13779+ return error;
d337f35e 13780+
4bf69007
AM
13781+ error = -ESRCH;
13782+ /* FIXME: we shouldn't return ESRCH ever, to avoid
13783+ loops, maybe ENOENT or EACCES? */
13784+ if (!vx_check(vx_task_xid(t), VS_WATCH_P | VS_IDENT)) {
13785+ vxdprintk(current->xid || VXD_CBIT(misc, 7),
13786+ "signal %d[%p] xid mismatch %p[#%u,%u] xid=#%u",
13787+ sig, info, t, vx_task_xid(t), t->pid, current->xid);
13788+ return error;
2380c486 13789+ }
4bf69007
AM
13790+/* skip: */
13791 return security_task_kill(t, info, sig, 0);
13792 }
13793
cc23e853
AM
13794@@ -1303,8 +1328,14 @@ int kill_pid_info(int sig, struct siginf
13795 for (;;) {
13796 rcu_read_lock();
13797 p = pid_task(pid, PIDTYPE_PID);
13798- if (p)
13799- error = group_send_sig_info(sig, info, p);
13800+ if (p) {
13801+ if (vx_check(vx_task_xid(p), VS_IDENT))
13802+ error = group_send_sig_info(sig, info, p);
13803+ else {
13804+ rcu_read_unlock();
13805+ return -ESRCH;
13806+ }
13807+ }
13808 rcu_read_unlock();
13809 if (likely(!p || error != -ESRCH))
13810 return error;
13811@@ -1349,7 +1380,7 @@ int kill_pid_info_as_cred(int sig, struc
4bf69007
AM
13812
13813 rcu_read_lock();
13814 p = pid_task(pid, PIDTYPE_PID);
13815- if (!p) {
13816+ if (!p || !vx_check(vx_task_xid(p), VS_IDENT)) {
13817 ret = -ESRCH;
13818 goto out_unlock;
13819 }
09a55596 13820@@ -1405,8 +1436,10 @@ static int kill_something_info(int sig,
4bf69007
AM
13821 struct task_struct * p;
13822
13823 for_each_process(p) {
13824- if (task_pid_vnr(p) > 1 &&
13825- !same_thread_group(p, current)) {
13826+ if (vx_check(vx_task_xid(p), VS_ADMIN|VS_IDENT) &&
13827+ task_pid_vnr(p) > 1 &&
13828+ !same_thread_group(p, current) &&
13829+ !vx_current_initpid(p->pid)) {
13830 int err = group_send_sig_info(sig, info, p);
13831 ++count;
13832 if (err != -EPERM)
09a55596 13833@@ -2259,6 +2292,11 @@ relock:
4bf69007
AM
13834 !sig_kernel_only(signr))
13835 continue;
13836
13837+ /* virtual init is protected against user signals */
bb20add7 13838+ if ((ksig->info.si_code == SI_USER) &&
4bf69007
AM
13839+ vx_current_initpid(current->pid))
13840+ continue;
d337f35e 13841+
4bf69007
AM
13842 if (sig_kernel_stop(signr)) {
13843 /*
13844 * The default action is to stop all threads in
09a55596
AM
13845diff -NurpP --minimal linux-4.9.135/kernel/softirq.c linux-4.9.135-vs2.3.9.8/kernel/softirq.c
13846--- linux-4.9.135/kernel/softirq.c 2018-10-20 10:39:21.000000000 +0000
13847+++ linux-4.9.135-vs2.3.9.8/kernel/softirq.c 2018-10-20 05:55:43.000000000 +0000
bb20add7 13848@@ -26,6 +26,7 @@
4bf69007
AM
13849 #include <linux/smpboot.h>
13850 #include <linux/tick.h>
265de2f7 13851 #include <linux/irq.h>
4bf69007
AM
13852+#include <linux/vs_context.h>
13853
13854 #define CREATE_TRACE_POINTS
13855 #include <trace/events/irq.h>
09a55596
AM
13856diff -NurpP --minimal linux-4.9.135/kernel/sys.c linux-4.9.135-vs2.3.9.8/kernel/sys.c
13857--- linux-4.9.135/kernel/sys.c 2018-10-20 10:39:21.000000000 +0000
13858+++ linux-4.9.135-vs2.3.9.8/kernel/sys.c 2018-10-20 05:55:43.000000000 +0000
13859@@ -56,6 +56,7 @@
13860 #include <linux/nospec.h>
4bf69007
AM
13861
13862 #include <linux/kmsg_dump.h>
b00e13aa 13863+#include <linux/vs_pid.h>
4bf69007 13864 /* Move somewhere else to avoid recompiling? */
b00e13aa
AM
13865 #include <generated/utsrelease.h>
13866
09a55596 13867@@ -159,7 +160,10 @@ static int set_one_prio(struct task_stru
4bf69007
AM
13868 goto out;
13869 }
13870 if (niceval < task_nice(p) && !can_nice(p, niceval)) {
13871- error = -EACCES;
13872+ if (vx_flags(VXF_IGNEG_NICE, 0))
13873+ error = 0;
13874+ else
13875+ error = -EACCES;
13876 goto out;
13877 }
13878 no_nice = security_task_setnice(p, niceval);
09a55596 13879@@ -210,6 +214,8 @@ SYSCALL_DEFINE3(setpriority, int, which,
bb20add7
AM
13880 else
13881 pgrp = task_pgrp(current);
13882 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
13883+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13884+ continue;
13885 error = set_one_prio(p, niceval, error);
13886 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
13887 break;
09a55596 13888@@ -276,6 +282,8 @@ SYSCALL_DEFINE2(getpriority, int, which,
bb20add7
AM
13889 else
13890 pgrp = task_pgrp(current);
13891 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
13892+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13893+ continue;
13894 niceval = nice_to_rlimit(task_nice(p));
13895 if (niceval > retval)
13896 retval = niceval;
09a55596 13897@@ -292,6 +300,8 @@ SYSCALL_DEFINE2(getpriority, int, which,
bb20add7
AM
13898 goto out_unlock; /* No processes for this user */
13899 }
13900 do_each_thread(g, p) {
13901+ if (!vx_check(p->xid, VS_ADMIN_P | VS_IDENT))
13902+ continue;
cc23e853 13903 if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
bb20add7 13904 niceval = nice_to_rlimit(task_nice(p));
4bf69007 13905 if (niceval > retval)
09a55596 13906@@ -1211,7 +1221,8 @@ SYSCALL_DEFINE2(sethostname, char __user
4bf69007
AM
13907 int errno;
13908 char tmp[__NEW_UTS_LEN];
13909
13910- if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
13911+ if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns,
13912+ CAP_SYS_ADMIN, VXC_SET_UTSNAME))
13913 return -EPERM;
13914
13915 if (len < 0 || len > __NEW_UTS_LEN)
09a55596 13916@@ -1264,7 +1275,8 @@ SYSCALL_DEFINE2(setdomainname, char __us
4bf69007
AM
13917 int errno;
13918 char tmp[__NEW_UTS_LEN];
13919
13920- if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
13921+ if (!vx_ns_capable(current->nsproxy->uts_ns->user_ns,
13922+ CAP_SYS_ADMIN, VXC_SET_UTSNAME))
13923 return -EPERM;
13924 if (len < 0 || len > __NEW_UTS_LEN)
13925 return -EINVAL;
09a55596 13926@@ -1384,7 +1396,7 @@ int do_prlimit(struct task_struct *tsk,
4bf69007
AM
13927 /* Keep the capable check against init_user_ns until
13928 cgroups can contain all limits */
13929 if (new_rlim->rlim_max > rlim->rlim_max &&
13930- !capable(CAP_SYS_RESOURCE))
13931+ !vx_capable(CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
13932 retval = -EPERM;
13933 if (!retval)
13934 retval = security_task_setrlimit(tsk->group_leader,
09a55596 13935@@ -1437,7 +1449,8 @@ static int check_prlimit_permission(stru
4bf69007
AM
13936 gid_eq(cred->gid, tcred->sgid) &&
13937 gid_eq(cred->gid, tcred->gid))
13938 return 0;
13939- if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
13940+ if (vx_ns_capable(tcred->user_ns,
13941+ CAP_SYS_RESOURCE, VXC_SET_RLIMIT))
13942 return 0;
13943
13944 return -EPERM;
09a55596
AM
13945diff -NurpP --minimal linux-4.9.135/kernel/sysctl.c linux-4.9.135-vs2.3.9.8/kernel/sysctl.c
13946--- linux-4.9.135/kernel/sysctl.c 2018-10-20 10:39:21.000000000 +0000
13947+++ linux-4.9.135-vs2.3.9.8/kernel/sysctl.c 2018-10-20 05:55:43.000000000 +0000
cc23e853 13948@@ -87,6 +87,7 @@
4bf69007
AM
13949 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
13950 #include <linux/lockdep.h>
13951 #endif
13952+extern char vshelper_path[];
13953 #ifdef CONFIG_CHR_DEV_SG
13954 #include <scsi/sg.h>
13955 #endif
cc23e853 13956@@ -282,6 +283,13 @@ static int max_extfrag_threshold = 1000;
bb20add7
AM
13957
13958 static struct ctl_table kern_table[] = {
13959 {
4bf69007
AM
13960+ .procname = "vshelper",
13961+ .data = &vshelper_path,
13962+ .maxlen = 256,
13963+ .mode = 0644,
bb20add7 13964+ .proc_handler = proc_dostring,
4bf69007 13965+ },
bb20add7
AM
13966+ {
13967 .procname = "sched_child_runs_first",
13968 .data = &sysctl_sched_child_runs_first,
13969 .maxlen = sizeof(unsigned int),
09a55596 13970@@ -1427,7 +1435,6 @@ static struct ctl_table vm_table[] = {
cc23e853
AM
13971 .extra1 = &zero,
13972 .extra2 = &one,
bb20add7
AM
13973 },
13974-
13975 #endif /* CONFIG_COMPACTION */
4bf69007 13976 {
bb20add7 13977 .procname = "min_free_kbytes",
09a55596
AM
13978diff -NurpP --minimal linux-4.9.135/kernel/sysctl_binary.c linux-4.9.135-vs2.3.9.8/kernel/sysctl_binary.c
13979--- linux-4.9.135/kernel/sysctl_binary.c 2016-12-11 19:17:54.000000000 +0000
13980+++ linux-4.9.135-vs2.3.9.8/kernel/sysctl_binary.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 13981@@ -74,6 +74,7 @@ static const struct bin_table bin_kern_t
4bf69007
AM
13982
13983 { CTL_INT, KERN_PANIC, "panic" },
13984 { CTL_INT, KERN_REALROOTDEV, "real-root-dev" },
13985+ { CTL_STR, KERN_VSHELPER, "vshelper" },
13986
13987 { CTL_STR, KERN_SPARC_REBOOT, "reboot-cmd" },
13988 { CTL_INT, KERN_CTLALTDEL, "ctrl-alt-del" },
09a55596
AM
13989diff -NurpP --minimal linux-4.9.135/kernel/time/posix-timers.c linux-4.9.135-vs2.3.9.8/kernel/time/posix-timers.c
13990--- linux-4.9.135/kernel/time/posix-timers.c 2018-10-20 10:39:21.000000000 +0000
13991+++ linux-4.9.135-vs2.3.9.8/kernel/time/posix-timers.c 2018-10-20 04:58:15.000000000 +0000
bb20add7
AM
13992@@ -48,6 +48,7 @@
13993 #include <linux/workqueue.h>
13994 #include <linux/export.h>
13995 #include <linux/hashtable.h>
13996+#include <linux/vs_context.h>
4bf69007 13997
bb20add7 13998 #include "timekeeping.h"
4bf69007 13999
cc23e853 14000@@ -407,6 +408,7 @@ int posix_timer_event(struct k_itimer *t
bb20add7
AM
14001 {
14002 struct task_struct *task;
14003 int shared, ret = -1;
14004+
14005 /*
14006 * FIXME: if ->sigq is queued we can race with
14007 * dequeue_signal()->do_schedule_next_timer().
cc23e853 14008@@ -423,10 +425,18 @@ int posix_timer_event(struct k_itimer *t
bb20add7
AM
14009 rcu_read_lock();
14010 task = pid_task(timr->it_pid, PIDTYPE_PID);
14011 if (task) {
14012+ struct vx_info_save vxis;
14013+ struct vx_info *vxi;
14014+
14015+ vxi = get_vx_info(task->vx_info);
14016+ enter_vx_info(vxi, &vxis);
14017 shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
14018 ret = send_sigqueue(timr->sigq, task, shared);
14019+ leave_vx_info(&vxis);
14020+ put_vx_info(vxi);
14021 }
14022 rcu_read_unlock();
14023+
14024 /* If we failed to send the signal the timer stops. */
14025 return ret > 0;
4bf69007 14026 }
09a55596
AM
14027diff -NurpP --minimal linux-4.9.135/kernel/time/time.c linux-4.9.135-vs2.3.9.8/kernel/time/time.c
14028--- linux-4.9.135/kernel/time/time.c 2018-10-20 10:39:21.000000000 +0000
14029+++ linux-4.9.135-vs2.3.9.8/kernel/time/time.c 2018-10-20 05:55:43.000000000 +0000
14030@@ -38,6 +38,7 @@
4bf69007
AM
14031 #include <linux/fs.h>
14032 #include <linux/math64.h>
14033 #include <linux/ptrace.h>
14034+#include <linux/vs_time.h>
14035
14036 #include <asm/uaccess.h>
14037 #include <asm/unistd.h>
09a55596 14038@@ -94,7 +95,7 @@ SYSCALL_DEFINE1(stime, time_t __user *,
4bf69007
AM
14039 if (err)
14040 return err;
14041
14042- do_settimeofday(&tv);
14043+ vx_settimeofday(&tv);
14044 return 0;
14045 }
14046
09a55596 14047@@ -187,7 +188,7 @@ int do_sys_settimeofday64(const struct t
4bf69007
AM
14048 }
14049 }
14050 if (tv)
cc23e853
AM
14051- return do_settimeofday64(tv);
14052+ return vx_settimeofday64(tv);
4bf69007
AM
14053 return 0;
14054 }
14055
09a55596
AM
14056diff -NurpP --minimal linux-4.9.135/kernel/time/timekeeping.c linux-4.9.135-vs2.3.9.8/kernel/time/timekeeping.c
14057--- linux-4.9.135/kernel/time/timekeeping.c 2018-10-20 10:39:21.000000000 +0000
14058+++ linux-4.9.135-vs2.3.9.8/kernel/time/timekeeping.c 2018-10-20 04:58:15.000000000 +0000
bb20add7
AM
14059@@ -23,6 +23,7 @@
14060 #include <linux/stop_machine.h>
14061 #include <linux/pvclock_gtod.h>
14062 #include <linux/compiler.h>
14063+#include <linux/vs_time.h>
14064
14065 #include "tick-internal.h"
14066 #include "ntp_internal.h"
09a55596
AM
14067diff -NurpP --minimal linux-4.9.135/kernel/time/timer.c linux-4.9.135-vs2.3.9.8/kernel/time/timer.c
14068--- linux-4.9.135/kernel/time/timer.c 2018-10-20 10:39:21.000000000 +0000
14069+++ linux-4.9.135-vs2.3.9.8/kernel/time/timer.c 2018-10-20 05:55:43.000000000 +0000
09be7631 14070@@ -42,6 +42,10 @@
b00e13aa 14071 #include <linux/sched/sysctl.h>
4bf69007 14072 #include <linux/slab.h>
09be7631 14073 #include <linux/compat.h>
4bf69007
AM
14074+#include <linux/vs_base.h>
14075+#include <linux/vs_cvirt.h>
14076+#include <linux/vs_pid.h>
14077+#include <linux/vserver/sched.h>
14078
14079 #include <asm/uaccess.h>
14080 #include <asm/unistd.h>
09a55596
AM
14081diff -NurpP --minimal linux-4.9.135/kernel/user_namespace.c linux-4.9.135-vs2.3.9.8/kernel/user_namespace.c
14082--- linux-4.9.135/kernel/user_namespace.c 2018-10-20 10:39:23.000000000 +0000
14083+++ linux-4.9.135-vs2.3.9.8/kernel/user_namespace.c 2018-10-20 05:55:43.000000000 +0000
b00e13aa 14084@@ -22,6 +22,7 @@
4bf69007
AM
14085 #include <linux/ctype.h>
14086 #include <linux/projid.h>
b00e13aa 14087 #include <linux/fs_struct.h>
4bf69007
AM
14088+#include <linux/vserver/global.h>
14089
14090 static struct kmem_cache *user_ns_cachep __read_mostly;
bb20add7 14091 static DEFINE_MUTEX(userns_state_mutex);
cc23e853 14092@@ -115,6 +116,7 @@ int create_user_ns(struct cred *new)
4bf69007 14093
b00e13aa
AM
14094 atomic_set(&ns->count, 1);
14095 /* Leave the new->user_ns reference with the new user namespace. */
4bf69007
AM
14096+ atomic_inc(&vs_global_user_ns);
14097 ns->parent = parent_ns;
09be7631 14098 ns->level = parent_ns->level + 1;
4bf69007 14099 ns->owner = owner;
cc23e853
AM
14100@@ -185,6 +187,7 @@ static void free_user_ns(struct work_str
14101 key_put(ns->persistent_keyring_register);
14102 #endif
14103 ns_free_inum(&ns->ns);
14104+ atomic_dec(&vs_global_user_ns);
14105 kmem_cache_free(user_ns_cachep, ns);
14106 dec_user_namespaces(ucounts);
14107 ns = parent;
14108@@ -404,6 +407,18 @@ gid_t from_kgid_munged(struct user_names
bb20add7
AM
14109 }
14110 EXPORT_SYMBOL(from_kgid_munged);
14111
14112+ktag_t make_ktag(struct user_namespace *from, vtag_t tag)
14113+{
14114+ return KTAGT_INIT(tag);
14115+}
14116+EXPORT_SYMBOL(make_ktag);
14117+
14118+vtag_t from_ktag(struct user_namespace *to, ktag_t tag)
14119+{
14120+ return __ktag_val(tag);
14121+}
14122+EXPORT_SYMBOL(from_ktag);
14123+
14124 /**
14125 * make_kprojid - Map a user-namespace projid pair into a kprojid.
14126 * @ns: User namespace that the projid is in
09a55596
AM
14127diff -NurpP --minimal linux-4.9.135/kernel/utsname.c linux-4.9.135-vs2.3.9.8/kernel/utsname.c
14128--- linux-4.9.135/kernel/utsname.c 2016-12-11 19:17:54.000000000 +0000
14129+++ linux-4.9.135-vs2.3.9.8/kernel/utsname.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 14130@@ -16,6 +16,7 @@
4bf69007
AM
14131 #include <linux/slab.h>
14132 #include <linux/user_namespace.h>
09be7631 14133 #include <linux/proc_ns.h>
4bf69007
AM
14134+#include <linux/vserver/global.h>
14135
cc23e853 14136 static struct ucounts *inc_uts_namespaces(struct user_namespace *ns)
4bf69007 14137 {
cc23e853 14138@@ -32,8 +33,10 @@ static struct uts_namespace *create_uts_
4bf69007
AM
14139 struct uts_namespace *uts_ns;
14140
14141 uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
14142- if (uts_ns)
14143+ if (uts_ns) {
c2e5f7c8 14144 kref_init(&uts_ns->kref);
4bf69007
AM
14145+ atomic_inc(&vs_global_uts_ns);
14146+ }
14147 return uts_ns;
14148 }
14149
cc23e853
AM
14150@@ -111,6 +114,7 @@ void free_uts_ns(struct kref *kref)
14151 dec_uts_namespaces(ns->ucounts);
4bf69007 14152 put_user_ns(ns->user_ns);
cc23e853 14153 ns_free_inum(&ns->ns);
4bf69007
AM
14154+ atomic_dec(&vs_global_uts_ns);
14155 kfree(ns);
14156 }
14157
09a55596
AM
14158diff -NurpP --minimal linux-4.9.135/kernel/vserver/Kconfig linux-4.9.135-vs2.3.9.8/kernel/vserver/Kconfig
14159--- linux-4.9.135/kernel/vserver/Kconfig 1970-01-01 00:00:00.000000000 +0000
14160+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/Kconfig 2018-10-20 04:58:15.000000000 +0000
c2e5f7c8 14161@@ -0,0 +1,230 @@
4bf69007
AM
14162+#
14163+# Linux VServer configuration
14164+#
d337f35e 14165+
4bf69007 14166+menu "Linux VServer"
d337f35e 14167+
4bf69007
AM
14168+config VSERVER_AUTO_LBACK
14169+ bool "Automatically Assign Loopback IP"
14170+ default y
14171+ help
14172+ Automatically assign a guest specific loopback
14173+ IP and add it to the kernel network stack on
14174+ startup.
d337f35e 14175+
4bf69007
AM
14176+config VSERVER_AUTO_SINGLE
14177+ bool "Automatic Single IP Special Casing"
c2e5f7c8 14178+ default n
4bf69007
AM
14179+ help
14180+ This allows network contexts with a single IP to
14181+ automatically remap 0.0.0.0 bindings to that IP,
14182+ avoiding further network checks and improving
14183+ performance.
d337f35e 14184+
4bf69007
AM
14185+ (note: such guests do not allow changing the IP
14186+ on the fly and do not show loopback addresses)
2380c486 14187+
4bf69007
AM
14188+config VSERVER_COWBL
14189+ bool "Enable COW Immutable Link Breaking"
14190+ default y
14191+ help
14192+ This enables the COW (Copy-On-Write) link break code.
14193+ It allows you to treat unified files like normal files
14194+ when writing to them (which will implicitly break the
14195+ link and create a copy of the unified file)
d337f35e 14196+
4bf69007 14197+config VSERVER_VTIME
c2e5f7c8 14198+ bool "Enable Virtualized Guest Time (EXPERIMENTAL)"
4bf69007
AM
14199+ default n
14200+ help
14201+ This enables per guest time offsets to allow for
14202+ adjusting the system clock individually per guest.
14203+ This adds some overhead to the time functions and
14204+ therefore should not be enabled without good reason.
d337f35e 14205+
4bf69007 14206+config VSERVER_DEVICE
c2e5f7c8 14207+ bool "Enable Guest Device Mapping (EXPERIMENTAL)"
4bf69007
AM
14208+ default n
14209+ help
14210+ This enables generic device remapping.
d337f35e 14211+
4bf69007
AM
14212+config VSERVER_PROC_SECURE
14213+ bool "Enable Proc Security"
14214+ depends on PROC_FS
14215+ default y
14216+ help
14217+ This configures ProcFS security to initially hide
14218+ non-process entries for all contexts except the main and
14219+ spectator context (i.e. for all guests), which is a secure
14220+ default.
d337f35e 14221+
4bf69007 14222+ (note: on 1.2x the entries were visible by default)
d337f35e 14223+
4bf69007
AM
14224+choice
14225+ prompt "Persistent Inode Tagging"
14226+ default TAGGING_ID24
14227+ help
14228+ This adds persistent context information to filesystems
14229+ mounted with the tagxid option. Tagging is a requirement
14230+ for per-context disk limits and per-context quota.
d337f35e 14231+
d337f35e 14232+
4bf69007
AM
14233+config TAGGING_NONE
14234+ bool "Disabled"
14235+ help
14236+ do not store per-context information in inodes.
d337f35e 14237+
4bf69007
AM
14238+config TAGGING_UID16
14239+ bool "UID16/GID32"
14240+ help
14241+ reduces UID to 16 bit, but leaves GID at 32 bit.
d337f35e 14242+
4bf69007
AM
14243+config TAGGING_GID16
14244+ bool "UID32/GID16"
14245+ help
14246+ reduces GID to 16 bit, but leaves UID at 32 bit.
d337f35e 14247+
4bf69007
AM
14248+config TAGGING_ID24
14249+ bool "UID24/GID24"
14250+ help
14251+ uses the upper 8bit from UID and GID for XID tagging
14252+ which leaves 24bit for UID/GID each, which should be
14253+ more than sufficient for normal use.
d337f35e 14254+
4bf69007
AM
14255+config TAGGING_INTERN
14256+ bool "UID32/GID32"
14257+ help
14258+ this uses otherwise reserved inode fields in the on
14259+ disk representation, which limits the use to a few
14260+ filesystems (currently ext2 and ext3)
d337f35e 14261+
4bf69007 14262+endchoice
d337f35e 14263+
4bf69007
AM
14264+config TAG_NFSD
14265+ bool "Tag NFSD User Auth and Files"
14266+ default n
14267+ help
14268+ Enable this if you do want the in-kernel NFS
14269+ Server to use the tagging specified above.
14270+ (will require patched clients too)
2380c486 14271+
4bf69007
AM
14272+config VSERVER_PRIVACY
14273+ bool "Honor Privacy Aspects of Guests"
14274+ default n
14275+ help
14276+ When enabled, most context checks will disallow
14277+ access to structures assigned to a specific context,
14278+ like ptys or loop devices.
2380c486 14279+
4bf69007
AM
14280+config VSERVER_CONTEXTS
14281+ int "Maximum number of Contexts (1-65533)" if EMBEDDED
14282+ range 1 65533
14283+ default "768" if 64BIT
14284+ default "256"
14285+ help
14286+ This setting will optimize certain data structures
14287+ and memory allocations according to the expected
14288+ maximum.
2380c486 14289+
4bf69007 14290+ note: this is not a strict upper limit.
2380c486 14291+
4bf69007
AM
14292+config VSERVER_WARN
14293+ bool "VServer Warnings"
14294+ default y
14295+ help
14296+ This enables various runtime warnings, which will
14297+ notify about potential manipulation attempts or
14298+ resource shortage. It is generally considered to
14299+ be a good idea to have that enabled.
2380c486 14300+
4bf69007
AM
14301+config VSERVER_WARN_DEVPTS
14302+ bool "VServer DevPTS Warnings"
14303+ depends on VSERVER_WARN
14304+ default y
14305+ help
14306+ This enables DevPTS related warnings, issued when a
14307+ process inside a context tries to look up or access
14308+ a dynamic pts from the host or a different context.
d337f35e 14309+
4bf69007
AM
14310+config VSERVER_DEBUG
14311+ bool "VServer Debugging Code"
14312+ default n
14313+ help
14314+ Set this to yes if you want to be able to activate
14315+ debugging output at runtime. It adds a very small
14316+ overhead to all vserver related functions and
14317+ increases the kernel size by about 20k.
d337f35e 14318+
4bf69007
AM
14319+config VSERVER_HISTORY
14320+ bool "VServer History Tracing"
14321+ depends on VSERVER_DEBUG
14322+ default n
14323+ help
14324+ Set this to yes if you want to record the history of
14325+ linux-vserver activities, so they can be replayed in
14326+ the event of a kernel panic or oops.
d337f35e 14327+
4bf69007
AM
14328+config VSERVER_HISTORY_SIZE
14329+ int "Per-CPU History Size (32-65536)"
14330+ depends on VSERVER_HISTORY
14331+ range 32 65536
14332+ default 64
14333+ help
14334+ This allows you to specify the number of entries in
14335+ the per-CPU history buffer.
d337f35e 14336+
4bf69007
AM
14337+config VSERVER_EXTRA_MNT_CHECK
14338+ bool "Extra Checks for Reachability"
14339+ default n
14340+ help
14341+ Set this to yes if you want to do extra checks for
14342+ vfsmount reachability in the proc filesystem code.
14343+ This shouldn't be required on any setup utilizing
14344+ mnt namespaces.
d337f35e 14345+
4bf69007
AM
14346+choice
14347+ prompt "Quotes used in debug and warn messages"
14348+ default QUOTES_ISO8859
d337f35e 14349+
4bf69007
AM
14350+config QUOTES_ISO8859
14351+ bool "Extended ASCII (ISO 8859) angle quotes"
14352+ help
14353+ This uses the extended ASCII characters \xbb
14354+ and \xab for quoting file and process names.
d337f35e 14355+
4bf69007
AM
14356+config QUOTES_UTF8
14357+ bool "UTF-8 angle quotes"
14358+ help
14359+ This uses the UTF-8 sequences for angle
14360+ quotes to quote file and process names.
d337f35e 14361+
4bf69007
AM
14362+config QUOTES_ASCII
14363+ bool "ASCII single quotes"
14364+ help
14365+ This uses the ASCII single quote character
14366+ (\x27) to quote file and process names.
d337f35e 14367+
4bf69007 14368+endchoice
d337f35e 14369+
4bf69007 14370+endmenu
d337f35e 14371+
d337f35e 14372+
4bf69007
AM
14373+config VSERVER
14374+ bool
14375+ default y
14376+ select NAMESPACES
14377+ select UTS_NS
14378+ select IPC_NS
14379+# select USER_NS
14380+ select SYSVIPC
d337f35e 14381+
4bf69007
AM
14382+config VSERVER_SECURITY
14383+ bool
14384+ depends on SECURITY
14385+ default y
14386+ select SECURITY_CAPABILITIES
d337f35e 14387+
4bf69007
AM
14388+config VSERVER_DISABLED
14389+ bool
14390+ default n
d337f35e 14391+
09a55596
AM
14392diff -NurpP --minimal linux-4.9.135/kernel/vserver/Makefile linux-4.9.135-vs2.3.9.8/kernel/vserver/Makefile
14393--- linux-4.9.135/kernel/vserver/Makefile 1970-01-01 00:00:00.000000000 +0000
14394+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/Makefile 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
14395@@ -0,0 +1,18 @@
14396+#
14397+# Makefile for the Linux vserver routines.
14398+#
d337f35e 14399+
d337f35e 14400+
4bf69007 14401+obj-y += vserver.o
2380c486 14402+
4bf69007
AM
14403+vserver-y := switch.o context.o space.o sched.o network.o inode.o \
14404+ limit.o cvirt.o cacct.o signal.o helper.o init.o \
14405+ dlimit.o tag.o
d337f35e 14406+
4bf69007
AM
14407+vserver-$(CONFIG_INET) += inet.o
14408+vserver-$(CONFIG_PROC_FS) += proc.o
14409+vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o debug.o
14410+vserver-$(CONFIG_VSERVER_HISTORY) += history.o
14411+vserver-$(CONFIG_VSERVER_MONITOR) += monitor.o
14412+vserver-$(CONFIG_VSERVER_DEVICE) += device.o
d337f35e 14413+
09a55596
AM
14414diff -NurpP --minimal linux-4.9.135/kernel/vserver/cacct.c linux-4.9.135-vs2.3.9.8/kernel/vserver/cacct.c
14415--- linux-4.9.135/kernel/vserver/cacct.c 1970-01-01 00:00:00.000000000 +0000
14416+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/cacct.c 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
14417@@ -0,0 +1,42 @@
14418+/*
14419+ * linux/kernel/vserver/cacct.c
14420+ *
14421+ * Virtual Server: Context Accounting
14422+ *
cc23e853 14423+ * Copyright (C) 2006-2007 Herbert Pötzl
4bf69007
AM
14424+ *
14425+ * V0.01 added accounting stats
14426+ *
14427+ */
d337f35e 14428+
4bf69007
AM
14429+#include <linux/types.h>
14430+#include <linux/vs_context.h>
14431+#include <linux/vserver/cacct_cmd.h>
14432+#include <linux/vserver/cacct_int.h>
d337f35e 14433+
4bf69007
AM
14434+#include <asm/errno.h>
14435+#include <asm/uaccess.h>
14436+
14437+
14438+int vc_sock_stat(struct vx_info *vxi, void __user *data)
d337f35e 14439+{
4bf69007
AM
14440+ struct vcmd_sock_stat_v0 vc_data;
14441+ int j, field;
d337f35e 14442+
2380c486
JR
14443+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
14444+ return -EFAULT;
14445+
4bf69007
AM
14446+ field = vc_data.field;
14447+ if ((field < 0) || (field >= VXA_SOCK_SIZE))
14448+ return -EINVAL;
7e46296a 14449+
4bf69007
AM
14450+ for (j = 0; j < 3; j++) {
14451+ vc_data.count[j] = vx_sock_count(&vxi->cacct, field, j);
14452+ vc_data.total[j] = vx_sock_total(&vxi->cacct, field, j);
14453+ }
7e46296a
AM
14454+
14455+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
14456+ return -EFAULT;
14457+ return 0;
14458+}
14459+
09a55596
AM
14460diff -NurpP --minimal linux-4.9.135/kernel/vserver/cacct_init.h linux-4.9.135-vs2.3.9.8/kernel/vserver/cacct_init.h
14461--- linux-4.9.135/kernel/vserver/cacct_init.h 1970-01-01 00:00:00.000000000 +0000
14462+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/cacct_init.h 2018-10-20 04:58:15.000000000 +0000
4bf69007 14463@@ -0,0 +1,25 @@
7e46296a
AM
14464+
14465+
4bf69007 14466+static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
265d6dcc 14467+{
4bf69007 14468+ int i, j;
265d6dcc 14469+
265d6dcc 14470+
4bf69007
AM
14471+ for (i = 0; i < VXA_SOCK_SIZE; i++) {
14472+ for (j = 0; j < 3; j++) {
14473+ atomic_long_set(&cacct->sock[i][j].count, 0);
14474+ atomic_long_set(&cacct->sock[i][j].total, 0);
14475+ }
14476+ }
14477+ for (i = 0; i < 8; i++)
14478+ atomic_set(&cacct->slab[i], 0);
14479+ for (i = 0; i < 5; i++)
14480+ for (j = 0; j < 4; j++)
14481+ atomic_set(&cacct->page[i][j], 0);
265d6dcc
JR
14482+}
14483+
4bf69007 14484+static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
265d6dcc 14485+{
4bf69007 14486+ return;
265d6dcc
JR
14487+}
14488+
09a55596
AM
14489diff -NurpP --minimal linux-4.9.135/kernel/vserver/cacct_proc.h linux-4.9.135-vs2.3.9.8/kernel/vserver/cacct_proc.h
14490--- linux-4.9.135/kernel/vserver/cacct_proc.h 1970-01-01 00:00:00.000000000 +0000
14491+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/cacct_proc.h 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
14492@@ -0,0 +1,53 @@
14493+#ifndef _VX_CACCT_PROC_H
14494+#define _VX_CACCT_PROC_H
265d6dcc 14495+
4bf69007 14496+#include <linux/vserver/cacct_int.h>
d337f35e 14497+
d337f35e 14498+
4bf69007
AM
14499+#define VX_SOCKA_TOP \
14500+ "Type\t recv #/bytes\t\t send #/bytes\t\t fail #/bytes\n"
d337f35e 14501+
4bf69007 14502+static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
d337f35e 14503+{
4bf69007
AM
14504+ int i, j, length = 0;
14505+ static char *type[VXA_SOCK_SIZE] = {
14506+ "UNSPEC", "UNIX", "INET", "INET6", "PACKET", "OTHER"
14507+ };
d337f35e 14508+
4bf69007
AM
14509+ length += sprintf(buffer + length, VX_SOCKA_TOP);
14510+ for (i = 0; i < VXA_SOCK_SIZE; i++) {
14511+ length += sprintf(buffer + length, "%s:", type[i]);
14512+ for (j = 0; j < 3; j++) {
14513+ length += sprintf(buffer + length,
14514+ "\t%10lu/%-10lu",
14515+ vx_sock_count(cacct, i, j),
14516+ vx_sock_total(cacct, i, j));
14517+ }
14518+ buffer[length++] = '\n';
14519+ }
d337f35e 14520+
4bf69007
AM
14521+ length += sprintf(buffer + length, "\n");
14522+ length += sprintf(buffer + length,
14523+ "slab:\t %8u %8u %8u %8u\n",
14524+ atomic_read(&cacct->slab[1]),
14525+ atomic_read(&cacct->slab[4]),
14526+ atomic_read(&cacct->slab[0]),
14527+ atomic_read(&cacct->slab[2]));
d337f35e 14528+
4bf69007
AM
14529+ length += sprintf(buffer + length, "\n");
14530+ for (i = 0; i < 5; i++) {
14531+ length += sprintf(buffer + length,
14532+ "page[%d]: %8u %8u %8u %8u\t %8u %8u %8u %8u\n", i,
14533+ atomic_read(&cacct->page[i][0]),
14534+ atomic_read(&cacct->page[i][1]),
14535+ atomic_read(&cacct->page[i][2]),
14536+ atomic_read(&cacct->page[i][3]),
14537+ atomic_read(&cacct->page[i][4]),
14538+ atomic_read(&cacct->page[i][5]),
14539+ atomic_read(&cacct->page[i][6]),
14540+ atomic_read(&cacct->page[i][7]));
14541+ }
14542+ return length;
14543+}
d337f35e 14544+
4bf69007 14545+#endif /* _VX_CACCT_PROC_H */
09a55596
AM
14546diff -NurpP --minimal linux-4.9.135/kernel/vserver/context.c linux-4.9.135-vs2.3.9.8/kernel/vserver/context.c
14547--- linux-4.9.135/kernel/vserver/context.c 1970-01-01 00:00:00.000000000 +0000
14548+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/context.c 2018-10-20 04:58:15.000000000 +0000
4bf69007 14549@@ -0,0 +1,1119 @@
2380c486 14550+/*
4bf69007 14551+ * linux/kernel/vserver/context.c
2380c486 14552+ *
4bf69007 14553+ * Virtual Server: Context Support
2380c486 14554+ *
cc23e853 14555+ * Copyright (C) 2003-2011 Herbert Pötzl
2380c486 14556+ *
4bf69007
AM
14557+ * V0.01 context helper
14558+ * V0.02 vx_ctx_kill syscall command
14559+ * V0.03 replaced context_info calls
14560+ * V0.04 redesign of struct (de)alloc
14561+ * V0.05 rlimit basic implementation
14562+ * V0.06 task_xid and info commands
14563+ * V0.07 context flags and caps
14564+ * V0.08 switch to RCU based hash
14565+ * V0.09 revert to non RCU for now
14566+ * V0.10 and back to working RCU hash
14567+ * V0.11 and back to locking again
14568+ * V0.12 referenced context store
14569+ * V0.13 separate per cpu data
14570+ * V0.14 changed vcmds to vxi arg
14571+ * V0.15 added context stat
14572+ * V0.16 have __create claim() the vxi
14573+ * V0.17 removed older and legacy stuff
14574+ * V0.18 added user credentials
14575+ * V0.19 added warn mask
2380c486
JR
14576+ *
14577+ */
d337f35e 14578+
4bf69007 14579+#include <linux/slab.h>
2380c486 14580+#include <linux/types.h>
4bf69007
AM
14581+#include <linux/security.h>
14582+#include <linux/pid_namespace.h>
14583+#include <linux/capability.h>
1e8b8f9b 14584+
4bf69007
AM
14585+#include <linux/vserver/context.h>
14586+#include <linux/vserver/network.h>
14587+#include <linux/vserver/debug.h>
14588+#include <linux/vserver/limit.h>
14589+#include <linux/vserver/limit_int.h>
14590+#include <linux/vserver/space.h>
14591+#include <linux/init_task.h>
14592+#include <linux/fs_struct.h>
14593+#include <linux/cred.h>
1e8b8f9b 14594+
4bf69007
AM
14595+#include <linux/vs_context.h>
14596+#include <linux/vs_limit.h>
14597+#include <linux/vs_pid.h>
14598+#include <linux/vserver/context_cmd.h>
d337f35e 14599+
4bf69007
AM
14600+#include "cvirt_init.h"
14601+#include "cacct_init.h"
14602+#include "limit_init.h"
14603+#include "sched_init.h"
d337f35e 14604+
d337f35e 14605+
4bf69007
AM
14606+atomic_t vx_global_ctotal = ATOMIC_INIT(0);
14607+atomic_t vx_global_cactive = ATOMIC_INIT(0);
d337f35e 14608+
d337f35e 14609+
4bf69007 14610+/* now inactive context structures */
d337f35e 14611+
4bf69007 14612+static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT;
2380c486 14613+
4bf69007 14614+static DEFINE_SPINLOCK(vx_info_inactive_lock);
d337f35e 14615+
2380c486 14616+
4bf69007 14617+/* __alloc_vx_info()
d337f35e 14618+
4bf69007
AM
14619+ * allocate an initialized vx_info struct
14620+ * doesn't make it visible (hash) */
d337f35e 14621+
61333608 14622+static struct vx_info *__alloc_vx_info(vxid_t xid)
4bf69007
AM
14623+{
14624+ struct vx_info *new = NULL;
14625+ int cpu, index;
d337f35e 14626+
4bf69007 14627+ vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
d337f35e 14628+
4bf69007
AM
14629+ /* would this benefit from a slab cache? */
14630+ new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
14631+ if (!new)
14632+ return 0;
2380c486 14633+
4bf69007
AM
14634+ memset(new, 0, sizeof(struct vx_info));
14635+#ifdef CONFIG_SMP
14636+ new->ptr_pc = alloc_percpu(struct _vx_info_pc);
14637+ if (!new->ptr_pc)
14638+ goto error;
14639+#endif
14640+ new->vx_id = xid;
14641+ INIT_HLIST_NODE(&new->vx_hlist);
14642+ atomic_set(&new->vx_usecnt, 0);
14643+ atomic_set(&new->vx_tasks, 0);
14644+ new->vx_parent = NULL;
14645+ new->vx_state = 0;
14646+ init_waitqueue_head(&new->vx_wait);
2380c486 14647+
4bf69007
AM
14648+ /* prepare reaper */
14649+ get_task_struct(init_pid_ns.child_reaper);
14650+ new->vx_reaper = init_pid_ns.child_reaper;
14651+ new->vx_badness_bias = 0;
d337f35e 14652+
4bf69007
AM
14653+ /* rest of init goes here */
14654+ vx_info_init_limit(&new->limit);
14655+ vx_info_init_sched(&new->sched);
14656+ vx_info_init_cvirt(&new->cvirt);
14657+ vx_info_init_cacct(&new->cacct);
d337f35e 14658+
4bf69007
AM
14659+ /* per cpu data structures */
14660+ for_each_possible_cpu(cpu) {
14661+ vx_info_init_sched_pc(
14662+ &vx_per_cpu(new, sched_pc, cpu), cpu);
14663+ vx_info_init_cvirt_pc(
14664+ &vx_per_cpu(new, cvirt_pc, cpu), cpu);
14665+ }
d337f35e 14666+
4bf69007
AM
14667+ new->vx_flags = VXF_INIT_SET;
14668+ new->vx_bcaps = CAP_FULL_SET; // maybe ~CAP_SETPCAP
14669+ new->vx_ccaps = 0;
14670+ new->vx_umask = 0;
14671+ new->vx_wmask = 0;
d337f35e 14672+
4bf69007
AM
14673+ new->reboot_cmd = 0;
14674+ new->exit_code = 0;
d337f35e 14675+
4bf69007
AM
14676+ // preconfig spaces
14677+ for (index = 0; index < VX_SPACES; index++) {
14678+ struct _vx_space *space = &new->space[index];
d337f35e 14679+
4bf69007
AM
14680+ // filesystem
14681+ spin_lock(&init_fs.lock);
14682+ init_fs.users++;
14683+ spin_unlock(&init_fs.lock);
14684+ space->vx_fs = &init_fs;
2380c486 14685+
4bf69007
AM
14686+ /* FIXME: do we want defaults? */
14687+ // space->vx_real_cred = 0;
14688+ // space->vx_cred = 0;
2380c486 14689+ }
4bf69007
AM
14690+
14691+
14692+ vxdprintk(VXD_CBIT(xid, 0),
14693+ "alloc_vx_info(%d) = %p", xid, new);
14694+ vxh_alloc_vx_info(new);
14695+ atomic_inc(&vx_global_ctotal);
14696+ return new;
14697+#ifdef CONFIG_SMP
14698+error:
14699+ kfree(new);
14700+ return 0;
14701+#endif
d337f35e
JR
14702+}
14703+
4bf69007 14704+/* __dealloc_vx_info()
d337f35e 14705+
4bf69007 14706+ * final disposal of vx_info */
d337f35e 14707+
4bf69007 14708+static void __dealloc_vx_info(struct vx_info *vxi)
d337f35e 14709+{
4bf69007
AM
14710+#ifdef CONFIG_VSERVER_WARN
14711+ struct vx_info_save vxis;
14712+ int cpu;
14713+#endif
14714+ vxdprintk(VXD_CBIT(xid, 0),
14715+ "dealloc_vx_info(%p)", vxi);
14716+ vxh_dealloc_vx_info(vxi);
d337f35e 14717+
4bf69007
AM
14718+#ifdef CONFIG_VSERVER_WARN
14719+ enter_vx_info(vxi, &vxis);
14720+ vx_info_exit_limit(&vxi->limit);
14721+ vx_info_exit_sched(&vxi->sched);
14722+ vx_info_exit_cvirt(&vxi->cvirt);
14723+ vx_info_exit_cacct(&vxi->cacct);
d337f35e 14724+
4bf69007
AM
14725+ for_each_possible_cpu(cpu) {
14726+ vx_info_exit_sched_pc(
14727+ &vx_per_cpu(vxi, sched_pc, cpu), cpu);
14728+ vx_info_exit_cvirt_pc(
14729+ &vx_per_cpu(vxi, cvirt_pc, cpu), cpu);
14730+ }
14731+ leave_vx_info(&vxis);
14732+#endif
d337f35e 14733+
4bf69007
AM
14734+ vxi->vx_id = -1;
14735+ vxi->vx_state |= VXS_RELEASED;
d337f35e 14736+
4bf69007
AM
14737+#ifdef CONFIG_SMP
14738+ free_percpu(vxi->ptr_pc);
14739+#endif
14740+ kfree(vxi);
14741+ atomic_dec(&vx_global_ctotal);
d337f35e
JR
14742+}
14743+
4bf69007 14744+static void __shutdown_vx_info(struct vx_info *vxi)
d337f35e 14745+{
4bf69007
AM
14746+ struct nsproxy *nsproxy;
14747+ struct fs_struct *fs;
14748+ struct cred *cred;
14749+ int index, kill;
d337f35e 14750+
4bf69007 14751+ might_sleep();
d337f35e 14752+
4bf69007
AM
14753+ vxi->vx_state |= VXS_SHUTDOWN;
14754+ vs_state_change(vxi, VSC_SHUTDOWN);
d337f35e 14755+
4bf69007
AM
14756+ for (index = 0; index < VX_SPACES; index++) {
14757+ struct _vx_space *space = &vxi->space[index];
d337f35e 14758+
4bf69007
AM
14759+ nsproxy = xchg(&space->vx_nsproxy, NULL);
14760+ if (nsproxy)
14761+ put_nsproxy(nsproxy);
2380c486 14762+
4bf69007
AM
14763+ fs = xchg(&space->vx_fs, NULL);
14764+ spin_lock(&fs->lock);
14765+ kill = !--fs->users;
14766+ spin_unlock(&fs->lock);
14767+ if (kill)
14768+ free_fs_struct(fs);
d337f35e 14769+
4bf69007
AM
14770+ cred = (struct cred *)xchg(&space->vx_cred, NULL);
14771+ if (cred)
14772+ abort_creds(cred);
14773+ }
d337f35e
JR
14774+}
14775+
4bf69007 14776+/* exported stuff */
d337f35e 14777+
4bf69007 14778+void free_vx_info(struct vx_info *vxi)
d337f35e 14779+{
4bf69007
AM
14780+ unsigned long flags;
14781+ unsigned index;
d337f35e 14782+
4bf69007
AM
14783+ /* check for reference counts first */
14784+ BUG_ON(atomic_read(&vxi->vx_usecnt));
14785+ BUG_ON(atomic_read(&vxi->vx_tasks));
2380c486 14786+
4bf69007
AM
14787+ /* context must not be hashed */
14788+ BUG_ON(vx_info_state(vxi, VXS_HASHED));
d337f35e 14789+
4bf69007
AM
14790+ /* context shutdown is mandatory */
14791+ BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN));
d337f35e 14792+
4bf69007
AM
14793+ /* spaces check */
14794+ for (index = 0; index < VX_SPACES; index++) {
14795+ struct _vx_space *space = &vxi->space[index];
d337f35e 14796+
4bf69007
AM
14797+ BUG_ON(space->vx_nsproxy);
14798+ BUG_ON(space->vx_fs);
14799+ // BUG_ON(space->vx_real_cred);
14800+ // BUG_ON(space->vx_cred);
14801+ }
d337f35e 14802+
4bf69007
AM
14803+ spin_lock_irqsave(&vx_info_inactive_lock, flags);
14804+ hlist_del(&vxi->vx_hlist);
14805+ spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
d337f35e 14806+
4bf69007
AM
14807+ __dealloc_vx_info(vxi);
14808+}
eab5a9a6 14809+
d337f35e 14810+
4bf69007 14811+/* hash table for vx_info hash */
93de0823 14812+
4bf69007 14813+#define VX_HASH_SIZE 13
d337f35e 14814+
4bf69007
AM
14815+static struct hlist_head vx_info_hash[VX_HASH_SIZE] =
14816+ { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT };
d337f35e 14817+
4bf69007 14818+static DEFINE_SPINLOCK(vx_info_hash_lock);
d337f35e 14819+
93de0823 14820+
61333608 14821+static inline unsigned int __hashval(vxid_t xid)
4bf69007
AM
14822+{
14823+ return (xid % VX_HASH_SIZE);
d337f35e
JR
14824+}
14825+
14826+
d337f35e 14827+
4bf69007 14828+/* __hash_vx_info()
d337f35e 14829+
4bf69007
AM
14830+ * add the vxi to the global hash table
14831+ * requires the hash_lock to be held */
d337f35e 14832+
4bf69007 14833+static inline void __hash_vx_info(struct vx_info *vxi)
d337f35e 14834+{
4bf69007 14835+ struct hlist_head *head;
d337f35e 14836+
4bf69007
AM
14837+ vxd_assert_lock(&vx_info_hash_lock);
14838+ vxdprintk(VXD_CBIT(xid, 4),
14839+ "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
14840+ vxh_hash_vx_info(vxi);
d337f35e 14841+
4bf69007
AM
14842+ /* context must not be hashed */
14843+ BUG_ON(vx_info_state(vxi, VXS_HASHED));
d337f35e 14844+
4bf69007
AM
14845+ vxi->vx_state |= VXS_HASHED;
14846+ head = &vx_info_hash[__hashval(vxi->vx_id)];
14847+ hlist_add_head(&vxi->vx_hlist, head);
14848+ atomic_inc(&vx_global_cactive);
2380c486 14849+}
d337f35e 14850+
4bf69007 14851+/* __unhash_vx_info()
d337f35e 14852+
4bf69007
AM
14853+ * remove the vxi from the global hash table
14854+ * requires the hash_lock to be held */
d337f35e 14855+
4bf69007 14856+static inline void __unhash_vx_info(struct vx_info *vxi)
d337f35e 14857+{
4bf69007
AM
14858+ unsigned long flags;
14859+
14860+ vxd_assert_lock(&vx_info_hash_lock);
14861+ vxdprintk(VXD_CBIT(xid, 4),
14862+ "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id,
14863+ atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks));
14864+ vxh_unhash_vx_info(vxi);
14865+
14866+ /* context must be hashed */
14867+ BUG_ON(!vx_info_state(vxi, VXS_HASHED));
14868+ /* but without tasks */
14869+ BUG_ON(atomic_read(&vxi->vx_tasks));
14870+
14871+ vxi->vx_state &= ~VXS_HASHED;
14872+ hlist_del_init(&vxi->vx_hlist);
14873+ spin_lock_irqsave(&vx_info_inactive_lock, flags);
14874+ hlist_add_head(&vxi->vx_hlist, &vx_info_inactive);
14875+ spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
14876+ atomic_dec(&vx_global_cactive);
2380c486 14877+}
d337f35e 14878+
d337f35e 14879+
4bf69007 14880+/* __lookup_vx_info()
d337f35e 14881+
4bf69007
AM
14882+ * requires the hash_lock to be held
14883+ * doesn't increment the vx_refcnt */
2380c486 14884+
61333608 14885+static inline struct vx_info *__lookup_vx_info(vxid_t xid)
d337f35e 14886+{
4bf69007
AM
14887+ struct hlist_head *head = &vx_info_hash[__hashval(xid)];
14888+ struct hlist_node *pos;
14889+ struct vx_info *vxi;
d337f35e 14890+
4bf69007
AM
14891+ vxd_assert_lock(&vx_info_hash_lock);
14892+ hlist_for_each(pos, head) {
14893+ vxi = hlist_entry(pos, struct vx_info, vx_hlist);
d337f35e 14894+
4bf69007
AM
14895+ if (vxi->vx_id == xid)
14896+ goto found;
14897+ }
14898+ vxi = NULL;
14899+found:
14900+ vxdprintk(VXD_CBIT(xid, 0),
14901+ "__lookup_vx_info(#%u): %p[#%u]",
14902+ xid, vxi, vxi ? vxi->vx_id : 0);
14903+ vxh_lookup_vx_info(vxi, xid);
14904+ return vxi;
14905+}
d337f35e 14906+
d337f35e 14907+
4bf69007 14908+/* __create_vx_info()
d337f35e 14909+
4bf69007
AM
14910+ * create the requested context
14911+ * get(), claim() and hash it */
2380c486 14912+
4bf69007
AM
14913+static struct vx_info *__create_vx_info(int id)
14914+{
14915+ struct vx_info *new, *vxi = NULL;
2380c486 14916+
4bf69007 14917+ vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id);
d337f35e 14918+
4bf69007
AM
14919+ if (!(new = __alloc_vx_info(id)))
14920+ return ERR_PTR(-ENOMEM);
d337f35e 14921+
4bf69007
AM
14922+ /* required to make dynamic xids unique */
14923+ spin_lock(&vx_info_hash_lock);
d337f35e 14924+
4bf69007
AM
14925+ /* static context requested */
14926+ if ((vxi = __lookup_vx_info(id))) {
14927+ vxdprintk(VXD_CBIT(xid, 0),
14928+ "create_vx_info(%d) = %p (already there)", id, vxi);
14929+ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0))
14930+ vxi = ERR_PTR(-EBUSY);
14931+ else
14932+ vxi = ERR_PTR(-EEXIST);
14933+ goto out_unlock;
14934+ }
14935+ /* new context */
14936+ vxdprintk(VXD_CBIT(xid, 0),
14937+ "create_vx_info(%d) = %p (new)", id, new);
14938+ claim_vx_info(new, NULL);
14939+ __hash_vx_info(get_vx_info(new));
14940+ vxi = new, new = NULL;
d337f35e 14941+
4bf69007
AM
14942+out_unlock:
14943+ spin_unlock(&vx_info_hash_lock);
14944+ vxh_create_vx_info(IS_ERR(vxi) ? NULL : vxi, id);
14945+ if (new)
14946+ __dealloc_vx_info(new);
14947+ return vxi;
14948+}
d337f35e 14949+
d337f35e 14950+
4bf69007 14951+/* exported stuff */
d337f35e 14952+
d337f35e 14953+
4bf69007 14954+void unhash_vx_info(struct vx_info *vxi)
d337f35e 14955+{
4bf69007
AM
14956+ spin_lock(&vx_info_hash_lock);
14957+ __unhash_vx_info(vxi);
14958+ spin_unlock(&vx_info_hash_lock);
14959+ __shutdown_vx_info(vxi);
14960+ __wakeup_vx_info(vxi);
2380c486 14961+}
d337f35e 14962+
2380c486 14963+
4bf69007 14964+/* lookup_vx_info()
2380c486 14965+
4bf69007
AM
14966+ * search for a vx_info and get() it
14967+ * negative id means current */
2380c486 14968+
4bf69007 14969+struct vx_info *lookup_vx_info(int id)
2380c486 14970+{
4bf69007
AM
14971+ struct vx_info *vxi = NULL;
14972+
14973+ if (id < 0) {
14974+ vxi = get_vx_info(current_vx_info());
14975+ } else if (id > 1) {
14976+ spin_lock(&vx_info_hash_lock);
14977+ vxi = get_vx_info(__lookup_vx_info(id));
14978+ spin_unlock(&vx_info_hash_lock);
2380c486 14979+ }
4bf69007 14980+ return vxi;
d337f35e
JR
14981+}
14982+
4bf69007 14983+/* xid_is_hashed()
d337f35e 14984+
4bf69007 14985+ * verify that xid is still hashed */
d337f35e 14986+
61333608 14987+int xid_is_hashed(vxid_t xid)
4bf69007
AM
14988+{
14989+ int hashed;
d337f35e 14990+
4bf69007
AM
14991+ spin_lock(&vx_info_hash_lock);
14992+ hashed = (__lookup_vx_info(xid) != NULL);
14993+ spin_unlock(&vx_info_hash_lock);
14994+ return hashed;
14995+}
d337f35e 14996+
4bf69007 14997+#ifdef CONFIG_PROC_FS
d337f35e 14998+
4bf69007 14999+/* get_xid_list()
d337f35e 15000+
4bf69007
AM
15001+ * get a subset of hashed xids for proc
15002+ * assumes size is at least one */
d337f35e 15003+
4bf69007
AM
15004+int get_xid_list(int index, unsigned int *xids, int size)
15005+{
15006+ int hindex, nr_xids = 0;
d337f35e 15007+
4bf69007
AM
15008+ /* only show current and children */
15009+ if (!vx_check(0, VS_ADMIN | VS_WATCH)) {
15010+ if (index > 0)
15011+ return 0;
15012+ xids[nr_xids] = vx_current_xid();
15013+ return 1;
15014+ }
d337f35e 15015+
4bf69007
AM
15016+ for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
15017+ struct hlist_head *head = &vx_info_hash[hindex];
15018+ struct hlist_node *pos;
d337f35e 15019+
4bf69007
AM
15020+ spin_lock(&vx_info_hash_lock);
15021+ hlist_for_each(pos, head) {
15022+ struct vx_info *vxi;
d337f35e 15023+
4bf69007
AM
15024+ if (--index > 0)
15025+ continue;
d337f35e 15026+
4bf69007
AM
15027+ vxi = hlist_entry(pos, struct vx_info, vx_hlist);
15028+ xids[nr_xids] = vxi->vx_id;
15029+ if (++nr_xids >= size) {
15030+ spin_unlock(&vx_info_hash_lock);
15031+ goto out;
15032+ }
15033+ }
15034+ /* keep the lock time short */
15035+ spin_unlock(&vx_info_hash_lock);
15036+ }
15037+out:
15038+ return nr_xids;
15039+}
15040+#endif
d337f35e 15041+
4bf69007 15042+#ifdef CONFIG_VSERVER_DEBUG
d337f35e 15043+
4bf69007 15044+void dump_vx_info_inactive(int level)
d337f35e 15045+{
4bf69007 15046+ struct hlist_node *entry, *next;
d337f35e 15047+
4bf69007
AM
15048+ hlist_for_each_safe(entry, next, &vx_info_inactive) {
15049+ struct vx_info *vxi =
15050+ list_entry(entry, struct vx_info, vx_hlist);
d337f35e 15051+
4bf69007
AM
15052+ dump_vx_info(vxi, level);
15053+ }
d337f35e
JR
15054+}
15055+
4bf69007 15056+#endif
d337f35e 15057+
4bf69007
AM
15058+#if 0
15059+int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
d337f35e 15060+{
4bf69007 15061+ struct user_struct *new_user, *old_user;
d337f35e 15062+
4bf69007
AM
15063+ if (!p || !vxi)
15064+ BUG();
d337f35e 15065+
4bf69007
AM
15066+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
15067+ return -EACCES;
d337f35e 15068+
4bf69007
AM
15069+ new_user = alloc_uid(vxi->vx_id, p->uid);
15070+ if (!new_user)
15071+ return -ENOMEM;
d337f35e 15072+
4bf69007
AM
15073+ old_user = p->user;
15074+ if (new_user != old_user) {
15075+ atomic_inc(&new_user->processes);
15076+ atomic_dec(&old_user->processes);
15077+ p->user = new_user;
d337f35e 15078+ }
4bf69007
AM
15079+ free_uid(old_user);
15080+ return 0;
d337f35e 15081+}
4bf69007 15082+#endif
d337f35e 15083+
4bf69007
AM
15084+#if 0
15085+void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p)
d337f35e 15086+{
4bf69007
AM
15087+ // p->cap_effective &= vxi->vx_cap_bset;
15088+ p->cap_effective =
15089+ cap_intersect(p->cap_effective, vxi->cap_bset);
15090+ // p->cap_inheritable &= vxi->vx_cap_bset;
15091+ p->cap_inheritable =
15092+ cap_intersect(p->cap_inheritable, vxi->cap_bset);
15093+ // p->cap_permitted &= vxi->vx_cap_bset;
15094+ p->cap_permitted =
15095+ cap_intersect(p->cap_permitted, vxi->cap_bset);
15096+}
15097+#endif
d337f35e
JR
15098+
15099+
4bf69007
AM
15100+#include <linux/file.h>
15101+#include <linux/fdtable.h>
d337f35e 15102+
4bf69007
AM
15103+static int vx_openfd_task(struct task_struct *tsk)
15104+{
15105+ struct files_struct *files = tsk->files;
15106+ struct fdtable *fdt;
15107+ const unsigned long *bptr;
15108+ int count, total;
d337f35e 15109+
4bf69007
AM
15110+ /* no rcu_read_lock() because of spin_lock() */
15111+ spin_lock(&files->file_lock);
15112+ fdt = files_fdtable(files);
15113+ bptr = fdt->open_fds;
15114+ count = fdt->max_fds / (sizeof(unsigned long) * 8);
15115+ for (total = 0; count > 0; count--) {
15116+ if (*bptr)
15117+ total += hweight_long(*bptr);
15118+ bptr++;
15119+ }
15120+ spin_unlock(&files->file_lock);
15121+ return total;
d337f35e
JR
15122+}
15123+
d337f35e 15124+
4bf69007
AM
15125+/* for *space compatibility */
15126+
15127+asmlinkage long sys_unshare(unsigned long);
15128+
15129+/*
15130+ * migrate task to new context
15131+ * gets vxi, puts old_vxi on change
15132+ * optionally unshares namespaces (hack)
2380c486 15133+ */
4bf69007
AM
15134+
15135+int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare)
2380c486 15136+{
4bf69007
AM
15137+ struct vx_info *old_vxi;
15138+ int ret = 0;
d337f35e 15139+
4bf69007
AM
15140+ if (!p || !vxi)
15141+ BUG();
d337f35e 15142+
4bf69007
AM
15143+ vxdprintk(VXD_CBIT(xid, 5),
15144+ "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
15145+ vxi->vx_id, atomic_read(&vxi->vx_usecnt));
d337f35e 15146+
4bf69007
AM
15147+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) &&
15148+ !vx_info_flags(vxi, VXF_STATE_SETUP, 0))
15149+ return -EACCES;
2380c486 15150+
4bf69007
AM
15151+ if (vx_info_state(vxi, VXS_SHUTDOWN))
15152+ return -EFAULT;
d337f35e 15153+
4bf69007
AM
15154+ old_vxi = task_get_vx_info(p);
15155+ if (old_vxi == vxi)
15156+ goto out;
d337f35e 15157+
4bf69007
AM
15158+// if (!(ret = vx_migrate_user(p, vxi))) {
15159+ {
15160+ int openfd;
d337f35e 15161+
4bf69007
AM
15162+ task_lock(p);
15163+ openfd = vx_openfd_task(p);
15164+
15165+ if (old_vxi) {
15166+ atomic_dec(&old_vxi->cvirt.nr_threads);
15167+ atomic_dec(&old_vxi->cvirt.nr_running);
15168+ __rlim_dec(&old_vxi->limit, RLIMIT_NPROC);
15169+ /* FIXME: what about the struct files here? */
15170+ __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd);
15171+ /* account for the executable */
15172+ __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY);
2380c486 15173+ }
4bf69007
AM
15174+ atomic_inc(&vxi->cvirt.nr_threads);
15175+ atomic_inc(&vxi->cvirt.nr_running);
15176+ __rlim_inc(&vxi->limit, RLIMIT_NPROC);
15177+ /* FIXME: what about the struct files here? */
15178+ __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd);
15179+ /* account for the executable */
15180+ __rlim_inc(&vxi->limit, VLIMIT_DENTRY);
2380c486 15181+
4bf69007
AM
15182+ if (old_vxi) {
15183+ release_vx_info(old_vxi, p);
15184+ clr_vx_info(&p->vx_info);
15185+ }
15186+ claim_vx_info(vxi, p);
15187+ set_vx_info(&p->vx_info, vxi);
15188+ p->xid = vxi->vx_id;
d337f35e 15189+
4bf69007
AM
15190+ vxdprintk(VXD_CBIT(xid, 5),
15191+ "moved task %p into vxi:%p[#%d]",
15192+ p, vxi, vxi->vx_id);
d337f35e 15193+
4bf69007
AM
15194+ // vx_mask_cap_bset(vxi, p);
15195+ task_unlock(p);
d337f35e 15196+
4bf69007
AM
15197+ /* hack for *spaces to provide compatibility */
15198+ if (unshare) {
15199+ struct nsproxy *old_nsp, *new_nsp;
d337f35e 15200+
4bf69007
AM
15201+ ret = unshare_nsproxy_namespaces(
15202+ CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER,
b00e13aa 15203+ &new_nsp, NULL, NULL);
4bf69007
AM
15204+ if (ret)
15205+ goto out;
d337f35e 15206+
4bf69007
AM
15207+ old_nsp = xchg(&p->nsproxy, new_nsp);
15208+ vx_set_space(vxi,
15209+ CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER, 0);
15210+ put_nsproxy(old_nsp);
15211+ }
15212+ }
15213+out:
15214+ put_vx_info(old_vxi);
2380c486
JR
15215+ return ret;
15216+}
d337f35e 15217+
4bf69007 15218+int vx_set_reaper(struct vx_info *vxi, struct task_struct *p)
d337f35e 15219+{
4bf69007
AM
15220+ struct task_struct *old_reaper;
15221+ struct vx_info *reaper_vxi;
d337f35e 15222+
4bf69007
AM
15223+ if (!vxi)
15224+ return -EINVAL;
d337f35e 15225+
4bf69007
AM
15226+ vxdprintk(VXD_CBIT(xid, 6),
15227+ "vx_set_reaper(%p[#%d],%p[#%d,%d])",
15228+ vxi, vxi->vx_id, p, p->xid, p->pid);
d337f35e 15229+
4bf69007
AM
15230+ old_reaper = vxi->vx_reaper;
15231+ if (old_reaper == p)
15232+ return 0;
d337f35e 15233+
4bf69007
AM
15234+ reaper_vxi = task_get_vx_info(p);
15235+ if (reaper_vxi && reaper_vxi != vxi) {
15236+ vxwprintk(1,
15237+ "Unsuitable reaper [" VS_Q("%s") ",%u:#%u] "
15238+ "for [xid #%u]",
15239+ p->comm, p->pid, p->xid, vx_current_xid());
2380c486
JR
15240+ goto out;
15241+ }
4bf69007
AM
15242+
15243+ /* set new child reaper */
15244+ get_task_struct(p);
15245+ vxi->vx_reaper = p;
15246+ put_task_struct(old_reaper);
2380c486 15247+out:
4bf69007
AM
15248+ put_vx_info(reaper_vxi);
15249+ return 0;
2380c486 15250+}
d337f35e 15251+
4bf69007 15252+int vx_set_init(struct vx_info *vxi, struct task_struct *p)
d337f35e 15253+{
4bf69007
AM
15254+ if (!vxi)
15255+ return -EINVAL;
d337f35e 15256+
4bf69007
AM
15257+ vxdprintk(VXD_CBIT(xid, 6),
15258+ "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
15259+ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
d337f35e 15260+
4bf69007
AM
15261+ vxi->vx_flags &= ~VXF_STATE_INIT;
15262+ // vxi->vx_initpid = p->tgid;
15263+ vxi->vx_initpid = p->pid;
2380c486 15264+ return 0;
d337f35e
JR
15265+}
15266+
4bf69007 15267+void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code)
d337f35e 15268+{
4bf69007
AM
15269+ vxdprintk(VXD_CBIT(xid, 6),
15270+ "vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
15271+ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
2380c486 15272+
4bf69007
AM
15273+ vxi->exit_code = code;
15274+ vxi->vx_initpid = 0;
d337f35e
JR
15275+}
15276+
2380c486 15277+
4bf69007 15278+void vx_set_persistent(struct vx_info *vxi)
d337f35e 15279+{
4bf69007
AM
15280+ vxdprintk(VXD_CBIT(xid, 6),
15281+ "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id);
2380c486 15282+
4bf69007
AM
15283+ get_vx_info(vxi);
15284+ claim_vx_info(vxi, NULL);
d337f35e
JR
15285+}
15286+
4bf69007 15287+void vx_clear_persistent(struct vx_info *vxi)
2380c486 15288+{
4bf69007
AM
15289+ vxdprintk(VXD_CBIT(xid, 6),
15290+ "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id);
d337f35e 15291+
4bf69007
AM
15292+ release_vx_info(vxi, NULL);
15293+ put_vx_info(vxi);
2380c486 15294+}
d337f35e 15295+
4bf69007 15296+void vx_update_persistent(struct vx_info *vxi)
d337f35e 15297+{
4bf69007
AM
15298+ if (vx_info_flags(vxi, VXF_PERSISTENT, 0))
15299+ vx_set_persistent(vxi);
2380c486 15300+ else
4bf69007 15301+ vx_clear_persistent(vxi);
2380c486 15302+}
d337f35e 15303+
d337f35e 15304+
4bf69007
AM
15305+/* task must be current or locked */
15306+
15307+void exit_vx_info(struct task_struct *p, int code)
2380c486 15308+{
4bf69007 15309+ struct vx_info *vxi = p->vx_info;
d337f35e 15310+
4bf69007
AM
15311+ if (vxi) {
15312+ atomic_dec(&vxi->cvirt.nr_threads);
15313+ vx_nproc_dec(p);
d337f35e 15314+
4bf69007
AM
15315+ vxi->exit_code = code;
15316+ release_vx_info(vxi, p);
15317+ }
2380c486 15318+}
d337f35e 15319+
4bf69007 15320+void exit_vx_info_early(struct task_struct *p, int code)
2380c486 15321+{
4bf69007 15322+ struct vx_info *vxi = p->vx_info;
d337f35e 15323+
4bf69007
AM
15324+ if (vxi) {
15325+ if (vxi->vx_initpid == p->pid)
15326+ vx_exit_init(vxi, p, code);
15327+ if (vxi->vx_reaper == p)
15328+ vx_set_reaper(vxi, init_pid_ns.child_reaper);
15329+ }
d337f35e
JR
15330+}
15331+
15332+
4bf69007 15333+/* vserver syscall commands below here */
d337f35e 15334+
4bf69007 15335+/* task xid and vx_info functions */
d337f35e 15336+
4bf69007 15337+#include <asm/uaccess.h>
d337f35e 15338+
d337f35e 15339+
4bf69007 15340+int vc_task_xid(uint32_t id)
d337f35e 15341+{
61333608 15342+ vxid_t xid;
d337f35e 15343+
4bf69007
AM
15344+ if (id) {
15345+ struct task_struct *tsk;
d337f35e 15346+
4bf69007
AM
15347+ rcu_read_lock();
15348+ tsk = find_task_by_real_pid(id);
15349+ xid = (tsk) ? tsk->xid : -ESRCH;
15350+ rcu_read_unlock();
15351+ } else
15352+ xid = vx_current_xid();
15353+ return xid;
d337f35e
JR
15354+}
15355+
d337f35e 15356+
4bf69007
AM
15357+int vc_vx_info(struct vx_info *vxi, void __user *data)
15358+{
15359+ struct vcmd_vx_info_v0 vc_data;
d337f35e 15360+
4bf69007
AM
15361+ vc_data.xid = vxi->vx_id;
15362+ vc_data.initpid = vxi->vx_initpid;
d337f35e 15363+
4bf69007
AM
15364+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15365+ return -EFAULT;
15366+ return 0;
15367+}
d337f35e 15368+
d337f35e 15369+
4bf69007 15370+int vc_ctx_stat(struct vx_info *vxi, void __user *data)
d337f35e 15371+{
4bf69007 15372+ struct vcmd_ctx_stat_v0 vc_data;
d337f35e 15373+
4bf69007
AM
15374+ vc_data.usecnt = atomic_read(&vxi->vx_usecnt);
15375+ vc_data.tasks = atomic_read(&vxi->vx_tasks);
d337f35e 15376+
4bf69007
AM
15377+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15378+ return -EFAULT;
15379+ return 0;
d337f35e
JR
15380+}
15381+
d337f35e 15382+
4bf69007 15383+/* context functions */
d337f35e 15384+
4bf69007 15385+int vc_ctx_create(uint32_t xid, void __user *data)
d337f35e 15386+{
4bf69007
AM
15387+ struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET };
15388+ struct vx_info *new_vxi;
15389+ int ret;
d337f35e 15390+
4bf69007
AM
15391+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
15392+ return -EFAULT;
d337f35e 15393+
4bf69007
AM
15394+ if ((xid > MAX_S_CONTEXT) || (xid < 2))
15395+ return -EINVAL;
d337f35e 15396+
4bf69007
AM
15397+ new_vxi = __create_vx_info(xid);
15398+ if (IS_ERR(new_vxi))
15399+ return PTR_ERR(new_vxi);
d337f35e 15400+
4bf69007
AM
15401+ /* initial flags */
15402+ new_vxi->vx_flags = vc_data.flagword;
d337f35e 15403+
4bf69007
AM
15404+ ret = -ENOEXEC;
15405+ if (vs_state_change(new_vxi, VSC_STARTUP))
15406+ goto out;
d337f35e 15407+
4bf69007
AM
15408+ ret = vx_migrate_task(current, new_vxi, (!data));
15409+ if (ret)
15410+ goto out;
d337f35e 15411+
4bf69007
AM
15412+ /* return context id on success */
15413+ ret = new_vxi->vx_id;
d337f35e 15414+
4bf69007
AM
15415+ /* get a reference for persistent contexts */
15416+ if ((vc_data.flagword & VXF_PERSISTENT))
15417+ vx_set_persistent(new_vxi);
15418+out:
15419+ release_vx_info(new_vxi, NULL);
15420+ put_vx_info(new_vxi);
15421+ return ret;
15422+}
d337f35e
JR
15423+
15424+
4bf69007 15425+int vc_ctx_migrate(struct vx_info *vxi, void __user *data)
d337f35e 15426+{
4bf69007
AM
15427+ struct vcmd_ctx_migrate vc_data = { .flagword = 0 };
15428+ int ret;
d337f35e 15429+
4bf69007
AM
15430+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
15431+ return -EFAULT;
d337f35e 15432+
4bf69007
AM
15433+ ret = vx_migrate_task(current, vxi, 0);
15434+ if (ret)
15435+ return ret;
15436+ if (vc_data.flagword & VXM_SET_INIT)
15437+ ret = vx_set_init(vxi, current);
15438+ if (ret)
15439+ return ret;
15440+ if (vc_data.flagword & VXM_SET_REAPER)
15441+ ret = vx_set_reaper(vxi, current);
15442+ return ret;
15443+}
d337f35e 15444+
d337f35e 15445+
4bf69007 15446+int vc_get_cflags(struct vx_info *vxi, void __user *data)
d337f35e 15447+{
4bf69007 15448+ struct vcmd_ctx_flags_v0 vc_data;
d337f35e 15449+
4bf69007 15450+ vc_data.flagword = vxi->vx_flags;
d337f35e 15451+
4bf69007
AM
15452+ /* special STATE flag handling */
15453+ vc_data.mask = vs_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME);
d337f35e 15454+
4bf69007
AM
15455+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15456+ return -EFAULT;
15457+ return 0;
d337f35e
JR
15458+}
15459+
4bf69007
AM
15460+int vc_set_cflags(struct vx_info *vxi, void __user *data)
15461+{
15462+ struct vcmd_ctx_flags_v0 vc_data;
15463+ uint64_t mask, trigger;
d337f35e 15464+
4bf69007
AM
15465+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15466+ return -EFAULT;
d337f35e 15467+
4bf69007
AM
15468+ /* special STATE flag handling */
15469+ mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
15470+ trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
d337f35e 15471+
4bf69007
AM
15472+ if (vxi == current_vx_info()) {
15473+ /* if (trigger & VXF_STATE_SETUP)
15474+ vx_mask_cap_bset(vxi, current); */
15475+ if (trigger & VXF_STATE_INIT) {
15476+ int ret;
d337f35e 15477+
4bf69007
AM
15478+ ret = vx_set_init(vxi, current);
15479+ if (ret)
15480+ return ret;
15481+ ret = vx_set_reaper(vxi, current);
15482+ if (ret)
15483+ return ret;
d337f35e
JR
15484+ }
15485+ }
4bf69007
AM
15486+
15487+ vxi->vx_flags = vs_mask_flags(vxi->vx_flags,
15488+ vc_data.flagword, mask);
15489+ if (trigger & VXF_PERSISTENT)
15490+ vx_update_persistent(vxi);
15491+
15492+ return 0;
d337f35e
JR
15493+}
15494+
15495+
4bf69007 15496+static inline uint64_t caps_from_cap_t(kernel_cap_t c)
d337f35e 15497+{
4bf69007 15498+ uint64_t v = c.cap[0] | ((uint64_t)c.cap[1] << 32);
d337f35e 15499+
4bf69007
AM
15500+ // printk("caps_from_cap_t(%08x:%08x) = %016llx\n", c.cap[1], c.cap[0], v);
15501+ return v;
d337f35e
JR
15502+}
15503+
4bf69007 15504+static inline kernel_cap_t cap_t_from_caps(uint64_t v)
d337f35e 15505+{
4bf69007 15506+ kernel_cap_t c = __cap_empty_set;
d337f35e 15507+
4bf69007
AM
15508+ c.cap[0] = v & 0xFFFFFFFF;
15509+ c.cap[1] = (v >> 32) & 0xFFFFFFFF;
d337f35e 15510+
4bf69007
AM
15511+ // printk("cap_t_from_caps(%016llx) = %08x:%08x\n", v, c.cap[1], c.cap[0]);
15512+ return c;
d337f35e
JR
15513+}
15514+
15515+
4bf69007 15516+static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps)
d337f35e 15517+{
4bf69007
AM
15518+ if (bcaps)
15519+ *bcaps = caps_from_cap_t(vxi->vx_bcaps);
15520+ if (ccaps)
15521+ *ccaps = vxi->vx_ccaps;
d337f35e 15522+
4bf69007
AM
15523+ return 0;
15524+}
d337f35e 15525+
4bf69007
AM
15526+int vc_get_ccaps(struct vx_info *vxi, void __user *data)
15527+{
15528+ struct vcmd_ctx_caps_v1 vc_data;
15529+ int ret;
d337f35e 15530+
4bf69007
AM
15531+ ret = do_get_caps(vxi, NULL, &vc_data.ccaps);
15532+ if (ret)
15533+ return ret;
15534+ vc_data.cmask = ~0ULL;
d337f35e 15535+
4bf69007
AM
15536+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15537+ return -EFAULT;
15538+ return 0;
d337f35e
JR
15539+}
15540+
4bf69007
AM
15541+static int do_set_caps(struct vx_info *vxi,
15542+ uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask)
d337f35e 15543+{
4bf69007 15544+ uint64_t bcold = caps_from_cap_t(vxi->vx_bcaps);
d337f35e 15545+
4bf69007
AM
15546+#if 0
15547+ printk("do_set_caps(%16llx, %16llx, %16llx, %16llx)\n",
15548+ bcaps, bmask, ccaps, cmask);
15549+#endif
15550+ vxi->vx_bcaps = cap_t_from_caps(
15551+ vs_mask_flags(bcold, bcaps, bmask));
15552+ vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask);
d337f35e 15553+
4bf69007 15554+ return 0;
d337f35e
JR
15555+}
15556+
4bf69007 15557+int vc_set_ccaps(struct vx_info *vxi, void __user *data)
d337f35e 15558+{
4bf69007 15559+ struct vcmd_ctx_caps_v1 vc_data;
d337f35e 15560+
2380c486 15561+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
15562+ return -EFAULT;
15563+
4bf69007 15564+ return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask);
d337f35e
JR
15565+}
15566+
4bf69007 15567+int vc_get_bcaps(struct vx_info *vxi, void __user *data)
d337f35e 15568+{
4bf69007
AM
15569+ struct vcmd_bcaps vc_data;
15570+ int ret;
d337f35e 15571+
4bf69007
AM
15572+ ret = do_get_caps(vxi, &vc_data.bcaps, NULL);
15573+ if (ret)
15574+ return ret;
15575+ vc_data.bmask = ~0ULL;
d337f35e 15576+
4bf69007
AM
15577+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15578+ return -EFAULT;
15579+ return 0;
d337f35e
JR
15580+}
15581+
4bf69007 15582+int vc_set_bcaps(struct vx_info *vxi, void __user *data)
d337f35e 15583+{
4bf69007 15584+ struct vcmd_bcaps vc_data;
d337f35e 15585+
2380c486 15586+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
15587+ return -EFAULT;
15588+
4bf69007 15589+ return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0);
d337f35e
JR
15590+}
15591+
d337f35e 15592+
4bf69007 15593+int vc_get_umask(struct vx_info *vxi, void __user *data)
d337f35e 15594+{
4bf69007 15595+ struct vcmd_umask vc_data;
7e46296a 15596+
4bf69007
AM
15597+ vc_data.umask = vxi->vx_umask;
15598+ vc_data.mask = ~0ULL;
d337f35e 15599+
4bf69007
AM
15600+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15601+ return -EFAULT;
15602+ return 0;
15603+}
d337f35e 15604+
4bf69007
AM
15605+int vc_set_umask(struct vx_info *vxi, void __user *data)
15606+{
15607+ struct vcmd_umask vc_data;
d337f35e 15608+
4bf69007
AM
15609+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15610+ return -EFAULT;
7e46296a 15611+
4bf69007
AM
15612+ vxi->vx_umask = vs_mask_flags(vxi->vx_umask,
15613+ vc_data.umask, vc_data.mask);
15614+ return 0;
15615+}
7e46296a 15616+
d337f35e 15617+
4bf69007
AM
15618+int vc_get_wmask(struct vx_info *vxi, void __user *data)
15619+{
15620+ struct vcmd_wmask vc_data;
d337f35e 15621+
4bf69007
AM
15622+ vc_data.wmask = vxi->vx_wmask;
15623+ vc_data.mask = ~0ULL;
d337f35e 15624+
4bf69007
AM
15625+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15626+ return -EFAULT;
15627+ return 0;
d337f35e
JR
15628+}
15629+
4bf69007 15630+int vc_set_wmask(struct vx_info *vxi, void __user *data)
d337f35e 15631+{
4bf69007 15632+ struct vcmd_wmask vc_data;
d337f35e 15633+
2380c486 15634+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
15635+ return -EFAULT;
15636+
4bf69007
AM
15637+ vxi->vx_wmask = vs_mask_flags(vxi->vx_wmask,
15638+ vc_data.wmask, vc_data.mask);
15639+ return 0;
d337f35e
JR
15640+}
15641+
d337f35e 15642+
4bf69007 15643+int vc_get_badness(struct vx_info *vxi, void __user *data)
d337f35e 15644+{
4bf69007
AM
15645+ struct vcmd_badness_v0 vc_data;
15646+
15647+ vc_data.bias = vxi->vx_badness_bias;
15648+
15649+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15650+ return -EFAULT;
15651+ return 0;
15652+}
15653+
15654+int vc_set_badness(struct vx_info *vxi, void __user *data)
15655+{
15656+ struct vcmd_badness_v0 vc_data;
d337f35e 15657+
2380c486 15658+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
15659+ return -EFAULT;
15660+
4bf69007
AM
15661+ vxi->vx_badness_bias = vc_data.bias;
15662+ return 0;
d337f35e
JR
15663+}
15664+
4bf69007 15665+#include <linux/module.h>
d337f35e 15666+
4bf69007 15667+EXPORT_SYMBOL_GPL(free_vx_info);
d337f35e 15668+
09a55596
AM
15669diff -NurpP --minimal linux-4.9.135/kernel/vserver/cvirt.c linux-4.9.135-vs2.3.9.8/kernel/vserver/cvirt.c
15670--- linux-4.9.135/kernel/vserver/cvirt.c 1970-01-01 00:00:00.000000000 +0000
15671+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/cvirt.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 15672@@ -0,0 +1,350 @@
4bf69007
AM
15673+/*
15674+ * linux/kernel/vserver/cvirt.c
15675+ *
15676+ * Virtual Server: Context Virtualization
15677+ *
cc23e853 15678+ *  Copyright (C) 2004-2007  Herbert Pötzl
4bf69007
AM
15679+ *
15680+ * V0.01 broken out from limit.c
15681+ * V0.02 added utsname stuff
15682+ * V0.03 changed vcmds to vxi arg
15683+ *
15684+ */
d337f35e 15685+
4bf69007
AM
15686+#include <linux/types.h>
15687+#include <linux/utsname.h>
15688+#include <linux/vs_cvirt.h>
15689+#include <linux/vserver/switch.h>
15690+#include <linux/vserver/cvirt_cmd.h>
d337f35e 15691+
4bf69007 15692+#include <asm/uaccess.h>
d337f35e 15693+
d337f35e 15694+
369dbd59 15695+void vx_vsi_boottime64(struct timespec64 *boottime)
4bf69007
AM
15696+{
15697+ struct vx_info *vxi = current_vx_info();
d337f35e 15698+
369dbd59 15699+ set_normalized_timespec64(boottime,
4bf69007
AM
15700+ boottime->tv_sec + vxi->cvirt.bias_uptime.tv_sec,
15701+ boottime->tv_nsec + vxi->cvirt.bias_uptime.tv_nsec);
15702+ return;
d337f35e
JR
15703+}
15704+
4bf69007 15705+void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
d337f35e 15706+{
4bf69007 15707+ struct vx_info *vxi = current_vx_info();
d337f35e 15708+
4bf69007
AM
15709+ set_normalized_timespec(uptime,
15710+ uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec,
15711+ uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec);
15712+ if (!idle)
15713+ return;
15714+ set_normalized_timespec(idle,
15715+ idle->tv_sec - vxi->cvirt.bias_idle.tv_sec,
15716+ idle->tv_nsec - vxi->cvirt.bias_idle.tv_nsec);
15717+ return;
d337f35e
JR
15718+}
15719+
4bf69007 15720+uint64_t vx_idle_jiffies(void)
d337f35e 15721+{
4bf69007 15722+ return init_task.utime + init_task.stime;
d337f35e
JR
15723+}
15724+
d337f35e
JR
15725+
15726+
4bf69007
AM
15727+static inline uint32_t __update_loadavg(uint32_t load,
15728+ int wsize, int delta, int n)
d337f35e 15729+{
4bf69007 15730+ unsigned long long calc, prev;
d337f35e 15731+
4bf69007
AM
15732+ /* just set it to n */
15733+ if (unlikely(delta >= wsize))
15734+ return (n << FSHIFT);
d337f35e 15735+
4bf69007
AM
15736+ calc = delta * n;
15737+ calc <<= FSHIFT;
15738+ prev = (wsize - delta);
15739+ prev *= load;
15740+ calc += prev;
15741+ do_div(calc, wsize);
15742+ return calc;
15743+}
d337f35e 15744+
d337f35e 15745+
4bf69007
AM
15746+void vx_update_load(struct vx_info *vxi)
15747+{
15748+ uint32_t now, last, delta;
15749+ unsigned int nr_running, nr_uninterruptible;
15750+ unsigned int total;
15751+ unsigned long flags;
d337f35e 15752+
4bf69007 15753+ spin_lock_irqsave(&vxi->cvirt.load_lock, flags);
d337f35e 15754+
4bf69007
AM
15755+ now = jiffies;
15756+ last = vxi->cvirt.load_last;
15757+ delta = now - last;
d337f35e 15758+
4bf69007
AM
15759+ if (delta < 5*HZ)
15760+ goto out;
d337f35e 15761+
4bf69007
AM
15762+ nr_running = atomic_read(&vxi->cvirt.nr_running);
15763+ nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible);
15764+ total = nr_running + nr_uninterruptible;
d337f35e 15765+
4bf69007
AM
15766+ vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0],
15767+ 60*HZ, delta, total);
15768+ vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1],
15769+ 5*60*HZ, delta, total);
15770+ vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2],
15771+ 15*60*HZ, delta, total);
d337f35e 15772+
4bf69007
AM
15773+ vxi->cvirt.load_last = now;
15774+out:
15775+ atomic_inc(&vxi->cvirt.load_updates);
15776+ spin_unlock_irqrestore(&vxi->cvirt.load_lock, flags);
d337f35e
JR
15777+}
15778+
d337f35e 15779+
d337f35e 15780+/*
4bf69007 15781+ * Commands to do_syslog:
d337f35e 15782+ *
4bf69007
AM
15783+ * 0 -- Close the log. Currently a NOP.
15784+ * 1 -- Open the log. Currently a NOP.
15785+ * 2 -- Read from the log.
15786+ * 3 -- Read all messages remaining in the ring buffer.
15787+ * 4 -- Read and clear all messages remaining in the ring buffer
15788+ * 5 -- Clear ring buffer.
15789+ * 6 -- Disable printk's to console
15790+ * 7 -- Enable printk's to console
15791+ * 8 -- Set level of messages printed to console
15792+ * 9 -- Return number of unread characters in the log buffer
15793+ * 10 -- Return size of the log buffer
d337f35e 15794+ */
4bf69007
AM
15795+int vx_do_syslog(int type, char __user *buf, int len)
15796+{
15797+ int error = 0;
15798+ int do_clear = 0;
15799+ struct vx_info *vxi = current_vx_info();
15800+ struct _vx_syslog *log;
d337f35e 15801+
4bf69007
AM
15802+ if (!vxi)
15803+ return -EINVAL;
15804+ log = &vxi->cvirt.syslog;
15805+
15806+ switch (type) {
15807+ case 0: /* Close log */
15808+ case 1: /* Open log */
15809+ break;
15810+ case 2: /* Read from log */
15811+ error = wait_event_interruptible(log->log_wait,
15812+ (log->log_start - log->log_end));
15813+ if (error)
15814+ break;
15815+ spin_lock_irq(&log->logbuf_lock);
15816+ spin_unlock_irq(&log->logbuf_lock);
15817+ break;
15818+ case 4: /* Read/clear last kernel messages */
15819+ do_clear = 1;
15820+ /* fall through */
15821+ case 3: /* Read last kernel messages */
15822+ return 0;
d337f35e 15823+
4bf69007
AM
15824+ case 5: /* Clear ring buffer */
15825+ return 0;
d337f35e 15826+
4bf69007
AM
15827+ case 6: /* Disable logging to console */
15828+ case 7: /* Enable logging to console */
15829+ case 8: /* Set level of messages printed to console */
15830+ break;
d337f35e 15831+
4bf69007
AM
15832+ case 9: /* Number of chars in the log buffer */
15833+ return 0;
15834+ case 10: /* Size of the log buffer */
15835+ return 0;
15836+ default:
15837+ error = -EINVAL;
15838+ break;
15839+ }
15840+ return error;
1e8b8f9b 15841+}
d337f35e 15842+
4bf69007
AM
15843+
15844+/* virtual host info names */
15845+
15846+static char *vx_vhi_name(struct vx_info *vxi, int id)
d337f35e 15847+{
4bf69007
AM
15848+ struct nsproxy *nsproxy;
15849+ struct uts_namespace *uts;
d337f35e 15850+
4bf69007
AM
15851+ if (id == VHIN_CONTEXT)
15852+ return vxi->vx_name;
15853+
15854+ nsproxy = vxi->space[0].vx_nsproxy;
15855+ if (!nsproxy)
15856+ return NULL;
15857+
15858+ uts = nsproxy->uts_ns;
15859+ if (!uts)
15860+ return NULL;
15861+
15862+ switch (id) {
15863+ case VHIN_SYSNAME:
15864+ return uts->name.sysname;
15865+ case VHIN_NODENAME:
15866+ return uts->name.nodename;
15867+ case VHIN_RELEASE:
15868+ return uts->name.release;
15869+ case VHIN_VERSION:
15870+ return uts->name.version;
15871+ case VHIN_MACHINE:
15872+ return uts->name.machine;
15873+ case VHIN_DOMAINNAME:
15874+ return uts->name.domainname;
15875+ default:
15876+ return NULL;
d337f35e 15877+ }
4bf69007 15878+ return NULL;
d337f35e
JR
15879+}
15880+
4bf69007 15881+int vc_set_vhi_name(struct vx_info *vxi, void __user *data)
d337f35e 15882+{
4bf69007
AM
15883+ struct vcmd_vhi_name_v0 vc_data;
15884+ char *name;
d337f35e 15885+
4bf69007
AM
15886+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15887+ return -EFAULT;
d337f35e 15888+
4bf69007
AM
15889+ name = vx_vhi_name(vxi, vc_data.field);
15890+ if (!name)
15891+ return -EINVAL;
d337f35e 15892+
4bf69007
AM
15893+ memcpy(name, vc_data.name, 65);
15894+ return 0;
15895+}
d337f35e 15896+
4bf69007
AM
15897+int vc_get_vhi_name(struct vx_info *vxi, void __user *data)
15898+{
15899+ struct vcmd_vhi_name_v0 vc_data;
15900+ char *name;
d337f35e 15901+
4bf69007
AM
15902+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
15903+ return -EFAULT;
d337f35e 15904+
4bf69007
AM
15905+ name = vx_vhi_name(vxi, vc_data.field);
15906+ if (!name)
15907+ return -EINVAL;
d337f35e 15908+
4bf69007
AM
15909+ memcpy(vc_data.name, name, 65);
15910+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15911+ return -EFAULT;
15912+ return 0;
15913+}
d337f35e 15914+
d337f35e 15915+
4bf69007
AM
15916+int vc_virt_stat(struct vx_info *vxi, void __user *data)
15917+{
15918+ struct vcmd_virt_stat_v0 vc_data;
15919+ struct _vx_cvirt *cvirt = &vxi->cvirt;
cc23e853 15920+ struct timespec64 uptime;
99a884b4 15921+
369dbd59
AM
15922+ ktime_get_ts64(&uptime);
15923+ set_normalized_timespec64(&uptime,
4bf69007
AM
15924+ uptime.tv_sec - cvirt->bias_uptime.tv_sec,
15925+ uptime.tv_nsec - cvirt->bias_uptime.tv_nsec);
d337f35e 15926+
cc23e853
AM
15927+ vc_data.offset = timespec64_to_ns(&cvirt->bias_ts);
15928+ vc_data.uptime = timespec64_to_ns(&uptime);
4bf69007
AM
15929+ vc_data.nr_threads = atomic_read(&cvirt->nr_threads);
15930+ vc_data.nr_running = atomic_read(&cvirt->nr_running);
15931+ vc_data.nr_uninterruptible = atomic_read(&cvirt->nr_uninterruptible);
15932+ vc_data.nr_onhold = atomic_read(&cvirt->nr_onhold);
15933+ vc_data.nr_forks = atomic_read(&cvirt->total_forks);
15934+ vc_data.load[0] = cvirt->load[0];
15935+ vc_data.load[1] = cvirt->load[1];
15936+ vc_data.load[2] = cvirt->load[2];
15937+
15938+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
15939+ return -EFAULT;
15940+ return 0;
d337f35e
JR
15941+}
15942+
15943+
4bf69007
AM
15944+#ifdef CONFIG_VSERVER_VTIME
15945+
15946+/* virtualized time base */
15947+
15948+void vx_adjust_timespec(struct timespec *ts)
d337f35e 15949+{
4bf69007 15950+ struct vx_info *vxi;
d337f35e 15951+
4bf69007
AM
15952+ if (!vx_flags(VXF_VIRT_TIME, 0))
15953+ return;
d337f35e 15954+
4bf69007
AM
15955+ vxi = current_vx_info();
15956+ ts->tv_sec += vxi->cvirt.bias_ts.tv_sec;
15957+ ts->tv_nsec += vxi->cvirt.bias_ts.tv_nsec;
d337f35e 15958+
4bf69007
AM
15959+ if (ts->tv_nsec >= NSEC_PER_SEC) {
15960+ ts->tv_sec++;
15961+ ts->tv_nsec -= NSEC_PER_SEC;
15962+ } else if (ts->tv_nsec < 0) {
15963+ ts->tv_sec--;
15964+ ts->tv_nsec += NSEC_PER_SEC;
d337f35e 15965+ }
d337f35e
JR
15966+}
15967+
cc23e853
AM
15968+void vx_adjust_timespec64(struct timespec64 *ts)
15969+{
15970+ struct vx_info *vxi;
15971+
15972+ if (!vx_flags(VXF_VIRT_TIME, 0))
15973+ return;
15974+
15975+ vxi = current_vx_info();
15976+ ts->tv_sec += vxi->cvirt.bias_ts.tv_sec;
15977+ ts->tv_nsec += vxi->cvirt.bias_ts.tv_nsec;
15978+
15979+ if (ts->tv_nsec >= NSEC_PER_SEC) {
15980+ ts->tv_sec++;
15981+ ts->tv_nsec -= NSEC_PER_SEC;
15982+ } else if (ts->tv_nsec < 0) {
15983+ ts->tv_sec--;
15984+ ts->tv_nsec += NSEC_PER_SEC;
15985+ }
15986+}
15987+
4bf69007 15988+int vx_settimeofday(const struct timespec *ts)
99a884b4 15989+{
4bf69007
AM
15990+ struct timespec ats, delta;
15991+ struct vx_info *vxi;
99a884b4 15992+
4bf69007
AM
15993+ if (!vx_flags(VXF_VIRT_TIME, 0))
15994+ return do_settimeofday(ts);
99a884b4 15995+
4bf69007
AM
15996+ getnstimeofday(&ats);
15997+ delta = timespec_sub(*ts, ats);
99a884b4 15998+
4bf69007 15999+ vxi = current_vx_info();
cc23e853
AM
16000+ vxi->cvirt.bias_ts = timespec64_add(vxi->cvirt.bias_ts,
16001+ timespec_to_timespec64(delta));
16002+ return 0;
16003+}
16004+
16005+int vx_settimeofday64(const struct timespec64 *ts)
16006+{
16007+ struct timespec64 ats, delta;
16008+ struct vx_info *vxi;
16009+
16010+ if (!vx_flags(VXF_VIRT_TIME, 0))
16011+ return do_settimeofday64(ts);
16012+
16013+ getnstimeofday64(&ats);
16014+ delta = timespec64_sub(*ts, ats);
16015+
16016+ vxi = current_vx_info();
16017+ vxi->cvirt.bias_ts = timespec64_add(vxi->cvirt.bias_ts, delta);
99a884b4
AM
16018+ return 0;
16019+}
d337f35e 16020+
4bf69007 16021+#endif
d337f35e 16022+
09a55596
AM
16023diff -NurpP --minimal linux-4.9.135/kernel/vserver/cvirt_init.h linux-4.9.135-vs2.3.9.8/kernel/vserver/cvirt_init.h
16024--- linux-4.9.135/kernel/vserver/cvirt_init.h 1970-01-01 00:00:00.000000000 +0000
16025+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/cvirt_init.h 2018-10-20 04:58:15.000000000 +0000
4bf69007 16026@@ -0,0 +1,70 @@
d337f35e 16027+
d337f35e 16028+
4bf69007 16029+extern uint64_t vx_idle_jiffies(void);
d337f35e 16030+
4bf69007
AM
16031+static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
16032+{
16033+ uint64_t idle_jiffies = vx_idle_jiffies();
16034+ uint64_t nsuptime;
d337f35e 16035+
cc23e853 16036+ ktime_get_ts64(&cvirt->bias_uptime);
4bf69007
AM
16037+ nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec
16038+ * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;
16039+ cvirt->bias_clock = nsec_to_clock_t(nsuptime);
16040+ cvirt->bias_ts.tv_sec = 0;
16041+ cvirt->bias_ts.tv_nsec = 0;
d337f35e 16042+
cc23e853 16043+ jiffies_to_timespec64(idle_jiffies, &cvirt->bias_idle);
4bf69007
AM
16044+ atomic_set(&cvirt->nr_threads, 0);
16045+ atomic_set(&cvirt->nr_running, 0);
16046+ atomic_set(&cvirt->nr_uninterruptible, 0);
16047+ atomic_set(&cvirt->nr_onhold, 0);
d337f35e 16048+
4bf69007
AM
16049+ spin_lock_init(&cvirt->load_lock);
16050+ cvirt->load_last = jiffies;
16051+ atomic_set(&cvirt->load_updates, 0);
16052+ cvirt->load[0] = 0;
16053+ cvirt->load[1] = 0;
16054+ cvirt->load[2] = 0;
16055+ atomic_set(&cvirt->total_forks, 0);
d337f35e 16056+
4bf69007
AM
16057+ spin_lock_init(&cvirt->syslog.logbuf_lock);
16058+ init_waitqueue_head(&cvirt->syslog.log_wait);
16059+ cvirt->syslog.log_start = 0;
16060+ cvirt->syslog.log_end = 0;
16061+ cvirt->syslog.con_start = 0;
16062+ cvirt->syslog.logged_chars = 0;
16063+}
16064+
16065+static inline
16066+void vx_info_init_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
d337f35e 16067+{
4bf69007
AM
16068+ // cvirt_pc->cpustat = { 0 };
16069+}
d337f35e 16070+
4bf69007
AM
16071+static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
16072+{
16073+#ifdef CONFIG_VSERVER_WARN
16074+ int value;
16075+#endif
16076+ vxwprintk_xid((value = atomic_read(&cvirt->nr_threads)),
16077+ "!!! cvirt: %p[nr_threads] = %d on exit.",
16078+ cvirt, value);
16079+ vxwprintk_xid((value = atomic_read(&cvirt->nr_running)),
16080+ "!!! cvirt: %p[nr_running] = %d on exit.",
16081+ cvirt, value);
16082+ vxwprintk_xid((value = atomic_read(&cvirt->nr_uninterruptible)),
16083+ "!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
16084+ cvirt, value);
16085+ vxwprintk_xid((value = atomic_read(&cvirt->nr_onhold)),
16086+ "!!! cvirt: %p[nr_onhold] = %d on exit.",
16087+ cvirt, value);
16088+ return;
16089+}
d337f35e 16090+
4bf69007
AM
16091+static inline
16092+void vx_info_exit_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc, int cpu)
16093+{
16094+ return;
16095+}
d337f35e 16096+
09a55596
AM
16097diff -NurpP --minimal linux-4.9.135/kernel/vserver/cvirt_proc.h linux-4.9.135-vs2.3.9.8/kernel/vserver/cvirt_proc.h
16098--- linux-4.9.135/kernel/vserver/cvirt_proc.h 1970-01-01 00:00:00.000000000 +0000
16099+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/cvirt_proc.h 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
16100@@ -0,0 +1,123 @@
16101+#ifndef _VX_CVIRT_PROC_H
16102+#define _VX_CVIRT_PROC_H
d337f35e 16103+
4bf69007
AM
16104+#include <linux/nsproxy.h>
16105+#include <linux/mnt_namespace.h>
16106+#include <linux/ipc_namespace.h>
16107+#include <linux/utsname.h>
16108+#include <linux/ipc.h>
d337f35e 16109+
4bf69007 16110+extern int vx_info_mnt_namespace(struct mnt_namespace *, char *);
d337f35e 16111+
4bf69007
AM
16112+static inline
16113+int vx_info_proc_nsproxy(struct nsproxy *nsproxy, char *buffer)
16114+{
16115+ struct mnt_namespace *ns;
16116+ struct uts_namespace *uts;
16117+ struct ipc_namespace *ipc;
16118+ int length = 0;
d337f35e 16119+
4bf69007
AM
16120+ if (!nsproxy)
16121+ goto out;
d337f35e 16122+
4bf69007
AM
16123+ length += sprintf(buffer + length,
16124+ "NSProxy:\t%p [%p,%p,%p]\n",
16125+ nsproxy, nsproxy->mnt_ns,
16126+ nsproxy->uts_ns, nsproxy->ipc_ns);
d337f35e 16127+
4bf69007
AM
16128+ ns = nsproxy->mnt_ns;
16129+ if (!ns)
16130+ goto skip_ns;
d337f35e 16131+
4bf69007 16132+ length += vx_info_mnt_namespace(ns, buffer + length);
d337f35e 16133+
4bf69007 16134+skip_ns:
d337f35e 16135+
4bf69007
AM
16136+ uts = nsproxy->uts_ns;
16137+ if (!uts)
16138+ goto skip_uts;
d337f35e 16139+
4bf69007
AM
16140+ length += sprintf(buffer + length,
16141+ "SysName:\t%.*s\n"
16142+ "NodeName:\t%.*s\n"
16143+ "Release:\t%.*s\n"
16144+ "Version:\t%.*s\n"
16145+ "Machine:\t%.*s\n"
16146+ "DomainName:\t%.*s\n",
16147+ __NEW_UTS_LEN, uts->name.sysname,
16148+ __NEW_UTS_LEN, uts->name.nodename,
16149+ __NEW_UTS_LEN, uts->name.release,
16150+ __NEW_UTS_LEN, uts->name.version,
16151+ __NEW_UTS_LEN, uts->name.machine,
16152+ __NEW_UTS_LEN, uts->name.domainname);
16153+skip_uts:
d337f35e 16154+
4bf69007
AM
16155+ ipc = nsproxy->ipc_ns;
16156+ if (!ipc)
16157+ goto skip_ipc;
d337f35e 16158+
4bf69007
AM
16159+ length += sprintf(buffer + length,
16160+ "SEMS:\t\t%d %d %d %d %d\n"
16161+ "MSG:\t\t%d %d %d\n"
b00e13aa 16162+ "SHM:\t\t%lu %lu %d %ld\n",
4bf69007
AM
16163+ ipc->sem_ctls[0], ipc->sem_ctls[1],
16164+ ipc->sem_ctls[2], ipc->sem_ctls[3],
16165+ ipc->used_sems,
16166+ ipc->msg_ctlmax, ipc->msg_ctlmnb, ipc->msg_ctlmni,
16167+ (unsigned long)ipc->shm_ctlmax,
16168+ (unsigned long)ipc->shm_ctlall,
16169+ ipc->shm_ctlmni, ipc->shm_tot);
16170+skip_ipc:
16171+out:
16172+ return length;
16173+}
d337f35e
JR
16174+
16175+
4bf69007 16176+#include <linux/sched.h>
d337f35e 16177+
4bf69007
AM
16178+#define LOAD_INT(x) ((x) >> FSHIFT)
16179+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)
d337f35e 16180+
4bf69007
AM
16181+static inline
16182+int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
d337f35e 16183+{
4bf69007
AM
16184+ int length = 0;
16185+ int a, b, c;
d337f35e 16186+
4bf69007 16187+ length += sprintf(buffer + length,
cc23e853
AM
16188+ "BiasUptime:\t%llu.%02lu\n",
16189+ (unsigned long long)cvirt->bias_uptime.tv_sec,
4bf69007 16190+ (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
d337f35e 16191+
4bf69007
AM
16192+ a = cvirt->load[0] + (FIXED_1 / 200);
16193+ b = cvirt->load[1] + (FIXED_1 / 200);
16194+ c = cvirt->load[2] + (FIXED_1 / 200);
16195+ length += sprintf(buffer + length,
16196+ "nr_threads:\t%d\n"
16197+ "nr_running:\t%d\n"
16198+ "nr_unintr:\t%d\n"
16199+ "nr_onhold:\t%d\n"
16200+ "load_updates:\t%d\n"
16201+ "loadavg:\t%d.%02d %d.%02d %d.%02d\n"
16202+ "total_forks:\t%d\n",
16203+ atomic_read(&cvirt->nr_threads),
16204+ atomic_read(&cvirt->nr_running),
16205+ atomic_read(&cvirt->nr_uninterruptible),
16206+ atomic_read(&cvirt->nr_onhold),
16207+ atomic_read(&cvirt->load_updates),
16208+ LOAD_INT(a), LOAD_FRAC(a),
16209+ LOAD_INT(b), LOAD_FRAC(b),
16210+ LOAD_INT(c), LOAD_FRAC(c),
16211+ atomic_read(&cvirt->total_forks));
16212+ return length;
d337f35e
JR
16213+}
16214+
4bf69007
AM
16215+static inline
16216+int vx_info_proc_cvirt_pc(struct _vx_cvirt_pc *cvirt_pc,
16217+ char *buffer, int cpu)
16218+{
16219+ int length = 0;
16220+ return length;
16221+}
d337f35e 16222+
4bf69007 16223+#endif /* _VX_CVIRT_PROC_H */
09a55596
AM
16224diff -NurpP --minimal linux-4.9.135/kernel/vserver/debug.c linux-4.9.135-vs2.3.9.8/kernel/vserver/debug.c
16225--- linux-4.9.135/kernel/vserver/debug.c 1970-01-01 00:00:00.000000000 +0000
16226+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/debug.c 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
16227@@ -0,0 +1,32 @@
16228+/*
16229+ * kernel/vserver/debug.c
16230+ *
cc23e853 16231+ * Copyright (C) 2005-2007 Herbert P?tzl
4bf69007
AM
16232+ *
16233+ * V0.01 vx_info dump support
16234+ *
16235+ */
d337f35e 16236+
4bf69007 16237+#include <linux/module.h>
d337f35e 16238+
4bf69007 16239+#include <linux/vserver/context.h>
d337f35e 16240+
d337f35e 16241+
4bf69007 16242+void dump_vx_info(struct vx_info *vxi, int level)
d337f35e 16243+{
4bf69007
AM
16244+ printk("vx_info %p[#%d, %d.%d, %4x]\n", vxi, vxi->vx_id,
16245+ atomic_read(&vxi->vx_usecnt),
16246+ atomic_read(&vxi->vx_tasks),
16247+ vxi->vx_state);
16248+ if (level > 0) {
16249+ __dump_vx_limit(&vxi->limit);
16250+ __dump_vx_sched(&vxi->sched);
16251+ __dump_vx_cvirt(&vxi->cvirt);
16252+ __dump_vx_cacct(&vxi->cacct);
16253+ }
16254+ printk("---\n");
16255+}
d337f35e 16256+
d337f35e 16257+
4bf69007 16258+EXPORT_SYMBOL_GPL(dump_vx_info);
d337f35e 16259+
09a55596
AM
16260diff -NurpP --minimal linux-4.9.135/kernel/vserver/device.c linux-4.9.135-vs2.3.9.8/kernel/vserver/device.c
16261--- linux-4.9.135/kernel/vserver/device.c 1970-01-01 00:00:00.000000000 +0000
16262+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/device.c 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
16263@@ -0,0 +1,443 @@
16264+/*
16265+ * linux/kernel/vserver/device.c
16266+ *
16267+ * Linux-VServer: Device Support
16268+ *
cc23e853 16269+ * Copyright (C) 2006 Herbert P?tzl
4bf69007
AM
16270+ * Copyright (C) 2007 Daniel Hokka Zakrisson
16271+ *
16272+ * V0.01 device mapping basics
16273+ * V0.02 added defaults
16274+ *
16275+ */
d337f35e 16276+
4bf69007
AM
16277+#include <linux/slab.h>
16278+#include <linux/rcupdate.h>
16279+#include <linux/fs.h>
16280+#include <linux/namei.h>
16281+#include <linux/hash.h>
d337f35e 16282+
4bf69007
AM
16283+#include <asm/errno.h>
16284+#include <asm/uaccess.h>
16285+#include <linux/vserver/base.h>
16286+#include <linux/vserver/debug.h>
16287+#include <linux/vserver/context.h>
16288+#include <linux/vserver/device.h>
16289+#include <linux/vserver/device_cmd.h>
d337f35e 16290+
d337f35e 16291+
4bf69007 16292+#define DMAP_HASH_BITS 4
d337f35e 16293+
d337f35e 16294+
4bf69007
AM
16295+struct vs_mapping {
16296+ union {
16297+ struct hlist_node hlist;
16298+ struct list_head list;
16299+ } u;
16300+#define dm_hlist u.hlist
16301+#define dm_list u.list
61333608 16302+ vxid_t xid;
4bf69007
AM
16303+ dev_t device;
16304+ struct vx_dmap_target target;
16305+};
d337f35e 16306+
d337f35e 16307+
4bf69007 16308+static struct hlist_head dmap_main_hash[1 << DMAP_HASH_BITS];
d337f35e 16309+
4bf69007 16310+static DEFINE_SPINLOCK(dmap_main_hash_lock);
d337f35e 16311+
4bf69007
AM
16312+static struct vx_dmap_target dmap_defaults[2] = {
16313+ { .flags = DATTR_OPEN },
16314+ { .flags = DATTR_OPEN },
16315+};
d337f35e
JR
16316+
16317+
4bf69007 16318+struct kmem_cache *dmap_cachep __read_mostly;
d337f35e 16319+
4bf69007
AM
16320+int __init dmap_cache_init(void)
16321+{
16322+ dmap_cachep = kmem_cache_create("dmap_cache",
16323+ sizeof(struct vs_mapping), 0,
16324+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
d337f35e
JR
16325+ return 0;
16326+}
16327+
4bf69007 16328+__initcall(dmap_cache_init);
d337f35e 16329+
4bf69007
AM
16330+
16331+static inline unsigned int __hashval(dev_t dev, int bits)
d337f35e 16332+{
4bf69007
AM
16333+ return hash_long((unsigned long)dev, bits);
16334+}
d337f35e 16335+
d337f35e 16336+
4bf69007
AM
16337+/* __hash_mapping()
16338+ * add the mapping to the hash table
16339+ */
16340+static inline void __hash_mapping(struct vx_info *vxi, struct vs_mapping *vdm)
16341+{
16342+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16343+ struct hlist_head *head, *hash = dmap_main_hash;
16344+ int device = vdm->device;
d337f35e 16345+
4bf69007
AM
16346+ spin_lock(hash_lock);
16347+ vxdprintk(VXD_CBIT(misc, 8), "__hash_mapping: %p[#%d] %08x:%08x",
16348+ vxi, vxi ? vxi->vx_id : 0, device, vdm->target.target);
d337f35e 16349+
4bf69007
AM
16350+ head = &hash[__hashval(device, DMAP_HASH_BITS)];
16351+ hlist_add_head(&vdm->dm_hlist, head);
16352+ spin_unlock(hash_lock);
16353+}
16354+
16355+
16356+static inline int __mode_to_default(umode_t mode)
16357+{
16358+ switch (mode) {
16359+ case S_IFBLK:
16360+ return 0;
16361+ case S_IFCHR:
16362+ return 1;
16363+ default:
16364+ BUG();
d337f35e 16365+ }
d337f35e
JR
16366+}
16367+
4bf69007
AM
16368+
16369+/* __set_default()
16370+ * set a default
16371+ */
16372+static inline void __set_default(struct vx_info *vxi, umode_t mode,
16373+ struct vx_dmap_target *vdmt)
d337f35e 16374+{
4bf69007
AM
16375+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16376+ spin_lock(hash_lock);
d337f35e 16377+
4bf69007
AM
16378+ if (vxi)
16379+ vxi->dmap.targets[__mode_to_default(mode)] = *vdmt;
16380+ else
16381+ dmap_defaults[__mode_to_default(mode)] = *vdmt;
d337f35e 16382+
d337f35e 16383+
4bf69007 16384+ spin_unlock(hash_lock);
d337f35e 16385+
4bf69007
AM
16386+ vxdprintk(VXD_CBIT(misc, 8), "__set_default: %p[#%u] %08x %04x",
16387+ vxi, vxi ? vxi->vx_id : 0, vdmt->target, vdmt->flags);
d337f35e
JR
16388+}
16389+
d337f35e 16390+
4bf69007
AM
16391+/* __remove_default()
16392+ * remove a default
16393+ */
16394+static inline int __remove_default(struct vx_info *vxi, umode_t mode)
d337f35e 16395+{
4bf69007
AM
16396+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16397+ spin_lock(hash_lock);
d337f35e 16398+
4bf69007
AM
16399+ if (vxi)
16400+ vxi->dmap.targets[__mode_to_default(mode)].flags = 0;
16401+ else /* remove == reset */
16402+ dmap_defaults[__mode_to_default(mode)].flags = DATTR_OPEN | mode;
d337f35e 16403+
4bf69007
AM
16404+ spin_unlock(hash_lock);
16405+ return 0;
d337f35e
JR
16406+}
16407+
d337f35e 16408+
4bf69007
AM
16409+/* __find_mapping()
16410+ * find a mapping in the hash table
16411+ *
16412+ * caller must hold hash_lock
16413+ */
61333608 16414+static inline int __find_mapping(vxid_t xid, dev_t device, umode_t mode,
4bf69007
AM
16415+ struct vs_mapping **local, struct vs_mapping **global)
16416+{
16417+ struct hlist_head *hash = dmap_main_hash;
16418+ struct hlist_head *head = &hash[__hashval(device, DMAP_HASH_BITS)];
16419+ struct hlist_node *pos;
16420+ struct vs_mapping *vdm;
d337f35e 16421+
4bf69007
AM
16422+ *local = NULL;
16423+ if (global)
16424+ *global = NULL;
d337f35e 16425+
4bf69007
AM
16426+ hlist_for_each(pos, head) {
16427+ vdm = hlist_entry(pos, struct vs_mapping, dm_hlist);
d337f35e 16428+
4bf69007
AM
16429+ if ((vdm->device == device) &&
16430+ !((vdm->target.flags ^ mode) & S_IFMT)) {
16431+ if (vdm->xid == xid) {
16432+ *local = vdm;
16433+ return 1;
16434+ } else if (global && vdm->xid == 0)
16435+ *global = vdm;
2380c486
JR
16436+ }
16437+ }
16438+
4bf69007
AM
16439+ if (global && *global)
16440+ return 0;
16441+ else
16442+ return -ENOENT;
2380c486
JR
16443+}
16444+
16445+
4bf69007
AM
16446+/* __lookup_mapping()
16447+ * find a mapping and store the result in target and flags
16448+ */
16449+static inline int __lookup_mapping(struct vx_info *vxi,
16450+ dev_t device, dev_t *target, int *flags, umode_t mode)
2380c486 16451+{
4bf69007
AM
16452+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16453+ struct vs_mapping *vdm, *global;
16454+ struct vx_dmap_target *vdmt;
2380c486 16455+ int ret = 0;
61333608 16456+ vxid_t xid = vxi->vx_id;
4bf69007 16457+ int index;
2380c486 16458+
4bf69007
AM
16459+ spin_lock(hash_lock);
16460+ if (__find_mapping(xid, device, mode, &vdm, &global) > 0) {
2380c486 16461+ ret = 1;
4bf69007
AM
16462+ vdmt = &vdm->target;
16463+ goto found;
16464+ }
2380c486 16465+
4bf69007
AM
16466+ index = __mode_to_default(mode);
16467+ if (vxi && vxi->dmap.targets[index].flags) {
16468+ ret = 2;
16469+ vdmt = &vxi->dmap.targets[index];
16470+ } else if (global) {
16471+ ret = 3;
16472+ vdmt = &global->target;
16473+ goto found;
16474+ } else {
16475+ ret = 4;
16476+ vdmt = &dmap_defaults[index];
d337f35e 16477+ }
2380c486 16478+
4bf69007
AM
16479+found:
16480+ if (target && (vdmt->flags & DATTR_REMAP))
16481+ *target = vdmt->target;
16482+ else if (target)
16483+ *target = device;
16484+ if (flags)
16485+ *flags = vdmt->flags;
16486+
16487+ spin_unlock(hash_lock);
2380c486
JR
16488+
16489+ return ret;
d337f35e
JR
16490+}
16491+
16492+
4bf69007
AM
16493+/* __remove_mapping()
16494+ * remove a mapping from the hash table
16495+ */
16496+static inline int __remove_mapping(struct vx_info *vxi, dev_t device,
16497+ umode_t mode)
d337f35e 16498+{
4bf69007
AM
16499+ spinlock_t *hash_lock = &dmap_main_hash_lock;
16500+ struct vs_mapping *vdm = NULL;
d337f35e
JR
16501+ int ret = 0;
16502+
4bf69007
AM
16503+ spin_lock(hash_lock);
16504+
16505+ ret = __find_mapping((vxi ? vxi->vx_id : 0), device, mode, &vdm,
16506+ NULL);
16507+ vxdprintk(VXD_CBIT(misc, 8), "__remove_mapping: %p[#%d] %08x %04x",
16508+ vxi, vxi ? vxi->vx_id : 0, device, mode);
16509+ if (ret < 0)
2380c486 16510+ goto out;
4bf69007 16511+ hlist_del(&vdm->dm_hlist);
2380c486 16512+
2380c486 16513+out:
4bf69007
AM
16514+ spin_unlock(hash_lock);
16515+ if (vdm)
16516+ kmem_cache_free(dmap_cachep, vdm);
2380c486
JR
16517+ return ret;
16518+}
16519+
16520+
2380c486 16521+
4bf69007
AM
16522+int vs_map_device(struct vx_info *vxi,
16523+ dev_t device, dev_t *target, umode_t mode)
2380c486 16524+{
4bf69007 16525+ int ret, flags = DATTR_MASK;
2380c486 16526+
4bf69007
AM
16527+ if (!vxi) {
16528+ if (target)
16529+ *target = device;
2380c486 16530+ goto out;
2380c486 16531+ }
4bf69007
AM
16532+ ret = __lookup_mapping(vxi, device, target, &flags, mode);
16533+ vxdprintk(VXD_CBIT(misc, 8), "vs_map_device: %08x target: %08x flags: %04x mode: %04x mapped=%d",
16534+ device, target ? *target : 0, flags, mode, ret);
2380c486 16535+out:
4bf69007 16536+ return (flags & DATTR_MASK);
2380c486
JR
16537+}
16538+
2380c486 16539+
4bf69007
AM
16540+
16541+static int do_set_mapping(struct vx_info *vxi,
16542+ dev_t device, dev_t target, int flags, umode_t mode)
2380c486 16543+{
4bf69007
AM
16544+ if (device) {
16545+ struct vs_mapping *new;
2380c486 16546+
4bf69007
AM
16547+ new = kmem_cache_alloc(dmap_cachep, GFP_KERNEL);
16548+ if (!new)
16549+ return -ENOMEM;
16550+
16551+ INIT_HLIST_NODE(&new->dm_hlist);
16552+ new->device = device;
16553+ new->target.target = target;
16554+ new->target.flags = flags | mode;
16555+ new->xid = (vxi ? vxi->vx_id : 0);
16556+
16557+ vxdprintk(VXD_CBIT(misc, 8), "do_set_mapping: %08x target: %08x flags: %04x", device, target, flags);
16558+ __hash_mapping(vxi, new);
16559+ } else {
16560+ struct vx_dmap_target new = {
16561+ .target = target,
16562+ .flags = flags | mode,
16563+ };
16564+ __set_default(vxi, mode, &new);
16565+ }
16566+ return 0;
2380c486
JR
16567+}
16568+
4bf69007
AM
16569+
16570+static int do_unset_mapping(struct vx_info *vxi,
16571+ dev_t device, dev_t target, int flags, umode_t mode)
2380c486 16572+{
4bf69007 16573+ int ret = -EINVAL;
763640ca 16574+
4bf69007
AM
16575+ if (device) {
16576+ ret = __remove_mapping(vxi, device, mode);
16577+ if (ret < 0)
16578+ goto out;
16579+ } else {
16580+ ret = __remove_default(vxi, mode);
16581+ if (ret < 0)
16582+ goto out;
16583+ }
2380c486 16584+
4bf69007
AM
16585+out:
16586+ return ret;
16587+}
2380c486 16588+
2380c486 16589+
4bf69007
AM
16590+static inline int __user_device(const char __user *name, dev_t *dev,
16591+ umode_t *mode)
16592+{
cc23e853 16593+ struct path path;
4bf69007 16594+ int ret;
2380c486 16595+
4bf69007
AM
16596+ if (!name) {
16597+ *dev = 0;
16598+ return 0;
16599+ }
cc23e853 16600+ ret = user_lpath(name, &path);
4bf69007
AM
16601+ if (ret)
16602+ return ret;
cc23e853
AM
16603+ if (path.dentry->d_inode) {
16604+ *dev = path.dentry->d_inode->i_rdev;
16605+ *mode = path.dentry->d_inode->i_mode;
4bf69007 16606+ }
cc23e853 16607+ path_put(&path);
4bf69007
AM
16608+ return 0;
16609+}
2380c486 16610+
4bf69007
AM
16611+static inline int __mapping_mode(dev_t device, dev_t target,
16612+ umode_t device_mode, umode_t target_mode, umode_t *mode)
16613+{
16614+ if (device)
16615+ *mode = device_mode & S_IFMT;
16616+ else if (target)
16617+ *mode = target_mode & S_IFMT;
16618+ else
16619+ return -EINVAL;
2380c486 16620+
4bf69007
AM
16621+ /* if both given, device and target mode have to match */
16622+ if (device && target &&
16623+ ((device_mode ^ target_mode) & S_IFMT))
16624+ return -EINVAL;
16625+ return 0;
16626+}
d337f35e 16627+
d337f35e 16628+
4bf69007
AM
16629+static inline int do_mapping(struct vx_info *vxi, const char __user *device_path,
16630+ const char __user *target_path, int flags, int set)
16631+{
16632+ dev_t device = ~0, target = ~0;
16633+ umode_t device_mode = 0, target_mode = 0, mode;
16634+ int ret;
2380c486 16635+
4bf69007
AM
16636+ ret = __user_device(device_path, &device, &device_mode);
16637+ if (ret)
16638+ return ret;
16639+ ret = __user_device(target_path, &target, &target_mode);
16640+ if (ret)
16641+ return ret;
2380c486 16642+
4bf69007
AM
16643+ ret = __mapping_mode(device, target,
16644+ device_mode, target_mode, &mode);
16645+ if (ret)
16646+ return ret;
2380c486 16647+
4bf69007
AM
16648+ if (set)
16649+ return do_set_mapping(vxi, device, target,
16650+ flags, mode);
16651+ else
16652+ return do_unset_mapping(vxi, device, target,
16653+ flags, mode);
d337f35e
JR
16654+}
16655+
d337f35e 16656+
4bf69007
AM
16657+int vc_set_mapping(struct vx_info *vxi, void __user *data)
16658+{
16659+ struct vcmd_set_mapping_v0 vc_data;
d337f35e 16660+
4bf69007
AM
16661+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16662+ return -EFAULT;
d337f35e 16663+
4bf69007
AM
16664+ return do_mapping(vxi, vc_data.device, vc_data.target,
16665+ vc_data.flags, 1);
16666+}
d337f35e 16667+
4bf69007 16668+int vc_unset_mapping(struct vx_info *vxi, void __user *data)
d337f35e 16669+{
4bf69007 16670+ struct vcmd_set_mapping_v0 vc_data;
d337f35e 16671+
4bf69007
AM
16672+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16673+ return -EFAULT;
16674+
16675+ return do_mapping(vxi, vc_data.device, vc_data.target,
16676+ vc_data.flags, 0);
d337f35e
JR
16677+}
16678+
16679+
4bf69007
AM
16680+#ifdef CONFIG_COMPAT
16681+
16682+int vc_set_mapping_x32(struct vx_info *vxi, void __user *data)
d337f35e 16683+{
4bf69007 16684+ struct vcmd_set_mapping_v0_x32 vc_data;
d337f35e 16685+
4bf69007
AM
16686+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16687+ return -EFAULT;
16688+
16689+ return do_mapping(vxi, compat_ptr(vc_data.device_ptr),
16690+ compat_ptr(vc_data.target_ptr), vc_data.flags, 1);
d337f35e
JR
16691+}
16692+
4bf69007
AM
16693+int vc_unset_mapping_x32(struct vx_info *vxi, void __user *data)
16694+{
16695+ struct vcmd_set_mapping_v0_x32 vc_data;
16696+
16697+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16698+ return -EFAULT;
d337f35e 16699+
4bf69007
AM
16700+ return do_mapping(vxi, compat_ptr(vc_data.device_ptr),
16701+ compat_ptr(vc_data.target_ptr), vc_data.flags, 0);
16702+}
d337f35e 16703+
4bf69007 16704+#endif /* CONFIG_COMPAT */
d337f35e 16705+
4bf69007 16706+
09a55596
AM
16707diff -NurpP --minimal linux-4.9.135/kernel/vserver/dlimit.c linux-4.9.135-vs2.3.9.8/kernel/vserver/dlimit.c
16708--- linux-4.9.135/kernel/vserver/dlimit.c 1970-01-01 00:00:00.000000000 +0000
16709+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/dlimit.c 2018-10-20 04:58:15.000000000 +0000
b00e13aa 16710@@ -0,0 +1,528 @@
d337f35e 16711+/*
4bf69007 16712+ * linux/kernel/vserver/dlimit.c
d337f35e 16713+ *
4bf69007 16714+ * Virtual Server: Context Disk Limits
d337f35e 16715+ *
cc23e853 16716+ * Copyright (C) 2004-2009 Herbert P?tzl
d337f35e 16717+ *
4bf69007
AM
16718+ * V0.01 initial version
16719+ * V0.02 compat32 splitup
16720+ * V0.03 extended interface
d337f35e
JR
16721+ *
16722+ */
16723+
4bf69007
AM
16724+#include <linux/statfs.h>
16725+#include <linux/sched.h>
2380c486 16726+#include <linux/namei.h>
d337f35e 16727+#include <linux/vs_tag.h>
4bf69007
AM
16728+#include <linux/vs_dlimit.h>
16729+#include <linux/vserver/dlimit_cmd.h>
16730+#include <linux/slab.h>
16731+// #include <linux/gfp.h>
d337f35e 16732+
d337f35e
JR
16733+#include <asm/uaccess.h>
16734+
4bf69007 16735+/* __alloc_dl_info()
d337f35e 16736+
4bf69007
AM
16737+ * allocate an initialized dl_info struct
16738+ * doesn't make it visible (hash) */
d337f35e 16739+
61333608 16740+static struct dl_info *__alloc_dl_info(struct super_block *sb, vtag_t tag)
4bf69007
AM
16741+{
16742+ struct dl_info *new = NULL;
d337f35e 16743+
4bf69007
AM
16744+ vxdprintk(VXD_CBIT(dlim, 5),
16745+ "alloc_dl_info(%p,%d)*", sb, tag);
d337f35e 16746+
4bf69007
AM
16747+ /* would this benefit from a slab cache? */
16748+ new = kmalloc(sizeof(struct dl_info), GFP_KERNEL);
16749+ if (!new)
16750+ return 0;
d337f35e 16751+
4bf69007
AM
16752+ memset(new, 0, sizeof(struct dl_info));
16753+ new->dl_tag = tag;
16754+ new->dl_sb = sb;
16755+ // INIT_RCU_HEAD(&new->dl_rcu);
16756+ INIT_HLIST_NODE(&new->dl_hlist);
16757+ spin_lock_init(&new->dl_lock);
16758+ atomic_set(&new->dl_refcnt, 0);
16759+ atomic_set(&new->dl_usecnt, 0);
d337f35e 16760+
4bf69007 16761+ /* rest of init goes here */
d337f35e 16762+
4bf69007
AM
16763+ vxdprintk(VXD_CBIT(dlim, 4),
16764+ "alloc_dl_info(%p,%d) = %p", sb, tag, new);
16765+ return new;
16766+}
d4263eb0 16767+
4bf69007 16768+/* __dealloc_dl_info()
d337f35e 16769+
4bf69007 16770+ * final disposal of dl_info */
d337f35e 16771+
4bf69007 16772+static void __dealloc_dl_info(struct dl_info *dli)
adc1caaa 16773+{
4bf69007
AM
16774+ vxdprintk(VXD_CBIT(dlim, 4),
16775+ "dealloc_dl_info(%p)", dli);
2380c486 16776+
4bf69007
AM
16777+ dli->dl_hlist.next = LIST_POISON1;
16778+ dli->dl_tag = -1;
16779+ dli->dl_sb = 0;
2380c486 16780+
4bf69007
AM
16781+ BUG_ON(atomic_read(&dli->dl_usecnt));
16782+ BUG_ON(atomic_read(&dli->dl_refcnt));
2380c486 16783+
4bf69007 16784+ kfree(dli);
adc1caaa 16785+}
2380c486 16786+
2380c486 16787+
4bf69007 16788+/* hash table for dl_info hash */
2380c486 16789+
4bf69007 16790+#define DL_HASH_SIZE 13
2380c486 16791+
4bf69007 16792+struct hlist_head dl_info_hash[DL_HASH_SIZE];
2380c486 16793+
4bf69007 16794+static DEFINE_SPINLOCK(dl_info_hash_lock);
2380c486 16795+
d33d7b00 16796+
61333608 16797+static inline unsigned int __hashval(struct super_block *sb, vtag_t tag)
adc1caaa 16798+{
4bf69007
AM
16799+ return ((tag ^ (unsigned long)sb) % DL_HASH_SIZE);
16800+}
2380c486 16801+
2380c486 16802+
2380c486 16803+
4bf69007 16804+/* __hash_dl_info()
2380c486 16805+
4bf69007
AM
16806+ * add the dli to the global hash table
16807+ * requires the hash_lock to be held */
2380c486 16808+
4bf69007
AM
16809+static inline void __hash_dl_info(struct dl_info *dli)
16810+{
16811+ struct hlist_head *head;
d337f35e 16812+
4bf69007
AM
16813+ vxdprintk(VXD_CBIT(dlim, 6),
16814+ "__hash_dl_info: %p[#%d]", dli, dli->dl_tag);
16815+ get_dl_info(dli);
16816+ head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_tag)];
16817+ hlist_add_head_rcu(&dli->dl_hlist, head);
16818+}
d337f35e 16819+
4bf69007 16820+/* __unhash_dl_info()
3bac966d 16821+
4bf69007
AM
16822+ * remove the dli from the global hash table
16823+ * requires the hash_lock to be held */
3bac966d 16824+
4bf69007
AM
16825+static inline void __unhash_dl_info(struct dl_info *dli)
16826+{
16827+ vxdprintk(VXD_CBIT(dlim, 6),
16828+ "__unhash_dl_info: %p[#%d]", dli, dli->dl_tag);
16829+ hlist_del_rcu(&dli->dl_hlist);
16830+ put_dl_info(dli);
16831+}
3bac966d 16832+
3bac966d 16833+
4bf69007 16834+/* __lookup_dl_info()
3bac966d 16835+
4bf69007
AM
16836+ * requires the rcu_read_lock()
16837+ * doesn't increment the dl_refcnt */
3bac966d 16838+
61333608 16839+static inline struct dl_info *__lookup_dl_info(struct super_block *sb, vtag_t tag)
4bf69007
AM
16840+{
16841+ struct hlist_head *head = &dl_info_hash[__hashval(sb, tag)];
4bf69007 16842+ struct dl_info *dli;
3bac966d 16843+
b00e13aa
AM
16844+ hlist_for_each_entry_rcu(dli, head, dl_hlist) {
16845+ if (dli->dl_tag == tag && dli->dl_sb == sb)
4bf69007 16846+ return dli;
d33d7b00 16847+ }
4bf69007
AM
16848+ return NULL;
16849+}
3bac966d 16850+
3bac966d 16851+
61333608 16852+struct dl_info *locate_dl_info(struct super_block *sb, vtag_t tag)
4bf69007
AM
16853+{
16854+ struct dl_info *dli;
16855+
16856+ rcu_read_lock();
16857+ dli = get_dl_info(__lookup_dl_info(sb, tag));
16858+ vxdprintk(VXD_CBIT(dlim, 7),
16859+ "locate_dl_info(%p,#%d) = %p", sb, tag, dli);
16860+ rcu_read_unlock();
16861+ return dli;
d33d7b00 16862+}
3bac966d 16863+
4bf69007 16864+void rcu_free_dl_info(struct rcu_head *head)
d33d7b00 16865+{
4bf69007
AM
16866+ struct dl_info *dli = container_of(head, struct dl_info, dl_rcu);
16867+ int usecnt, refcnt;
3bac966d 16868+
4bf69007 16869+ BUG_ON(!dli || !head);
3bac966d 16870+
4bf69007
AM
16871+ usecnt = atomic_read(&dli->dl_usecnt);
16872+ BUG_ON(usecnt < 0);
3bac966d 16873+
4bf69007
AM
16874+ refcnt = atomic_read(&dli->dl_refcnt);
16875+ BUG_ON(refcnt < 0);
16876+
16877+ vxdprintk(VXD_CBIT(dlim, 3),
16878+ "rcu_free_dl_info(%p)", dli);
16879+ if (!usecnt)
16880+ __dealloc_dl_info(dli);
16881+ else
16882+ printk("!!! rcu didn't free\n");
d33d7b00 16883+}
3bac966d 16884+
3bac966d 16885+
4bf69007
AM
16886+
16887+
16888+static int do_addrem_dlimit(uint32_t id, const char __user *name,
16889+ uint32_t flags, int add)
d33d7b00
AM
16890+{
16891+ struct path path;
d33d7b00 16892+ int ret;
3bac966d 16893+
4bf69007 16894+ ret = user_lpath(name, &path);
d33d7b00 16895+ if (!ret) {
4bf69007
AM
16896+ struct super_block *sb;
16897+ struct dl_info *dli;
16898+
16899+ ret = -EINVAL;
16900+ if (!path.dentry->d_inode)
16901+ goto out_release;
16902+ if (!(sb = path.dentry->d_inode->i_sb))
16903+ goto out_release;
16904+
16905+ if (add) {
16906+ dli = __alloc_dl_info(sb, id);
16907+ spin_lock(&dl_info_hash_lock);
16908+
16909+ ret = -EEXIST;
16910+ if (__lookup_dl_info(sb, id))
16911+ goto out_unlock;
16912+ __hash_dl_info(dli);
16913+ dli = NULL;
16914+ } else {
16915+ spin_lock(&dl_info_hash_lock);
16916+ dli = __lookup_dl_info(sb, id);
16917+
16918+ ret = -ESRCH;
16919+ if (!dli)
16920+ goto out_unlock;
16921+ __unhash_dl_info(dli);
16922+ }
16923+ ret = 0;
16924+ out_unlock:
16925+ spin_unlock(&dl_info_hash_lock);
16926+ if (add && dli)
16927+ __dealloc_dl_info(dli);
16928+ out_release:
d33d7b00
AM
16929+ path_put(&path);
16930+ }
d33d7b00
AM
16931+ return ret;
16932+}
3bac966d 16933+
4bf69007 16934+int vc_add_dlimit(uint32_t id, void __user *data)
d33d7b00 16935+{
4bf69007 16936+ struct vcmd_ctx_dlimit_base_v0 vc_data;
3bac966d 16937+
d33d7b00
AM
16938+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16939+ return -EFAULT;
3bac966d 16940+
4bf69007
AM
16941+ return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1);
16942+}
3bac966d 16943+
4bf69007
AM
16944+int vc_rem_dlimit(uint32_t id, void __user *data)
16945+{
16946+ struct vcmd_ctx_dlimit_base_v0 vc_data;
3bac966d 16947+
4bf69007 16948+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d33d7b00 16949+ return -EFAULT;
4bf69007
AM
16950+
16951+ return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0);
d33d7b00 16952+}
3bac966d 16953+
4bf69007 16954+#ifdef CONFIG_COMPAT
3bac966d 16955+
4bf69007
AM
16956+int vc_add_dlimit_x32(uint32_t id, void __user *data)
16957+{
16958+ struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
3bac966d 16959+
4bf69007
AM
16960+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16961+ return -EFAULT;
d337f35e 16962+
4bf69007
AM
16963+ return do_addrem_dlimit(id,
16964+ compat_ptr(vc_data.name_ptr), vc_data.flags, 1);
16965+}
d337f35e 16966+
4bf69007 16967+int vc_rem_dlimit_x32(uint32_t id, void __user *data)
d33d7b00 16968+{
4bf69007 16969+ struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
d337f35e 16970+
4bf69007
AM
16971+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
16972+ return -EFAULT;
16973+
16974+ return do_addrem_dlimit(id,
16975+ compat_ptr(vc_data.name_ptr), vc_data.flags, 0);
d33d7b00 16976+}
d337f35e 16977+
4bf69007
AM
16978+#endif /* CONFIG_COMPAT */
16979+
16980+
16981+static inline
16982+int do_set_dlimit(uint32_t id, const char __user *name,
16983+ uint32_t space_used, uint32_t space_total,
16984+ uint32_t inodes_used, uint32_t inodes_total,
16985+ uint32_t reserved, uint32_t flags)
d33d7b00 16986+{
4bf69007
AM
16987+ struct path path;
16988+ int ret;
ba86f833 16989+
4bf69007
AM
16990+ ret = user_lpath(name, &path);
16991+ if (!ret) {
16992+ struct super_block *sb;
16993+ struct dl_info *dli;
d337f35e 16994+
4bf69007
AM
16995+ ret = -EINVAL;
16996+ if (!path.dentry->d_inode)
16997+ goto out_release;
16998+ if (!(sb = path.dentry->d_inode->i_sb))
16999+ goto out_release;
d337f35e 17000+
4bf69007
AM
17001+ /* sanity checks */
17002+ if ((reserved != CDLIM_KEEP &&
17003+ reserved > 100) ||
17004+ (inodes_used != CDLIM_KEEP &&
17005+ inodes_used > inodes_total) ||
17006+ (space_used != CDLIM_KEEP &&
17007+ space_used > space_total))
17008+ goto out_release;
d337f35e 17009+
4bf69007
AM
17010+ ret = -ESRCH;
17011+ dli = locate_dl_info(sb, id);
17012+ if (!dli)
17013+ goto out_release;
ba86f833 17014+
4bf69007 17015+ spin_lock(&dli->dl_lock);
d337f35e 17016+
4bf69007
AM
17017+ if (inodes_used != CDLIM_KEEP)
17018+ dli->dl_inodes_used = inodes_used;
17019+ if (inodes_total != CDLIM_KEEP)
17020+ dli->dl_inodes_total = inodes_total;
17021+ if (space_used != CDLIM_KEEP)
17022+ dli->dl_space_used = dlimit_space_32to64(
17023+ space_used, flags, DLIMS_USED);
d337f35e 17024+
4bf69007
AM
17025+ if (space_total == CDLIM_INFINITY)
17026+ dli->dl_space_total = DLIM_INFINITY;
17027+ else if (space_total != CDLIM_KEEP)
17028+ dli->dl_space_total = dlimit_space_32to64(
17029+ space_total, flags, DLIMS_TOTAL);
78865d5b 17030+
4bf69007
AM
17031+ if (reserved != CDLIM_KEEP)
17032+ dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100;
78865d5b 17033+
4bf69007 17034+ spin_unlock(&dli->dl_lock);
d337f35e 17035+
4bf69007
AM
17036+ put_dl_info(dli);
17037+ ret = 0;
d337f35e 17038+
4bf69007
AM
17039+ out_release:
17040+ path_put(&path);
17041+ }
17042+ return ret;
17043+}
d337f35e 17044+
4bf69007
AM
17045+int vc_set_dlimit(uint32_t id, void __user *data)
17046+{
17047+ struct vcmd_ctx_dlimit_v0 vc_data;
d337f35e 17048+
4bf69007
AM
17049+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17050+ return -EFAULT;
d337f35e 17051+
4bf69007
AM
17052+ return do_set_dlimit(id, vc_data.name,
17053+ vc_data.space_used, vc_data.space_total,
17054+ vc_data.inodes_used, vc_data.inodes_total,
17055+ vc_data.reserved, vc_data.flags);
17056+}
d337f35e 17057+
4bf69007 17058+#ifdef CONFIG_COMPAT
d337f35e 17059+
4bf69007
AM
17060+int vc_set_dlimit_x32(uint32_t id, void __user *data)
17061+{
17062+ struct vcmd_ctx_dlimit_v0_x32 vc_data;
d337f35e 17063+
4bf69007
AM
17064+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17065+ return -EFAULT;
d337f35e 17066+
4bf69007
AM
17067+ return do_set_dlimit(id, compat_ptr(vc_data.name_ptr),
17068+ vc_data.space_used, vc_data.space_total,
17069+ vc_data.inodes_used, vc_data.inodes_total,
17070+ vc_data.reserved, vc_data.flags);
17071+}
d337f35e 17072+
4bf69007 17073+#endif /* CONFIG_COMPAT */
d337f35e 17074+
d337f35e 17075+
4bf69007
AM
17076+static inline
17077+int do_get_dlimit(uint32_t id, const char __user *name,
17078+ uint32_t *space_used, uint32_t *space_total,
17079+ uint32_t *inodes_used, uint32_t *inodes_total,
17080+ uint32_t *reserved, uint32_t *flags)
17081+{
17082+ struct path path;
17083+ int ret;
d337f35e 17084+
4bf69007
AM
17085+ ret = user_lpath(name, &path);
17086+ if (!ret) {
17087+ struct super_block *sb;
17088+ struct dl_info *dli;
d337f35e 17089+
4bf69007
AM
17090+ ret = -EINVAL;
17091+ if (!path.dentry->d_inode)
17092+ goto out_release;
17093+ if (!(sb = path.dentry->d_inode->i_sb))
17094+ goto out_release;
d337f35e 17095+
4bf69007
AM
17096+ ret = -ESRCH;
17097+ dli = locate_dl_info(sb, id);
17098+ if (!dli)
17099+ goto out_release;
d337f35e 17100+
4bf69007
AM
17101+ spin_lock(&dli->dl_lock);
17102+ *inodes_used = dli->dl_inodes_used;
17103+ *inodes_total = dli->dl_inodes_total;
d337f35e 17104+
4bf69007
AM
17105+ *space_used = dlimit_space_64to32(
17106+ dli->dl_space_used, flags, DLIMS_USED);
d337f35e 17107+
4bf69007
AM
17108+ if (dli->dl_space_total == DLIM_INFINITY)
17109+ *space_total = CDLIM_INFINITY;
17110+ else
17111+ *space_total = dlimit_space_64to32(
17112+ dli->dl_space_total, flags, DLIMS_TOTAL);
d337f35e 17113+
4bf69007
AM
17114+ *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);
17115+ spin_unlock(&dli->dl_lock);
d337f35e 17116+
4bf69007
AM
17117+ put_dl_info(dli);
17118+ ret = -EFAULT;
d337f35e 17119+
4bf69007
AM
17120+ ret = 0;
17121+ out_release:
17122+ path_put(&path);
17123+ }
17124+ return ret;
d337f35e
JR
17125+}
17126+
4bf69007
AM
17127+
17128+int vc_get_dlimit(uint32_t id, void __user *data)
d337f35e 17129+{
4bf69007 17130+ struct vcmd_ctx_dlimit_v0 vc_data;
d337f35e
JR
17131+ int ret;
17132+
2380c486 17133+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
17134+ return -EFAULT;
17135+
4bf69007
AM
17136+ ret = do_get_dlimit(id, vc_data.name,
17137+ &vc_data.space_used, &vc_data.space_total,
17138+ &vc_data.inodes_used, &vc_data.inodes_total,
17139+ &vc_data.reserved, &vc_data.flags);
d337f35e
JR
17140+ if (ret)
17141+ return ret;
17142+
2380c486 17143+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
d337f35e
JR
17144+ return -EFAULT;
17145+ return 0;
17146+}
17147+
4bf69007 17148+#ifdef CONFIG_COMPAT
d337f35e 17149+
4bf69007 17150+int vc_get_dlimit_x32(uint32_t id, void __user *data)
d337f35e 17151+{
4bf69007 17152+ struct vcmd_ctx_dlimit_v0_x32 vc_data;
d337f35e
JR
17153+ int ret;
17154+
2380c486 17155+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
17156+ return -EFAULT;
17157+
4bf69007
AM
17158+ ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr),
17159+ &vc_data.space_used, &vc_data.space_total,
17160+ &vc_data.inodes_used, &vc_data.inodes_total,
17161+ &vc_data.reserved, &vc_data.flags);
d337f35e
JR
17162+ if (ret)
17163+ return ret;
17164+
2380c486 17165+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
d337f35e
JR
17166+ return -EFAULT;
17167+ return 0;
17168+}
17169+
4bf69007 17170+#endif /* CONFIG_COMPAT */
ec22aa5c
AM
17171+
17172+
4bf69007 17173+void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
ec22aa5c 17174+{
4bf69007
AM
17175+ struct dl_info *dli;
17176+ __u64 blimit, bfree, bavail;
17177+ __u32 ifree;
ec22aa5c 17178+
4bf69007
AM
17179+ dli = locate_dl_info(sb, dx_current_tag());
17180+ if (!dli)
17181+ return;
ec22aa5c 17182+
4bf69007
AM
17183+ spin_lock(&dli->dl_lock);
17184+ if (dli->dl_inodes_total == (unsigned long)DLIM_INFINITY)
17185+ goto no_ilim;
ec22aa5c 17186+
4bf69007
AM
17187+ /* reduce max inodes available to limit */
17188+ if (buf->f_files > dli->dl_inodes_total)
17189+ buf->f_files = dli->dl_inodes_total;
ec22aa5c 17190+
4bf69007
AM
17191+ ifree = dli->dl_inodes_total - dli->dl_inodes_used;
17192+ /* reduce free inodes to min */
17193+ if (ifree < buf->f_ffree)
17194+ buf->f_ffree = ifree;
b2252bc2 17195+
4bf69007
AM
17196+no_ilim:
17197+ if (dli->dl_space_total == DLIM_INFINITY)
17198+ goto no_blim;
d337f35e 17199+
4bf69007 17200+ blimit = dli->dl_space_total >> sb->s_blocksize_bits;
d337f35e 17201+
4bf69007
AM
17202+ if (dli->dl_space_total < dli->dl_space_used)
17203+ bfree = 0;
17204+ else
17205+ bfree = (dli->dl_space_total - dli->dl_space_used)
17206+ >> sb->s_blocksize_bits;
d337f35e 17207+
4bf69007
AM
17208+ bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);
17209+ if (bavail < dli->dl_space_used)
17210+ bavail = 0;
17211+ else
17212+ bavail = (bavail - dli->dl_space_used)
17213+ >> sb->s_blocksize_bits;
d337f35e 17214+
4bf69007
AM
17215+ /* reduce max space available to limit */
17216+ if (buf->f_blocks > blimit)
17217+ buf->f_blocks = blimit;
d337f35e 17218+
4bf69007
AM
17219+ /* reduce free space to min */
17220+ if (bfree < buf->f_bfree)
17221+ buf->f_bfree = bfree;
d337f35e 17222+
4bf69007
AM
17223+ /* reduce avail space to min */
17224+ if (bavail < buf->f_bavail)
17225+ buf->f_bavail = bavail;
d337f35e 17226+
4bf69007
AM
17227+no_blim:
17228+ spin_unlock(&dli->dl_lock);
17229+ put_dl_info(dli);
d337f35e 17230+
4bf69007 17231+ return;
d337f35e
JR
17232+}
17233+
4bf69007 17234+#include <linux/module.h>
d337f35e 17235+
4bf69007
AM
17236+EXPORT_SYMBOL_GPL(locate_dl_info);
17237+EXPORT_SYMBOL_GPL(rcu_free_dl_info);
e3afe727 17238+
09a55596
AM
17239diff -NurpP --minimal linux-4.9.135/kernel/vserver/helper.c linux-4.9.135-vs2.3.9.8/kernel/vserver/helper.c
17240--- linux-4.9.135/kernel/vserver/helper.c 1970-01-01 00:00:00.000000000 +0000
17241+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/helper.c 2018-10-20 04:58:15.000000000 +0000
09be7631 17242@@ -0,0 +1,242 @@
4bf69007
AM
17243+/*
17244+ * linux/kernel/vserver/helper.c
17245+ *
17246+ * Virtual Context Support
17247+ *
cc23e853 17248+ * Copyright (C) 2004-2007 Herbert P?tzl
4bf69007
AM
17249+ *
17250+ * V0.01 basic helper
17251+ *
17252+ */
e3afe727 17253+
4bf69007
AM
17254+#include <linux/kmod.h>
17255+#include <linux/reboot.h>
17256+#include <linux/vs_context.h>
17257+#include <linux/vs_network.h>
17258+#include <linux/vserver/signal.h>
e3afe727 17259+
4bf69007
AM
17260+
17261+char vshelper_path[255] = "/sbin/vshelper";
17262+
17263+static int vshelper_init(struct subprocess_info *info, struct cred *new_cred)
17264+{
09be7631 17265+ current->flags &= ~PF_NO_SETAFFINITY;
4bf69007 17266+ return 0;
d337f35e
JR
17267+}
17268+
09be7631
JR
17269+static int vs_call_usermodehelper(char *path, char **argv, char **envp, int wait)
17270+{
17271+ struct subprocess_info *info;
17272+ gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
17273+
17274+ info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
17275+ vshelper_init, NULL, NULL);
17276+ if (info == NULL)
17277+ return -ENOMEM;
17278+
17279+ return call_usermodehelper_exec(info, wait);
17280+}
17281+
4bf69007 17282+static int do_vshelper(char *name, char *argv[], char *envp[], int sync)
d337f35e 17283+{
4bf69007 17284+ int ret;
e3afe727 17285+
09be7631
JR
17286+ if ((ret = vs_call_usermodehelper(name, argv, envp,
17287+ sync ? UMH_WAIT_PROC : UMH_WAIT_EXEC))) {
4bf69007
AM
17288+ printk(KERN_WARNING "%s: (%s %s) returned %s with %d\n",
17289+ name, argv[1], argv[2],
17290+ sync ? "sync" : "async", ret);
17291+ }
17292+ vxdprintk(VXD_CBIT(switch, 4),
17293+ "%s: (%s %s) returned %s with %d",
17294+ name, argv[1], argv[2], sync ? "sync" : "async", ret);
17295+ return ret;
17296+}
e3afe727 17297+
4bf69007
AM
17298+/*
17299+ * vshelper path is set via /proc/sys
17300+ * invoked by vserver sys_reboot(), with
17301+ * the following arguments
17302+ *
17303+ * argv [0] = vshelper_path;
17304+ * argv [1] = action: "restart", "halt", "poweroff", ...
17305+ * argv [2] = context identifier
17306+ *
17307+ * envp [*] = type-specific parameters
17308+ */
e3afe727 17309+
4bf69007
AM
17310+long vs_reboot_helper(struct vx_info *vxi, int cmd, void __user *arg)
17311+{
17312+ char id_buf[8], cmd_buf[16];
17313+ char uid_buf[16], pid_buf[16];
17314+ int ret;
e3afe727 17315+
4bf69007
AM
17316+ char *argv[] = {vshelper_path, NULL, id_buf, 0};
17317+ char *envp[] = {"HOME=/", "TERM=linux",
17318+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
17319+ uid_buf, pid_buf, cmd_buf, 0};
e3afe727 17320+
4bf69007
AM
17321+ if (vx_info_state(vxi, VXS_HELPER))
17322+ return -EAGAIN;
17323+ vxi->vx_state |= VXS_HELPER;
7b17263b 17324+
4bf69007 17325+ snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
d337f35e 17326+
4bf69007 17327+ snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
8ce283e1
AM
17328+ snprintf(uid_buf, sizeof(uid_buf), "VS_UID=%d",
17329+ from_kuid(&init_user_ns, current_uid()));
4bf69007 17330+ snprintf(pid_buf, sizeof(pid_buf), "VS_PID=%d", current->pid);
e3afe727 17331+
4bf69007
AM
17332+ switch (cmd) {
17333+ case LINUX_REBOOT_CMD_RESTART:
17334+ argv[1] = "restart";
17335+ break;
07a627a5 17336+
4bf69007
AM
17337+ case LINUX_REBOOT_CMD_HALT:
17338+ argv[1] = "halt";
17339+ break;
e3afe727 17340+
4bf69007
AM
17341+ case LINUX_REBOOT_CMD_POWER_OFF:
17342+ argv[1] = "poweroff";
17343+ break;
d337f35e 17344+
4bf69007
AM
17345+ case LINUX_REBOOT_CMD_SW_SUSPEND:
17346+ argv[1] = "swsusp";
17347+ break;
d337f35e 17348+
4bf69007
AM
17349+ case LINUX_REBOOT_CMD_OOM:
17350+ argv[1] = "oom";
17351+ break;
d337f35e 17352+
4bf69007
AM
17353+ default:
17354+ vxi->vx_state &= ~VXS_HELPER;
17355+ return 0;
d337f35e 17356+ }
4bf69007
AM
17357+
17358+ ret = do_vshelper(vshelper_path, argv, envp, 0);
17359+ vxi->vx_state &= ~VXS_HELPER;
17360+ __wakeup_vx_info(vxi);
17361+ return (ret) ? -EPERM : 0;
d337f35e
JR
17362+}
17363+
4bf69007
AM
17364+
17365+long vs_reboot(unsigned int cmd, void __user *arg)
d337f35e 17366+{
4bf69007
AM
17367+ struct vx_info *vxi = current_vx_info();
17368+ long ret = 0;
d337f35e 17369+
4bf69007
AM
17370+ vxdprintk(VXD_CBIT(misc, 5),
17371+ "vs_reboot(%p[#%d],%u)",
17372+ vxi, vxi ? vxi->vx_id : 0, cmd);
17373+
17374+ ret = vs_reboot_helper(vxi, cmd, arg);
17375+ if (ret)
17376+ return ret;
17377+
17378+ vxi->reboot_cmd = cmd;
17379+ if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
17380+ switch (cmd) {
17381+ case LINUX_REBOOT_CMD_RESTART:
17382+ case LINUX_REBOOT_CMD_HALT:
17383+ case LINUX_REBOOT_CMD_POWER_OFF:
17384+ vx_info_kill(vxi, 0, SIGKILL);
17385+ vx_info_kill(vxi, 1, SIGKILL);
17386+ default:
17387+ break;
17388+ }
d337f35e 17389+ }
4bf69007 17390+ return 0;
d337f35e
JR
17391+}
17392+
4bf69007
AM
17393+long vs_oom_action(unsigned int cmd)
17394+{
17395+ struct vx_info *vxi = current_vx_info();
17396+ long ret = 0;
d337f35e 17397+
4bf69007
AM
17398+ vxdprintk(VXD_CBIT(misc, 5),
17399+ "vs_oom_action(%p[#%d],%u)",
17400+ vxi, vxi ? vxi->vx_id : 0, cmd);
d337f35e 17401+
4bf69007
AM
17402+ ret = vs_reboot_helper(vxi, cmd, NULL);
17403+ if (ret)
17404+ return ret;
d337f35e 17405+
4bf69007
AM
17406+ vxi->reboot_cmd = cmd;
17407+ if (vx_info_flags(vxi, VXF_REBOOT_KILL, 0)) {
17408+ vx_info_kill(vxi, 0, SIGKILL);
17409+ vx_info_kill(vxi, 1, SIGKILL);
17410+ }
17411+ return 0;
17412+}
d337f35e 17413+
4bf69007
AM
17414+/*
17415+ * argv [0] = vshelper_path;
17416+ * argv [1] = action: "startup", "shutdown"
17417+ * argv [2] = context identifier
17418+ *
17419+ * envp [*] = type-specific parameters
17420+ */
d337f35e 17421+
4bf69007 17422+long vs_state_change(struct vx_info *vxi, unsigned int cmd)
d337f35e 17423+{
4bf69007
AM
17424+ char id_buf[8], cmd_buf[16];
17425+ char *argv[] = {vshelper_path, NULL, id_buf, 0};
17426+ char *envp[] = {"HOME=/", "TERM=linux",
17427+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
17428+
17429+ if (!vx_info_flags(vxi, VXF_SC_HELPER, 0))
17430+ return 0;
17431+
17432+ snprintf(id_buf, sizeof(id_buf), "%d", vxi->vx_id);
17433+ snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
17434+
17435+ switch (cmd) {
17436+ case VSC_STARTUP:
17437+ argv[1] = "startup";
17438+ break;
17439+ case VSC_SHUTDOWN:
17440+ argv[1] = "shutdown";
17441+ break;
17442+ default:
17443+ return 0;
17444+ }
17445+
17446+ return do_vshelper(vshelper_path, argv, envp, 1);
d337f35e
JR
17447+}
17448+
d337f35e 17449+
4bf69007
AM
17450+/*
17451+ * argv [0] = vshelper_path;
17452+ * argv [1] = action: "netup", "netdown"
17453+ * argv [2] = context identifier
17454+ *
17455+ * envp [*] = type-specific parameters
17456+ */
17457+
17458+long vs_net_change(struct nx_info *nxi, unsigned int cmd)
17459+{
17460+ char id_buf[8], cmd_buf[16];
17461+ char *argv[] = {vshelper_path, NULL, id_buf, 0};
17462+ char *envp[] = {"HOME=/", "TERM=linux",
17463+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
17464+
17465+ if (!nx_info_flags(nxi, NXF_SC_HELPER, 0))
17466+ return 0;
17467+
17468+ snprintf(id_buf, sizeof(id_buf), "%d", nxi->nx_id);
17469+ snprintf(cmd_buf, sizeof(cmd_buf), "VS_CMD=%08x", cmd);
17470+
17471+ switch (cmd) {
17472+ case VSC_NETUP:
17473+ argv[1] = "netup";
17474+ break;
17475+ case VSC_NETDOWN:
17476+ argv[1] = "netdown";
17477+ break;
17478+ default:
17479+ return 0;
17480+ }
17481+
17482+ return do_vshelper(vshelper_path, argv, envp, 1);
17483+}
d337f35e 17484+
09a55596
AM
17485diff -NurpP --minimal linux-4.9.135/kernel/vserver/history.c linux-4.9.135-vs2.3.9.8/kernel/vserver/history.c
17486--- linux-4.9.135/kernel/vserver/history.c 1970-01-01 00:00:00.000000000 +0000
17487+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/history.c 2018-10-20 04:58:15.000000000 +0000
4bf69007 17488@@ -0,0 +1,258 @@
d337f35e 17489+/*
4bf69007 17490+ * kernel/vserver/history.c
d337f35e 17491+ *
4bf69007 17492+ * Virtual Context History Backtrace
d337f35e 17493+ *
cc23e853 17494+ * Copyright (C) 2004-2007 Herbert P?tzl
d337f35e 17495+ *
4bf69007
AM
17496+ * V0.01 basic structure
17497+ * V0.02 hash/unhash and trace
17498+ * V0.03 preemption fixes
d337f35e
JR
17499+ *
17500+ */
17501+
4bf69007
AM
17502+#include <linux/module.h>
17503+#include <asm/uaccess.h>
d337f35e 17504+
4bf69007
AM
17505+#include <linux/vserver/context.h>
17506+#include <linux/vserver/debug.h>
17507+#include <linux/vserver/debug_cmd.h>
17508+#include <linux/vserver/history.h>
d337f35e
JR
17509+
17510+
4bf69007
AM
17511+#ifdef CONFIG_VSERVER_HISTORY
17512+#define VXH_SIZE CONFIG_VSERVER_HISTORY_SIZE
17513+#else
17514+#define VXH_SIZE 64
17515+#endif
d337f35e 17516+
4bf69007
AM
17517+struct _vx_history {
17518+ unsigned int counter;
2380c486 17519+
4bf69007
AM
17520+ struct _vx_hist_entry entry[VXH_SIZE + 1];
17521+};
2380c486 17522+
2380c486 17523+
4bf69007 17524+DEFINE_PER_CPU(struct _vx_history, vx_history_buffer);
2380c486 17525+
4bf69007 17526+unsigned volatile int vxh_active = 1;
2380c486 17527+
4bf69007 17528+static atomic_t sequence = ATOMIC_INIT(0);
2380c486 17529+
2380c486 17530+
4bf69007 17531+/* vxh_advance()
2380c486 17532+
4bf69007
AM
17533+ * requires disabled preemption */
17534+
17535+struct _vx_hist_entry *vxh_advance(void *loc)
2380c486 17536+{
4bf69007
AM
17537+ unsigned int cpu = smp_processor_id();
17538+ struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
17539+ struct _vx_hist_entry *entry;
17540+ unsigned int index;
17541+
17542+ index = vxh_active ? (hist->counter++ % VXH_SIZE) : VXH_SIZE;
17543+ entry = &hist->entry[index];
17544+
17545+ entry->seq = atomic_inc_return(&sequence);
17546+ entry->loc = loc;
17547+ return entry;
2380c486
JR
17548+}
17549+
4bf69007 17550+EXPORT_SYMBOL_GPL(vxh_advance);
2380c486 17551+
2380c486 17552+
4bf69007 17553+#define VXH_LOC_FMTS "(#%04x,*%d):%p"
2380c486 17554+
4bf69007 17555+#define VXH_LOC_ARGS(e) (e)->seq, cpu, (e)->loc
2380c486 17556+
2380c486 17557+
4bf69007 17558+#define VXH_VXI_FMTS "%p[#%d,%d.%d]"
2380c486 17559+
4bf69007
AM
17560+#define VXH_VXI_ARGS(e) (e)->vxi.ptr, \
17561+ (e)->vxi.ptr ? (e)->vxi.xid : 0, \
17562+ (e)->vxi.ptr ? (e)->vxi.usecnt : 0, \
17563+ (e)->vxi.ptr ? (e)->vxi.tasks : 0
17564+
17565+void vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu)
2380c486 17566+{
4bf69007
AM
17567+ switch (e->type) {
17568+ case VXH_THROW_OOPS:
17569+ printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e));
17570+ break;
2380c486 17571+
4bf69007
AM
17572+ case VXH_GET_VX_INFO:
17573+ case VXH_PUT_VX_INFO:
17574+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
17575+ VXH_LOC_ARGS(e),
17576+ (e->type == VXH_GET_VX_INFO) ? "get" : "put",
17577+ VXH_VXI_ARGS(e));
17578+ break;
2380c486 17579+
4bf69007
AM
17580+ case VXH_INIT_VX_INFO:
17581+ case VXH_SET_VX_INFO:
17582+ case VXH_CLR_VX_INFO:
17583+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
17584+ VXH_LOC_ARGS(e),
17585+ (e->type == VXH_INIT_VX_INFO) ? "init" :
17586+ ((e->type == VXH_SET_VX_INFO) ? "set" : "clr"),
17587+ VXH_VXI_ARGS(e), e->sc.data);
17588+ break;
2380c486 17589+
4bf69007
AM
17590+ case VXH_CLAIM_VX_INFO:
17591+ case VXH_RELEASE_VX_INFO:
17592+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
17593+ VXH_LOC_ARGS(e),
17594+ (e->type == VXH_CLAIM_VX_INFO) ? "claim" : "release",
17595+ VXH_VXI_ARGS(e), e->sc.data);
17596+ break;
2380c486 17597+
4bf69007
AM
17598+ case VXH_ALLOC_VX_INFO:
17599+ case VXH_DEALLOC_VX_INFO:
17600+ printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
17601+ VXH_LOC_ARGS(e),
17602+ (e->type == VXH_ALLOC_VX_INFO) ? "alloc" : "dealloc",
17603+ VXH_VXI_ARGS(e));
17604+ break;
2380c486 17605+
4bf69007
AM
17606+ case VXH_HASH_VX_INFO:
17607+ case VXH_UNHASH_VX_INFO:
17608+ printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n",
17609+ VXH_LOC_ARGS(e),
17610+ (e->type == VXH_HASH_VX_INFO) ? "hash" : "unhash",
17611+ VXH_VXI_ARGS(e));
17612+ break;
2380c486 17613+
4bf69007
AM
17614+ case VXH_LOC_VX_INFO:
17615+ case VXH_LOOKUP_VX_INFO:
17616+ case VXH_CREATE_VX_INFO:
17617+ printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n",
17618+ VXH_LOC_ARGS(e),
17619+ (e->type == VXH_CREATE_VX_INFO) ? "create" :
17620+ ((e->type == VXH_LOC_VX_INFO) ? "loc" : "lookup"),
17621+ e->ll.arg, VXH_VXI_ARGS(e));
17622+ break;
2380c486
JR
17623+ }
17624+}
17625+
4bf69007
AM
17626+static void __vxh_dump_history(void)
17627+{
17628+ unsigned int i, cpu;
d337f35e 17629+
4bf69007
AM
17630+ printk("History:\tSEQ: %8x\tNR_CPUS: %d\n",
17631+ atomic_read(&sequence), NR_CPUS);
d337f35e 17632+
4bf69007
AM
17633+ for (i = 0; i < VXH_SIZE; i++) {
17634+ for_each_online_cpu(cpu) {
17635+ struct _vx_history *hist =
17636+ &per_cpu(vx_history_buffer, cpu);
17637+ unsigned int index = (hist->counter - i) % VXH_SIZE;
17638+ struct _vx_hist_entry *entry = &hist->entry[index];
d337f35e 17639+
4bf69007
AM
17640+ vxh_dump_entry(entry, cpu);
17641+ }
17642+ }
17643+}
d337f35e 17644+
4bf69007
AM
17645+void vxh_dump_history(void)
17646+{
17647+ vxh_active = 0;
17648+#ifdef CONFIG_SMP
17649+ local_irq_enable();
17650+ smp_send_stop();
17651+ local_irq_disable();
17652+#endif
17653+ __vxh_dump_history();
17654+}
d337f35e 17655+
d337f35e 17656+
4bf69007 17657+/* vserver syscall commands below here */
d337f35e 17658+
d337f35e 17659+
4bf69007
AM
17660+int vc_dump_history(uint32_t id)
17661+{
17662+ vxh_active = 0;
17663+ __vxh_dump_history();
17664+ vxh_active = 1;
2380c486 17665+
4bf69007 17666+ return 0;
d337f35e
JR
17667+}
17668+
d337f35e 17669+
4bf69007
AM
17670+int do_read_history(struct __user _vx_hist_entry *data,
17671+ int cpu, uint32_t *index, uint32_t *count)
d337f35e 17672+{
4bf69007
AM
17673+ int pos, ret = 0;
17674+ struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
17675+ int end = hist->counter;
17676+ int start = end - VXH_SIZE + 2;
17677+ int idx = *index;
d337f35e 17678+
4bf69007
AM
17679+ /* special case: get current pos */
17680+ if (!*count) {
17681+ *index = end;
17682+ return 0;
17683+ }
d337f35e 17684+
4bf69007
AM
17685+ /* have we lost some data? */
17686+ if (idx < start)
17687+ idx = start;
d337f35e 17688+
4bf69007
AM
17689+ for (pos = 0; (pos < *count) && (idx < end); pos++, idx++) {
17690+ struct _vx_hist_entry *entry =
17691+ &hist->entry[idx % VXH_SIZE];
2380c486 17692+
4bf69007
AM
17693+ /* send entry to userspace */
17694+ ret = copy_to_user(&data[pos], entry, sizeof(*entry));
17695+ if (ret)
17696+ break;
17697+ }
17698+ /* save new index and count */
17699+ *index = idx;
17700+ *count = pos;
17701+ return ret ? ret : (*index < end);
d337f35e
JR
17702+}
17703+
4bf69007 17704+int vc_read_history(uint32_t id, void __user *data)
d337f35e 17705+{
4bf69007
AM
17706+ struct vcmd_read_history_v0 vc_data;
17707+ int ret;
d337f35e 17708+
4bf69007
AM
17709+ if (id >= NR_CPUS)
17710+ return -EINVAL;
d337f35e 17711+
4bf69007
AM
17712+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17713+ return -EFAULT;
d337f35e 17714+
4bf69007
AM
17715+ ret = do_read_history((struct __user _vx_hist_entry *)vc_data.data,
17716+ id, &vc_data.index, &vc_data.count);
d337f35e 17717+
4bf69007
AM
17718+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17719+ return -EFAULT;
17720+ return ret;
d337f35e
JR
17721+}
17722+
4bf69007 17723+#ifdef CONFIG_COMPAT
d337f35e 17724+
4bf69007 17725+int vc_read_history_x32(uint32_t id, void __user *data)
d337f35e 17726+{
4bf69007
AM
17727+ struct vcmd_read_history_v0_x32 vc_data;
17728+ int ret;
d337f35e 17729+
4bf69007
AM
17730+ if (id >= NR_CPUS)
17731+ return -EINVAL;
d337f35e 17732+
4bf69007
AM
17733+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
17734+ return -EFAULT;
2380c486 17735+
4bf69007
AM
17736+ ret = do_read_history((struct __user _vx_hist_entry *)
17737+ compat_ptr(vc_data.data_ptr),
17738+ id, &vc_data.index, &vc_data.count);
d337f35e 17739+
4bf69007
AM
17740+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
17741+ return -EFAULT;
17742+ return ret;
17743+}
d337f35e 17744+
4bf69007 17745+#endif /* CONFIG_COMPAT */
d337f35e 17746+
09a55596
AM
17747diff -NurpP --minimal linux-4.9.135/kernel/vserver/inet.c linux-4.9.135-vs2.3.9.8/kernel/vserver/inet.c
17748--- linux-4.9.135/kernel/vserver/inet.c 1970-01-01 00:00:00.000000000 +0000
17749+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/inet.c 2018-10-20 04:58:15.000000000 +0000
7a9e40b8 17750@@ -0,0 +1,236 @@
d337f35e 17751+
4bf69007
AM
17752+#include <linux/in.h>
17753+#include <linux/inetdevice.h>
17754+#include <linux/export.h>
17755+#include <linux/vs_inet.h>
17756+#include <linux/vs_inet6.h>
17757+#include <linux/vserver/debug.h>
17758+#include <net/route.h>
17759+#include <net/addrconf.h>
d337f35e
JR
17760+
17761+
4bf69007 17762+int nx_v4_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
d337f35e 17763+{
4bf69007
AM
17764+ int ret = 0;
17765+
17766+ if (!nxi1 || !nxi2 || nxi1 == nxi2)
17767+ ret = 1;
17768+ else {
17769+ struct nx_addr_v4 *ptr;
7a9e40b8 17770+ unsigned long irqflags;
d337f35e 17771+
7a9e40b8 17772+ spin_lock_irqsave(&nxi1->addr_lock, irqflags);
4bf69007
AM
17773+ for (ptr = &nxi1->v4; ptr; ptr = ptr->next) {
17774+ if (v4_nx_addr_in_nx_info(nxi2, ptr, -1)) {
17775+ ret = 1;
17776+ break;
17777+ }
17778+ }
7a9e40b8 17779+ spin_unlock_irqrestore(&nxi1->addr_lock, irqflags);
4bf69007 17780+ }
d337f35e 17781+
4bf69007
AM
17782+ vxdprintk(VXD_CBIT(net, 2),
17783+ "nx_v4_addr_conflict(%p,%p): %d",
17784+ nxi1, nxi2, ret);
d337f35e 17785+
4bf69007
AM
17786+ return ret;
17787+}
d337f35e 17788+
d337f35e 17789+
4bf69007
AM
17790+#ifdef CONFIG_IPV6
17791+
17792+int nx_v6_addr_conflict(struct nx_info *nxi1, struct nx_info *nxi2)
d337f35e 17793+{
4bf69007 17794+ int ret = 0;
d337f35e 17795+
4bf69007
AM
17796+ if (!nxi1 || !nxi2 || nxi1 == nxi2)
17797+ ret = 1;
17798+ else {
17799+ struct nx_addr_v6 *ptr;
7a9e40b8 17800+ unsigned long irqflags;
d337f35e 17801+
7a9e40b8 17802+ spin_lock_irqsave(&nxi1->addr_lock, irqflags);
4bf69007
AM
17803+ for (ptr = &nxi1->v6; ptr; ptr = ptr->next) {
17804+ if (v6_nx_addr_in_nx_info(nxi2, ptr, -1)) {
17805+ ret = 1;
17806+ break;
17807+ }
17808+ }
7a9e40b8 17809+ spin_unlock_irqrestore(&nxi1->addr_lock, irqflags);
4bf69007 17810+ }
d337f35e 17811+
4bf69007
AM
17812+ vxdprintk(VXD_CBIT(net, 2),
17813+ "nx_v6_addr_conflict(%p,%p): %d",
17814+ nxi1, nxi2, ret);
d337f35e 17815+
4bf69007
AM
17816+ return ret;
17817+}
d337f35e 17818+
4bf69007 17819+#endif
d337f35e 17820+
4bf69007 17821+int v4_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
d337f35e 17822+{
4bf69007
AM
17823+ struct in_device *in_dev;
17824+ struct in_ifaddr **ifap;
17825+ struct in_ifaddr *ifa;
17826+ int ret = 0;
d337f35e 17827+
4bf69007
AM
17828+ if (!dev)
17829+ goto out;
17830+ in_dev = in_dev_get(dev);
17831+ if (!in_dev)
17832+ goto out;
d337f35e 17833+
4bf69007
AM
17834+ for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
17835+ ifap = &ifa->ifa_next) {
17836+ if (v4_addr_in_nx_info(nxi, ifa->ifa_local, NXA_MASK_SHOW)) {
17837+ ret = 1;
17838+ break;
17839+ }
17840+ }
17841+ in_dev_put(in_dev);
17842+out:
17843+ return ret;
d337f35e
JR
17844+}
17845+
17846+
4bf69007 17847+#ifdef CONFIG_IPV6
d337f35e 17848+
4bf69007 17849+int v6_dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
d337f35e 17850+{
4bf69007
AM
17851+ struct inet6_dev *in_dev;
17852+ struct inet6_ifaddr *ifa;
17853+ int ret = 0;
d337f35e 17854+
4bf69007
AM
17855+ if (!dev)
17856+ goto out;
17857+ in_dev = in6_dev_get(dev);
17858+ if (!in_dev)
17859+ goto out;
d337f35e 17860+
4bf69007
AM
17861+ // for (ifap = &in_dev->addr_list; (ifa = *ifap) != NULL;
17862+ list_for_each_entry(ifa, &in_dev->addr_list, if_list) {
17863+ if (v6_addr_in_nx_info(nxi, &ifa->addr, -1)) {
17864+ ret = 1;
17865+ break;
17866+ }
d337f35e 17867+ }
4bf69007
AM
17868+ in6_dev_put(in_dev);
17869+out:
17870+ return ret;
d337f35e
JR
17871+}
17872+
4bf69007 17873+#endif
d337f35e 17874+
4bf69007
AM
17875+int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
17876+{
17877+ int ret = 1;
d337f35e 17878+
4bf69007
AM
17879+ if (!nxi)
17880+ goto out;
17881+ if (nxi->v4.type && v4_dev_in_nx_info(dev, nxi))
17882+ goto out;
17883+#ifdef CONFIG_IPV6
17884+ ret = 2;
17885+ if (nxi->v6.type && v6_dev_in_nx_info(dev, nxi))
17886+ goto out;
17887+#endif
17888+ ret = 0;
17889+out:
17890+ vxdprintk(VXD_CBIT(net, 3),
17891+ "dev_in_nx_info(%p,%p[#%d]) = %d",
17892+ dev, nxi, nxi ? nxi->nx_id : 0, ret);
17893+ return ret;
17894+}
d337f35e 17895+
4bf69007
AM
17896+struct rtable *ip_v4_find_src(struct net *net, struct nx_info *nxi,
17897+ struct flowi4 *fl4)
d337f35e 17898+{
4bf69007 17899+ struct rtable *rt;
d337f35e 17900+
4bf69007
AM
17901+ if (!nxi)
17902+ return NULL;
d337f35e 17903+
4bf69007
AM
17904+ /* FIXME: handle lback only case */
17905+ if (!NX_IPV4(nxi))
17906+ return ERR_PTR(-EPERM);
d337f35e 17907+
4bf69007
AM
17908+ vxdprintk(VXD_CBIT(net, 4),
17909+ "ip_v4_find_src(%p[#%u]) " NIPQUAD_FMT " -> " NIPQUAD_FMT,
17910+ nxi, nxi ? nxi->nx_id : 0,
17911+ NIPQUAD(fl4->saddr), NIPQUAD(fl4->daddr));
d337f35e 17912+
4bf69007
AM
17913+ /* single IP is unconditional */
17914+ if (nx_info_flags(nxi, NXF_SINGLE_IP, 0) &&
17915+ (fl4->saddr == INADDR_ANY))
17916+ fl4->saddr = nxi->v4.ip[0].s_addr;
d337f35e 17917+
4bf69007
AM
17918+ if (fl4->saddr == INADDR_ANY) {
17919+ struct nx_addr_v4 *ptr;
17920+ __be32 found = 0;
17921+
17922+ rt = __ip_route_output_key(net, fl4);
17923+ if (!IS_ERR(rt)) {
17924+ found = fl4->saddr;
17925+ ip_rt_put(rt);
17926+ vxdprintk(VXD_CBIT(net, 4),
17927+ "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
17928+ nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(found));
17929+ if (v4_addr_in_nx_info(nxi, found, NXA_MASK_BIND))
17930+ goto found;
17931+ }
d337f35e 17932+
8d50a2ea 17933+ WARN_ON_ONCE(in_irq());
b00e13aa 17934+ spin_lock_bh(&nxi->addr_lock);
4bf69007
AM
17935+ for (ptr = &nxi->v4; ptr; ptr = ptr->next) {
17936+ __be32 primary = ptr->ip[0].s_addr;
17937+ __be32 mask = ptr->mask.s_addr;
17938+ __be32 neta = primary & mask;
d337f35e 17939+
4bf69007
AM
17940+ vxdprintk(VXD_CBIT(net, 4), "ip_v4_find_src(%p[#%u]) chk: "
17941+ NIPQUAD_FMT "/" NIPQUAD_FMT "/" NIPQUAD_FMT,
17942+ nxi, nxi ? nxi->nx_id : 0, NIPQUAD(primary),
17943+ NIPQUAD(mask), NIPQUAD(neta));
17944+ if ((found & mask) != neta)
17945+ continue;
d337f35e 17946+
4bf69007
AM
17947+ fl4->saddr = primary;
17948+ rt = __ip_route_output_key(net, fl4);
17949+ vxdprintk(VXD_CBIT(net, 4),
17950+ "ip_v4_find_src(%p[#%u]) rok[%u]: " NIPQUAD_FMT,
17951+ nxi, nxi ? nxi->nx_id : 0, fl4->flowi4_oif, NIPQUAD(primary));
17952+ if (!IS_ERR(rt)) {
17953+ found = fl4->saddr;
17954+ ip_rt_put(rt);
17955+ if (found == primary)
5cb1760b 17956+ goto found_unlock;
4bf69007
AM
17957+ }
17958+ }
17959+ /* still no source ip? */
17960+ found = ipv4_is_loopback(fl4->daddr)
17961+ ? IPI_LOOPBACK : nxi->v4.ip[0].s_addr;
5cb1760b 17962+ found_unlock:
b00e13aa 17963+ spin_unlock_bh(&nxi->addr_lock);
4bf69007
AM
17964+ found:
17965+ /* assign src ip to flow */
17966+ fl4->saddr = found;
17967+
17968+ } else {
17969+ if (!v4_addr_in_nx_info(nxi, fl4->saddr, NXA_MASK_BIND))
17970+ return ERR_PTR(-EPERM);
17971+ }
d337f35e 17972+
4bf69007
AM
17973+ if (nx_info_flags(nxi, NXF_LBACK_REMAP, 0)) {
17974+ if (ipv4_is_loopback(fl4->daddr))
17975+ fl4->daddr = nxi->v4_lback.s_addr;
17976+ if (ipv4_is_loopback(fl4->saddr))
17977+ fl4->saddr = nxi->v4_lback.s_addr;
17978+ } else if (ipv4_is_loopback(fl4->daddr) &&
17979+ !nx_info_flags(nxi, NXF_LBACK_ALLOW, 0))
17980+ return ERR_PTR(-EPERM);
d337f35e 17981+
4bf69007 17982+ return NULL;
d337f35e
JR
17983+}
17984+
4bf69007 17985+EXPORT_SYMBOL_GPL(ip_v4_find_src);
d337f35e 17986+
09a55596
AM
17987diff -NurpP --minimal linux-4.9.135/kernel/vserver/init.c linux-4.9.135-vs2.3.9.8/kernel/vserver/init.c
17988--- linux-4.9.135/kernel/vserver/init.c 1970-01-01 00:00:00.000000000 +0000
17989+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/init.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 17990@@ -0,0 +1,46 @@
4bf69007
AM
17991+/*
17992+ * linux/kernel/vserver/init.c
17993+ *
17994+ * Virtual Server Init
17995+ *
cc23e853 17996+ * Copyright (C) 2004-2007 Herbert Pötzl
4bf69007
AM
17997+ *
17998+ * V0.01 basic structure
17999+ *
18000+ */
d337f35e 18001+
4bf69007 18002+#include <linux/init.h>
cc23e853 18003+#include <linux/module.h>
4bf69007
AM
18004+
18005+int vserver_register_sysctl(void);
18006+void vserver_unregister_sysctl(void);
18007+
18008+
18009+static int __init init_vserver(void)
d337f35e 18010+{
4bf69007 18011+ int ret = 0;
d337f35e 18012+
4bf69007
AM
18013+#ifdef CONFIG_VSERVER_DEBUG
18014+ vserver_register_sysctl();
18015+#endif
18016+ return ret;
d337f35e
JR
18017+}
18018+
d337f35e 18019+
4bf69007 18020+static void __exit exit_vserver(void)
d337f35e 18021+{
d337f35e 18022+
4bf69007
AM
18023+#ifdef CONFIG_VSERVER_DEBUG
18024+ vserver_unregister_sysctl();
18025+#endif
18026+ return;
d337f35e
JR
18027+}
18028+
4bf69007
AM
18029+/* FIXME: GFP_ZONETYPES gone
18030+long vx_slab[GFP_ZONETYPES]; */
18031+long vx_area;
d337f35e 18032+
d337f35e 18033+
4bf69007
AM
18034+module_init(init_vserver);
18035+module_exit(exit_vserver);
d337f35e 18036+
09a55596
AM
18037diff -NurpP --minimal linux-4.9.135/kernel/vserver/inode.c linux-4.9.135-vs2.3.9.8/kernel/vserver/inode.c
18038--- linux-4.9.135/kernel/vserver/inode.c 1970-01-01 00:00:00.000000000 +0000
18039+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/inode.c 2018-10-20 04:58:15.000000000 +0000
09be7631 18040@@ -0,0 +1,440 @@
4bf69007
AM
18041+/*
18042+ * linux/kernel/vserver/inode.c
18043+ *
18044+ * Virtual Server: File System Support
18045+ *
cc23e853 18046+ * Copyright (C) 2004-2007 Herbert Pötzl
4bf69007
AM
18047+ *
18048+ * V0.01 separated from vcontext V0.05
18049+ * V0.02 moved to tag (instead of xid)
18050+ *
18051+ */
d337f35e 18052+
4bf69007
AM
18053+#include <linux/tty.h>
18054+#include <linux/proc_fs.h>
18055+#include <linux/devpts_fs.h>
18056+#include <linux/fs.h>
18057+#include <linux/file.h>
18058+#include <linux/mount.h>
18059+#include <linux/parser.h>
18060+#include <linux/namei.h>
09be7631
JR
18061+#include <linux/magic.h>
18062+#include <linux/slab.h>
4bf69007
AM
18063+#include <linux/vserver/inode.h>
18064+#include <linux/vserver/inode_cmd.h>
18065+#include <linux/vs_base.h>
18066+#include <linux/vs_tag.h>
d337f35e 18067+
4bf69007 18068+#include <asm/uaccess.h>
09be7631 18069+#include <../../fs/proc/internal.h>
d337f35e 18070+
d337f35e 18071+
4bf69007 18072+static int __vc_get_iattr(struct inode *in, uint32_t *tag, uint32_t *flags, uint32_t *mask)
d337f35e 18073+{
4bf69007 18074+ struct proc_dir_entry *entry;
d337f35e 18075+
4bf69007
AM
18076+ if (!in || !in->i_sb)
18077+ return -ESRCH;
d337f35e 18078+
4bf69007
AM
18079+ *flags = IATTR_TAG
18080+ | (IS_IMMUTABLE(in) ? IATTR_IMMUTABLE : 0)
18081+ | (IS_IXUNLINK(in) ? IATTR_IXUNLINK : 0)
18082+ | (IS_BARRIER(in) ? IATTR_BARRIER : 0)
18083+ | (IS_COW(in) ? IATTR_COW : 0);
18084+ *mask = IATTR_IXUNLINK | IATTR_IMMUTABLE | IATTR_COW;
d337f35e 18085+
4bf69007
AM
18086+ if (S_ISDIR(in->i_mode))
18087+ *mask |= IATTR_BARRIER;
d337f35e 18088+
4bf69007
AM
18089+ if (IS_TAGGED(in)) {
18090+ *tag = i_tag_read(in);
18091+ *mask |= IATTR_TAG;
18092+ }
2380c486 18093+
4bf69007
AM
18094+ switch (in->i_sb->s_magic) {
18095+ case PROC_SUPER_MAGIC:
18096+ entry = PROC_I(in)->pde;
d337f35e 18097+
4bf69007
AM
18098+ /* check for specific inodes? */
18099+ if (entry)
18100+ *mask |= IATTR_FLAGS;
18101+ if (entry)
18102+ *flags |= (entry->vx_flags & IATTR_FLAGS);
18103+ else
18104+ *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
18105+ break;
d337f35e 18106+
4bf69007
AM
18107+ case DEVPTS_SUPER_MAGIC:
18108+ *tag = i_tag_read(in);
18109+ *mask |= IATTR_TAG;
18110+ break;
d337f35e 18111+
4bf69007
AM
18112+ default:
18113+ break;
18114+ }
18115+ return 0;
d337f35e
JR
18116+}
18117+
4bf69007 18118+int vc_get_iattr(void __user *data)
d337f35e 18119+{
4bf69007
AM
18120+ struct path path;
18121+ struct vcmd_ctx_iattr_v1 vc_data = { .tag = -1 };
18122+ int ret;
d337f35e 18123+
4bf69007
AM
18124+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18125+ return -EFAULT;
d337f35e 18126+
4bf69007
AM
18127+ ret = user_lpath(vc_data.name, &path);
18128+ if (!ret) {
18129+ ret = __vc_get_iattr(path.dentry->d_inode,
18130+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
18131+ path_put(&path);
18132+ }
18133+ if (ret)
18134+ return ret;
d337f35e 18135+
4bf69007
AM
18136+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18137+ ret = -EFAULT;
18138+ return ret;
d337f35e
JR
18139+}
18140+
4bf69007 18141+#ifdef CONFIG_COMPAT
d337f35e 18142+
4bf69007 18143+int vc_get_iattr_x32(void __user *data)
d337f35e 18144+{
4bf69007
AM
18145+ struct path path;
18146+ struct vcmd_ctx_iattr_v1_x32 vc_data = { .tag = -1 };
18147+ int ret;
d337f35e 18148+
4bf69007
AM
18149+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18150+ return -EFAULT;
d337f35e 18151+
4bf69007
AM
18152+ ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
18153+ if (!ret) {
18154+ ret = __vc_get_iattr(path.dentry->d_inode,
18155+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
18156+ path_put(&path);
18157+ }
18158+ if (ret)
18159+ return ret;
d337f35e 18160+
2380c486 18161+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
4bf69007
AM
18162+ ret = -EFAULT;
18163+ return ret;
d337f35e
JR
18164+}
18165+
4bf69007 18166+#endif /* CONFIG_COMPAT */
d337f35e 18167+
d337f35e 18168+
4bf69007 18169+int vc_fget_iattr(uint32_t fd, void __user *data)
d337f35e 18170+{
4bf69007
AM
18171+ struct file *filp;
18172+ struct vcmd_ctx_fiattr_v0 vc_data = { .tag = -1 };
d337f35e
JR
18173+ int ret;
18174+
4bf69007 18175+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
18176+ return -EFAULT;
18177+
4bf69007 18178+ filp = fget(fd);
cc23e853 18179+ if (!filp || !filp->f_path.dentry || !filp->f_path.dentry->d_inode)
4bf69007 18180+ return -EBADF;
2380c486 18181+
cc23e853 18182+ ret = __vc_get_iattr(filp->f_path.dentry->d_inode,
4bf69007 18183+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
2380c486 18184+
4bf69007 18185+ fput(filp);
2380c486 18186+
4bf69007
AM
18187+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18188+ ret = -EFAULT;
d337f35e
JR
18189+ return ret;
18190+}
18191+
18192+
4bf69007 18193+static int __vc_set_iattr(struct dentry *de, uint32_t *tag, uint32_t *flags, uint32_t *mask)
2380c486 18194+{
4bf69007
AM
18195+ struct inode *in = de->d_inode;
18196+ int error = 0, is_proc = 0, has_tag = 0;
18197+ struct iattr attr = { 0 };
2380c486 18198+
4bf69007
AM
18199+ if (!in || !in->i_sb)
18200+ return -ESRCH;
2380c486 18201+
4bf69007
AM
18202+ is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
18203+ if ((*mask & IATTR_FLAGS) && !is_proc)
18204+ return -EINVAL;
2380c486 18205+
4bf69007
AM
18206+ has_tag = IS_TAGGED(in) ||
18207+ (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC);
18208+ if ((*mask & IATTR_TAG) && !has_tag)
18209+ return -EINVAL;
2380c486 18210+
cc23e853 18211+ inode_lock(in);
4bf69007 18212+ if (*mask & IATTR_TAG) {
8ce283e1 18213+ attr.ia_tag = make_ktag(&init_user_ns, *tag);
4bf69007 18214+ attr.ia_valid |= ATTR_TAG;
2380c486
JR
18215+ }
18216+
4bf69007
AM
18217+ if (*mask & IATTR_FLAGS) {
18218+ struct proc_dir_entry *entry = PROC_I(in)->pde;
18219+ unsigned int iflags = PROC_I(in)->vx_flags;
2380c486 18220+
4bf69007
AM
18221+ iflags = (iflags & ~(*mask & IATTR_FLAGS))
18222+ | (*flags & IATTR_FLAGS);
18223+ PROC_I(in)->vx_flags = iflags;
18224+ if (entry)
18225+ entry->vx_flags = iflags;
18226+ }
9f7054f1 18227+
4bf69007
AM
18228+ if (*mask & (IATTR_IMMUTABLE | IATTR_IXUNLINK |
18229+ IATTR_BARRIER | IATTR_COW)) {
18230+ int iflags = in->i_flags;
18231+ int vflags = in->i_vflags;
9f7054f1 18232+
4bf69007
AM
18233+ if (*mask & IATTR_IMMUTABLE) {
18234+ if (*flags & IATTR_IMMUTABLE)
18235+ iflags |= S_IMMUTABLE;
18236+ else
18237+ iflags &= ~S_IMMUTABLE;
18238+ }
18239+ if (*mask & IATTR_IXUNLINK) {
18240+ if (*flags & IATTR_IXUNLINK)
18241+ iflags |= S_IXUNLINK;
18242+ else
18243+ iflags &= ~S_IXUNLINK;
18244+ }
18245+ if (S_ISDIR(in->i_mode) && (*mask & IATTR_BARRIER)) {
18246+ if (*flags & IATTR_BARRIER)
18247+ vflags |= V_BARRIER;
18248+ else
18249+ vflags &= ~V_BARRIER;
18250+ }
18251+ if (S_ISREG(in->i_mode) && (*mask & IATTR_COW)) {
18252+ if (*flags & IATTR_COW)
18253+ vflags |= V_COW;
18254+ else
18255+ vflags &= ~V_COW;
18256+ }
18257+ if (in->i_op && in->i_op->sync_flags) {
18258+ error = in->i_op->sync_flags(in, iflags, vflags);
18259+ if (error)
18260+ goto out;
18261+ }
18262+ }
9f7054f1 18263+
4bf69007
AM
18264+ if (attr.ia_valid) {
18265+ if (in->i_op && in->i_op->setattr)
18266+ error = in->i_op->setattr(de, &attr);
18267+ else {
cc23e853 18268+ error = setattr_prepare(de, &attr);
4bf69007
AM
18269+ if (!error) {
18270+ setattr_copy(in, &attr);
18271+ mark_inode_dirty(in);
18272+ }
18273+ }
9f7054f1 18274+ }
9f7054f1 18275+
4bf69007 18276+out:
cc23e853 18277+ inode_unlock(in);
4bf69007
AM
18278+ return error;
18279+}
2380c486 18280+
4bf69007 18281+int vc_set_iattr(void __user *data)
d337f35e 18282+{
4bf69007
AM
18283+ struct path path;
18284+ struct vcmd_ctx_iattr_v1 vc_data;
18285+ int ret;
d337f35e 18286+
4bf69007
AM
18287+ if (!capable(CAP_LINUX_IMMUTABLE))
18288+ return -EPERM;
18289+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
18290+ return -EFAULT;
18291+
4bf69007
AM
18292+ ret = user_lpath(vc_data.name, &path);
18293+ if (!ret) {
18294+ ret = __vc_set_iattr(path.dentry,
18295+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
18296+ path_put(&path);
d337f35e 18297+ }
4bf69007
AM
18298+
18299+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18300+ ret = -EFAULT;
d337f35e
JR
18301+ return ret;
18302+}
18303+
4bf69007
AM
18304+#ifdef CONFIG_COMPAT
18305+
18306+int vc_set_iattr_x32(void __user *data)
d337f35e 18307+{
4bf69007
AM
18308+ struct path path;
18309+ struct vcmd_ctx_iattr_v1_x32 vc_data;
18310+ int ret;
d337f35e 18311+
4bf69007
AM
18312+ if (!capable(CAP_LINUX_IMMUTABLE))
18313+ return -EPERM;
18314+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
2380c486
JR
18315+ return -EFAULT;
18316+
4bf69007
AM
18317+ ret = user_lpath(compat_ptr(vc_data.name_ptr), &path);
18318+ if (!ret) {
18319+ ret = __vc_set_iattr(path.dentry,
18320+ &vc_data.tag, &vc_data.flags, &vc_data.mask);
18321+ path_put(&path);
2380c486 18322+ }
4bf69007
AM
18323+
18324+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18325+ ret = -EFAULT;
18326+ return ret;
2380c486
JR
18327+}
18328+
4bf69007 18329+#endif /* CONFIG_COMPAT */
2380c486 18330+
4bf69007 18331+int vc_fset_iattr(uint32_t fd, void __user *data)
2380c486 18332+{
4bf69007
AM
18333+ struct file *filp;
18334+ struct vcmd_ctx_fiattr_v0 vc_data;
18335+ int ret;
2380c486 18336+
4bf69007
AM
18337+ if (!capable(CAP_LINUX_IMMUTABLE))
18338+ return -EPERM;
18339+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
2380c486
JR
18340+ return -EFAULT;
18341+
4bf69007 18342+ filp = fget(fd);
cc23e853 18343+ if (!filp || !filp->f_path.dentry || !filp->f_path.dentry->d_inode)
4bf69007 18344+ return -EBADF;
2380c486 18345+
cc23e853 18346+ ret = __vc_set_iattr(filp->f_path.dentry, &vc_data.tag,
4bf69007 18347+ &vc_data.flags, &vc_data.mask);
2380c486 18348+
4bf69007 18349+ fput(filp);
2380c486 18350+
4bf69007
AM
18351+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18352+ return -EFAULT;
18353+ return ret;
2380c486
JR
18354+}
18355+
2380c486 18356+
4bf69007 18357+enum { Opt_notagcheck, Opt_tag, Opt_notag, Opt_tagid, Opt_err };
2380c486 18358+
4bf69007
AM
18359+static match_table_t tokens = {
18360+ {Opt_notagcheck, "notagcheck"},
18361+#ifdef CONFIG_PROPAGATE
18362+ {Opt_notag, "notag"},
18363+ {Opt_tag, "tag"},
18364+ {Opt_tagid, "tagid=%u"},
18365+#endif
18366+ {Opt_err, NULL}
18367+};
2380c486 18368+
9f7054f1 18369+
4bf69007
AM
18370+static void __dx_parse_remove(char *string, char *opt)
18371+{
18372+ char *p = strstr(string, opt);
18373+ char *q = p;
2380c486 18374+
4bf69007
AM
18375+ if (p) {
18376+ while (*q != '\0' && *q != ',')
18377+ q++;
18378+ while (*q)
18379+ *p++ = *q++;
18380+ while (*p)
18381+ *p++ = '\0';
2380c486 18382+ }
2380c486
JR
18383+}
18384+
61333608 18385+int dx_parse_tag(char *string, vtag_t *tag, int remove, int *mnt_flags,
4bf69007 18386+ unsigned long *flags)
9f7054f1 18387+{
4bf69007
AM
18388+ int set = 0;
18389+ substring_t args[MAX_OPT_ARGS];
18390+ int token;
18391+ char *s, *p, *opts;
18392+#if defined(CONFIG_PROPAGATE) || defined(CONFIG_VSERVER_DEBUG)
18393+ int option = 0;
18394+#endif
9f7054f1 18395+
4bf69007
AM
18396+ if (!string)
18397+ return 0;
18398+ s = kstrdup(string, GFP_KERNEL | GFP_ATOMIC);
18399+ if (!s)
18400+ return 0;
9f7054f1 18401+
4bf69007
AM
18402+ opts = s;
18403+ while ((p = strsep(&opts, ",")) != NULL) {
18404+ token = match_token(p, tokens, args);
9f7054f1 18405+
4bf69007
AM
18406+ switch (token) {
18407+#ifdef CONFIG_PROPAGATE
18408+ case Opt_tag:
18409+ if (tag)
18410+ *tag = 0;
18411+ if (remove)
18412+ __dx_parse_remove(s, "tag");
18413+ *mnt_flags |= MNT_TAGID;
18414+ set |= MNT_TAGID;
18415+ break;
18416+ case Opt_notag:
18417+ if (remove)
18418+ __dx_parse_remove(s, "notag");
18419+ *mnt_flags |= MNT_NOTAG;
18420+ set |= MNT_NOTAG;
18421+ break;
18422+ case Opt_tagid:
18423+ if (tag && !match_int(args, &option))
18424+ *tag = option;
18425+ if (remove)
18426+ __dx_parse_remove(s, "tagid");
18427+ *mnt_flags |= MNT_TAGID;
18428+ set |= MNT_TAGID;
18429+ break;
18430+#endif /* CONFIG_PROPAGATE */
18431+ case Opt_notagcheck:
18432+ if (remove)
18433+ __dx_parse_remove(s, "notagcheck");
18434+ *flags |= MS_NOTAGCHECK;
18435+ set |= MS_NOTAGCHECK;
18436+ break;
18437+ }
18438+ vxdprintk(VXD_CBIT(tag, 7),
18439+ "dx_parse_tag(" VS_Q("%s") "): %d:#%d",
18440+ p, token, option);
18441+ }
18442+ if (set)
18443+ strcpy(string, s);
18444+ kfree(s);
18445+ return set;
9f7054f1 18446+}
2380c486 18447+
4bf69007 18448+#ifdef CONFIG_PROPAGATE
2380c486 18449+
4bf69007 18450+void __dx_propagate_tag(struct nameidata *nd, struct inode *inode)
2380c486 18451+{
61333608 18452+ vtag_t new_tag = 0;
4bf69007
AM
18453+ struct vfsmount *mnt;
18454+ int propagate;
2380c486 18455+
4bf69007
AM
18456+ if (!nd)
18457+ return;
18458+ mnt = nd->path.mnt;
18459+ if (!mnt)
18460+ return;
2380c486 18461+
4bf69007
AM
18462+ propagate = (mnt->mnt_flags & MNT_TAGID);
18463+ if (propagate)
18464+ new_tag = mnt->mnt_tag;
2380c486 18465+
4bf69007
AM
18466+ vxdprintk(VXD_CBIT(tag, 7),
18467+ "dx_propagate_tag(%p[#%lu.%d]): %d,%d",
18468+ inode, inode->i_ino, inode->i_tag,
18469+ new_tag, (propagate) ? 1 : 0);
18470+
18471+ if (propagate)
18472+ i_tag_write(inode, new_tag);
2380c486
JR
18473+}
18474+
4bf69007 18475+#include <linux/module.h>
2380c486 18476+
4bf69007 18477+EXPORT_SYMBOL_GPL(__dx_propagate_tag);
2380c486 18478+
4bf69007 18479+#endif /* CONFIG_PROPAGATE */
2380c486 18480+
09a55596
AM
18481diff -NurpP --minimal linux-4.9.135/kernel/vserver/limit.c linux-4.9.135-vs2.3.9.8/kernel/vserver/limit.c
18482--- linux-4.9.135/kernel/vserver/limit.c 1970-01-01 00:00:00.000000000 +0000
18483+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/limit.c 2018-10-20 04:58:15.000000000 +0000
369dbd59 18484@@ -0,0 +1,386 @@
4bf69007
AM
18485+/*
18486+ * linux/kernel/vserver/limit.c
18487+ *
18488+ * Virtual Server: Context Limits
18489+ *
cc23e853 18490+ * Copyright (C) 2004-2010 Herbert Pötzl
4bf69007
AM
18491+ *
18492+ * V0.01 broken out from vcontext V0.05
18493+ * V0.02 changed vcmds to vxi arg
18494+ * V0.03 added memory cgroup support
18495+ *
18496+ */
2380c486 18497+
4bf69007
AM
18498+#include <linux/sched.h>
18499+#include <linux/module.h>
18500+#include <linux/memcontrol.h>
cc23e853 18501+#include <linux/page_counter.h>
4bf69007
AM
18502+#include <linux/vs_limit.h>
18503+#include <linux/vserver/limit.h>
18504+#include <linux/vserver/limit_cmd.h>
2380c486 18505+
4bf69007 18506+#include <asm/uaccess.h>
d337f35e 18507+
d337f35e 18508+
4bf69007
AM
18509+const char *vlimit_name[NUM_LIMITS] = {
18510+ [RLIMIT_CPU] = "CPU",
18511+ [RLIMIT_NPROC] = "NPROC",
18512+ [RLIMIT_NOFILE] = "NOFILE",
18513+ [RLIMIT_LOCKS] = "LOCKS",
18514+ [RLIMIT_SIGPENDING] = "SIGP",
18515+ [RLIMIT_MSGQUEUE] = "MSGQ",
d337f35e 18516+
4bf69007
AM
18517+ [VLIMIT_NSOCK] = "NSOCK",
18518+ [VLIMIT_OPENFD] = "OPENFD",
18519+ [VLIMIT_SHMEM] = "SHMEM",
18520+ [VLIMIT_DENTRY] = "DENTRY",
18521+};
2380c486 18522+
4bf69007 18523+EXPORT_SYMBOL_GPL(vlimit_name);
2380c486 18524+
4bf69007 18525+#define MASK_ENTRY(x) (1 << (x))
d337f35e 18526+
4bf69007
AM
18527+const struct vcmd_ctx_rlimit_mask_v0 vlimit_mask = {
18528+ /* minimum */
18529+ 0
18530+ , /* softlimit */
18531+ 0
18532+ , /* maximum */
18533+ MASK_ENTRY( RLIMIT_NPROC ) |
18534+ MASK_ENTRY( RLIMIT_NOFILE ) |
18535+ MASK_ENTRY( RLIMIT_LOCKS ) |
18536+ MASK_ENTRY( RLIMIT_MSGQUEUE ) |
d337f35e 18537+
4bf69007
AM
18538+ MASK_ENTRY( VLIMIT_NSOCK ) |
18539+ MASK_ENTRY( VLIMIT_OPENFD ) |
18540+ MASK_ENTRY( VLIMIT_SHMEM ) |
18541+ MASK_ENTRY( VLIMIT_DENTRY ) |
18542+ 0
18543+};
18544+ /* accounting only */
18545+uint32_t account_mask =
18546+ MASK_ENTRY( VLIMIT_SEMARY ) |
18547+ MASK_ENTRY( VLIMIT_NSEMS ) |
18548+ MASK_ENTRY( VLIMIT_MAPPED ) |
18549+ 0;
d337f35e 18550+
4bf69007
AM
18551+
18552+static int is_valid_vlimit(int id)
18553+{
18554+ uint32_t mask = vlimit_mask.minimum |
18555+ vlimit_mask.softlimit | vlimit_mask.maximum;
18556+ return mask & (1 << id);
d337f35e
JR
18557+}
18558+
4bf69007 18559+static int is_accounted_vlimit(int id)
d337f35e 18560+{
4bf69007
AM
18561+ if (is_valid_vlimit(id))
18562+ return 1;
18563+ return account_mask & (1 << id);
18564+}
d337f35e 18565+
d337f35e 18566+
4bf69007
AM
18567+static inline uint64_t vc_get_soft(struct vx_info *vxi, int id)
18568+{
18569+ rlim_t limit = __rlim_soft(&vxi->limit, id);
18570+ return VX_VLIM(limit);
18571+}
d337f35e 18572+
4bf69007
AM
18573+static inline uint64_t vc_get_hard(struct vx_info *vxi, int id)
18574+{
18575+ rlim_t limit = __rlim_hard(&vxi->limit, id);
18576+ return VX_VLIM(limit);
18577+}
d337f35e 18578+
4bf69007
AM
18579+static int do_get_rlimit(struct vx_info *vxi, uint32_t id,
18580+ uint64_t *minimum, uint64_t *softlimit, uint64_t *maximum)
18581+{
18582+ if (!is_valid_vlimit(id))
18583+ return -EINVAL;
18584+
18585+ if (minimum)
18586+ *minimum = CRLIM_UNSET;
18587+ if (softlimit)
18588+ *softlimit = vc_get_soft(vxi, id);
18589+ if (maximum)
18590+ *maximum = vc_get_hard(vxi, id);
d337f35e
JR
18591+ return 0;
18592+}
18593+
4bf69007 18594+int vc_get_rlimit(struct vx_info *vxi, void __user *data)
d337f35e 18595+{
4bf69007
AM
18596+ struct vcmd_ctx_rlimit_v0 vc_data;
18597+ int ret;
d337f35e 18598+
4bf69007
AM
18599+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18600+ return -EFAULT;
18601+
18602+ ret = do_get_rlimit(vxi, vc_data.id,
18603+ &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
18604+ if (ret)
18605+ return ret;
d337f35e 18606+
2380c486 18607+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
d337f35e
JR
18608+ return -EFAULT;
18609+ return 0;
18610+}
18611+
4bf69007
AM
18612+static int do_set_rlimit(struct vx_info *vxi, uint32_t id,
18613+ uint64_t minimum, uint64_t softlimit, uint64_t maximum)
d337f35e 18614+{
4bf69007
AM
18615+ if (!is_valid_vlimit(id))
18616+ return -EINVAL;
d337f35e 18617+
4bf69007
AM
18618+ if (maximum != CRLIM_KEEP)
18619+ __rlim_hard(&vxi->limit, id) = VX_RLIM(maximum);
18620+ if (softlimit != CRLIM_KEEP)
18621+ __rlim_soft(&vxi->limit, id) = VX_RLIM(softlimit);
18622+
18623+ /* clamp soft limit */
18624+ if (__rlim_soft(&vxi->limit, id) > __rlim_hard(&vxi->limit, id))
18625+ __rlim_soft(&vxi->limit, id) = __rlim_hard(&vxi->limit, id);
d337f35e 18626+
d337f35e
JR
18627+ return 0;
18628+}
18629+
4bf69007
AM
18630+int vc_set_rlimit(struct vx_info *vxi, void __user *data)
18631+{
18632+ struct vcmd_ctx_rlimit_v0 vc_data;
d337f35e 18633+
4bf69007
AM
18634+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18635+ return -EFAULT;
d337f35e 18636+
4bf69007
AM
18637+ return do_set_rlimit(vxi, vc_data.id,
18638+ vc_data.minimum, vc_data.softlimit, vc_data.maximum);
18639+}
d337f35e 18640+
4bf69007 18641+#ifdef CONFIG_IA32_EMULATION
2380c486 18642+
4bf69007
AM
18643+int vc_set_rlimit_x32(struct vx_info *vxi, void __user *data)
18644+{
18645+ struct vcmd_ctx_rlimit_v0_x32 vc_data;
d337f35e 18646+
4bf69007
AM
18647+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18648+ return -EFAULT;
d337f35e 18649+
4bf69007
AM
18650+ return do_set_rlimit(vxi, vc_data.id,
18651+ vc_data.minimum, vc_data.softlimit, vc_data.maximum);
18652+}
d337f35e 18653+
4bf69007
AM
18654+int vc_get_rlimit_x32(struct vx_info *vxi, void __user *data)
18655+{
18656+ struct vcmd_ctx_rlimit_v0_x32 vc_data;
18657+ int ret;
d337f35e 18658+
4bf69007
AM
18659+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18660+ return -EFAULT;
d337f35e 18661+
4bf69007
AM
18662+ ret = do_get_rlimit(vxi, vc_data.id,
18663+ &vc_data.minimum, &vc_data.softlimit, &vc_data.maximum);
18664+ if (ret)
18665+ return ret;
2380c486 18666+
4bf69007
AM
18667+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18668+ return -EFAULT;
18669+ return 0;
2380c486 18670+}
d337f35e 18671+
4bf69007 18672+#endif /* CONFIG_IA32_EMULATION */
d337f35e
JR
18673+
18674+
4bf69007
AM
18675+int vc_get_rlimit_mask(uint32_t id, void __user *data)
18676+{
18677+ if (copy_to_user(data, &vlimit_mask, sizeof(vlimit_mask)))
18678+ return -EFAULT;
18679+ return 0;
18680+}
d337f35e
JR
18681+
18682+
4bf69007
AM
18683+static inline void vx_reset_hits(struct _vx_limit *limit)
18684+{
18685+ int lim;
d337f35e 18686+
4bf69007
AM
18687+ for (lim = 0; lim < NUM_LIMITS; lim++) {
18688+ atomic_set(&__rlim_lhit(limit, lim), 0);
18689+ }
18690+}
d337f35e 18691+
4bf69007 18692+int vc_reset_hits(struct vx_info *vxi, void __user *data)
d337f35e 18693+{
4bf69007
AM
18694+ vx_reset_hits(&vxi->limit);
18695+ return 0;
d337f35e
JR
18696+}
18697+
4bf69007 18698+static inline void vx_reset_minmax(struct _vx_limit *limit)
d337f35e 18699+{
4bf69007
AM
18700+ rlim_t value;
18701+ int lim;
18702+
18703+ for (lim = 0; lim < NUM_LIMITS; lim++) {
18704+ value = __rlim_get(limit, lim);
18705+ __rlim_rmax(limit, lim) = value;
18706+ __rlim_rmin(limit, lim) = value;
18707+ }
d337f35e
JR
18708+}
18709+
4bf69007 18710+int vc_reset_minmax(struct vx_info *vxi, void __user *data)
d337f35e 18711+{
4bf69007
AM
18712+ vx_reset_minmax(&vxi->limit);
18713+ return 0;
d337f35e
JR
18714+}
18715+
18716+
4bf69007 18717+int vc_rlimit_stat(struct vx_info *vxi, void __user *data)
d337f35e 18718+{
4bf69007
AM
18719+ struct vcmd_rlimit_stat_v0 vc_data;
18720+ struct _vx_limit *limit = &vxi->limit;
18721+ int id;
d337f35e 18722+
4bf69007
AM
18723+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
18724+ return -EFAULT;
d337f35e 18725+
4bf69007
AM
18726+ id = vc_data.id;
18727+ if (!is_accounted_vlimit(id))
18728+ return -EINVAL;
2380c486 18729+
4bf69007
AM
18730+ vx_limit_fixup(limit, id);
18731+ vc_data.hits = atomic_read(&__rlim_lhit(limit, id));
18732+ vc_data.value = __rlim_get(limit, id);
18733+ vc_data.minimum = __rlim_rmin(limit, id);
18734+ vc_data.maximum = __rlim_rmax(limit, id);
2380c486 18735+
4bf69007
AM
18736+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
18737+ return -EFAULT;
18738+ return 0;
d337f35e
JR
18739+}
18740+
d337f35e 18741+
cc23e853 18742+#ifdef CONFIG_MEMCG
369dbd59
AM
18743+
18744+void dump_sysinfo(struct sysinfo *si)
18745+{
18746+ printk(KERN_INFO "sysinfo: memunit=%u\n"
18747+ "\ttotalram:\t%lu\n"
18748+ "\tfreeram:\t%lu\n"
18749+ "\tsharedram:\t%lu\n"
18750+ "\tbufferram:\t%lu\n"
18751+ "\ttotalswap:\t%lu\n"
18752+ "\tfreeswap:\t%lu\n"
18753+ "\ttotalhigh:\t%lu\n"
18754+ "\tfreehigh:\t%lu\n",
18755+ si->mem_unit,
18756+ si->totalram,
18757+ si->freeram,
18758+ si->sharedram,
18759+ si->bufferram,
18760+ si->totalswap,
18761+ si->freeswap,
18762+ si->totalhigh,
18763+ si->freehigh);
18764+}
18765+
4bf69007 18766+void vx_vsi_meminfo(struct sysinfo *val)
d337f35e 18767+{
4bf69007 18768+ struct mem_cgroup *mcg;
369dbd59
AM
18769+ unsigned long res_limit, res_usage;
18770+ unsigned shift;
18771+
18772+ if (VXD_CBIT(cvirt, 4))
18773+ dump_sysinfo(val);
d337f35e 18774+
4bf69007
AM
18775+ rcu_read_lock();
18776+ mcg = mem_cgroup_from_task(current);
369dbd59
AM
18777+ if (VXD_CBIT(cvirt, 5))
18778+ dump_mem_cgroup(mcg);
4bf69007
AM
18779+ rcu_read_unlock();
18780+ if (!mcg)
18781+ goto out;
d337f35e 18782+
cc23e853
AM
18783+ res_limit = mem_cgroup_mem_limit_pages(mcg);
18784+ res_usage = mem_cgroup_mem_usage_pages(mcg);
369dbd59 18785+ shift = val->mem_unit == 1 ? PAGE_SHIFT : 0;
2380c486 18786+
cc23e853 18787+ if (res_limit != PAGE_COUNTER_MAX)
369dbd59
AM
18788+ val->totalram = res_limit << shift;
18789+ val->freeram = val->totalram - (res_usage << shift);
4bf69007
AM
18790+ val->bufferram = 0;
18791+ val->totalhigh = 0;
18792+ val->freehigh = 0;
18793+out:
4bf69007 18794+ return;
d337f35e
JR
18795+}
18796+
4bf69007 18797+void vx_vsi_swapinfo(struct sysinfo *val)
d337f35e 18798+{
4bf69007
AM
18799+#ifdef CONFIG_MEMCG_SWAP
18800+ struct mem_cgroup *mcg;
369dbd59
AM
18801+ unsigned long res_limit, res_usage, memsw_limit, memsw_usage;
18802+ signed long swap_limit, swap_usage;
18803+ unsigned shift;
18804+
18805+ if (VXD_CBIT(cvirt, 6))
18806+ dump_sysinfo(val);
d337f35e 18807+
4bf69007
AM
18808+ rcu_read_lock();
18809+ mcg = mem_cgroup_from_task(current);
369dbd59
AM
18810+ if (VXD_CBIT(cvirt, 7))
18811+ dump_mem_cgroup(mcg);
4bf69007
AM
18812+ rcu_read_unlock();
18813+ if (!mcg)
18814+ goto out;
d337f35e 18815+
cc23e853 18816+ res_limit = mem_cgroup_mem_limit_pages(mcg);
d337f35e 18817+
4bf69007 18818+ /* memory unlimited */
cc23e853 18819+ if (res_limit == PAGE_COUNTER_MAX)
4bf69007 18820+ goto out;
d337f35e 18821+
369dbd59
AM
18822+ res_usage = mem_cgroup_mem_usage_pages(mcg);
18823+ memsw_limit = mem_cgroup_memsw_limit_pages(mcg);
18824+ memsw_usage = mem_cgroup_memsw_usage_pages(mcg);
18825+ shift = val->mem_unit == 1 ? PAGE_SHIFT : 0;
18826+
4bf69007
AM
18827+ swap_limit = memsw_limit - res_limit;
18828+ /* we have a swap limit? */
cc23e853 18829+ if (memsw_limit != PAGE_COUNTER_MAX)
369dbd59 18830+ val->totalswap = swap_limit << shift;
d337f35e 18831+
4bf69007
AM
18832+ /* calculate swap part */
18833+ swap_usage = (memsw_usage > res_usage) ?
18834+ memsw_usage - res_usage : 0;
18835+
18836+ /* total shown minus usage gives free swap */
18837+ val->freeswap = (swap_usage < swap_limit) ?
369dbd59 18838+ val->totalswap - (swap_usage << shift) : 0;
4bf69007
AM
18839+out:
18840+#else /* !CONFIG_MEMCG_SWAP */
18841+ val->totalswap = 0;
18842+ val->freeswap = 0;
18843+#endif /* !CONFIG_MEMCG_SWAP */
4bf69007 18844+ return;
d337f35e
JR
18845+}
18846+
/*
 * vx_vsi_cached - page cache size to report for the current context
 * @val: sysinfo, only used for debug output
 *
 * Always returns 0: the cgroup based calculation is disabled (the
 * assignment is commented out and the surrounding code is only built
 * with CONFIG_MEMCG_BROKEN).
 */
long vx_vsi_cached(struct sysinfo *val)
{
	long cache = 0;
#ifdef CONFIG_MEMCG_BROKEN
	struct mem_cgroup *mcg;

	if (VXD_CBIT(cvirt, 8))
		dump_sysinfo(val);

	rcu_read_lock();
	mcg = mem_cgroup_from_task(current);
	if (VXD_CBIT(cvirt, 9))
		dump_mem_cgroup(mcg);
	rcu_read_unlock();

	if (!mcg)
		return cache;

	/* cache = mem_cgroup_stat_read_cache(mcg); */
#endif
	return cache;
}
cc23e853 18869+#endif /* CONFIG_MEMCG */
d337f35e 18870+
09a55596
AM
18871diff -NurpP --minimal linux-4.9.135/kernel/vserver/limit_init.h linux-4.9.135-vs2.3.9.8/kernel/vserver/limit_init.h
18872--- linux-4.9.135/kernel/vserver/limit_init.h 1970-01-01 00:00:00.000000000 +0000
18873+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/limit_init.h 2018-10-20 04:58:15.000000000 +0000
4bf69007 18874@@ -0,0 +1,31 @@
d337f35e
JR
18875+
18876+
4bf69007
AM
18877+static inline void vx_info_init_limit(struct _vx_limit *limit)
18878+{
18879+ int lim;
d337f35e 18880+
4bf69007
AM
18881+ for (lim = 0; lim < NUM_LIMITS; lim++) {
18882+ __rlim_soft(limit, lim) = RLIM_INFINITY;
18883+ __rlim_hard(limit, lim) = RLIM_INFINITY;
18884+ __rlim_set(limit, lim, 0);
18885+ atomic_set(&__rlim_lhit(limit, lim), 0);
18886+ __rlim_rmin(limit, lim) = 0;
18887+ __rlim_rmax(limit, lim) = 0;
18888+ }
18889+}
d337f35e 18890+
4bf69007 18891+static inline void vx_info_exit_limit(struct _vx_limit *limit)
d337f35e 18892+{
4bf69007
AM
18893+ rlim_t value;
18894+ int lim;
d337f35e 18895+
4bf69007
AM
18896+ for (lim = 0; lim < NUM_LIMITS; lim++) {
18897+ if ((1 << lim) & VLIM_NOCHECK)
18898+ continue;
18899+ value = __rlim_get(limit, lim);
18900+ vxwprintk_xid(value,
18901+ "!!! limit: %p[%s,%d] = %ld on exit.",
18902+ limit, vlimit_name[lim], lim, (long)value);
18903+ }
18904+}
d337f35e 18905+
09a55596
AM
18906diff -NurpP --minimal linux-4.9.135/kernel/vserver/limit_proc.h linux-4.9.135-vs2.3.9.8/kernel/vserver/limit_proc.h
18907--- linux-4.9.135/kernel/vserver/limit_proc.h 1970-01-01 00:00:00.000000000 +0000
18908+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/limit_proc.h 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
18909@@ -0,0 +1,57 @@
18910+#ifndef _VX_LIMIT_PROC_H
18911+#define _VX_LIMIT_PROC_H
d337f35e 18912+
4bf69007 18913+#include <linux/vserver/limit_int.h>
d337f35e 18914+
d337f35e 18915+
4bf69007
AM
18916+#define VX_LIMIT_FMT ":\t%8ld\t%8ld/%8ld\t%8lld/%8lld\t%6d\n"
18917+#define VX_LIMIT_TOP \
18918+ "Limit\t current\t min/max\t\t soft/hard\t\thits\n"
d337f35e 18919+
4bf69007
AM
18920+#define VX_LIMIT_ARG(r) \
18921+ (unsigned long)__rlim_get(limit, r), \
18922+ (unsigned long)__rlim_rmin(limit, r), \
18923+ (unsigned long)__rlim_rmax(limit, r), \
18924+ VX_VLIM(__rlim_soft(limit, r)), \
18925+ VX_VLIM(__rlim_hard(limit, r)), \
18926+ atomic_read(&__rlim_lhit(limit, r))
d337f35e 18927+
4bf69007
AM
18928+static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
18929+{
18930+ vx_limit_fixup(limit, -1);
18931+ return sprintf(buffer, VX_LIMIT_TOP
18932+ "PROC" VX_LIMIT_FMT
18933+ "VM" VX_LIMIT_FMT
18934+ "VML" VX_LIMIT_FMT
18935+ "RSS" VX_LIMIT_FMT
18936+ "ANON" VX_LIMIT_FMT
18937+ "RMAP" VX_LIMIT_FMT
18938+ "FILES" VX_LIMIT_FMT
18939+ "OFD" VX_LIMIT_FMT
18940+ "LOCKS" VX_LIMIT_FMT
18941+ "SOCK" VX_LIMIT_FMT
18942+ "MSGQ" VX_LIMIT_FMT
18943+ "SHM" VX_LIMIT_FMT
18944+ "SEMA" VX_LIMIT_FMT
18945+ "SEMS" VX_LIMIT_FMT
18946+ "DENT" VX_LIMIT_FMT,
18947+ VX_LIMIT_ARG(RLIMIT_NPROC),
18948+ VX_LIMIT_ARG(RLIMIT_AS),
18949+ VX_LIMIT_ARG(RLIMIT_MEMLOCK),
18950+ VX_LIMIT_ARG(RLIMIT_RSS),
18951+ VX_LIMIT_ARG(VLIMIT_ANON),
18952+ VX_LIMIT_ARG(VLIMIT_MAPPED),
18953+ VX_LIMIT_ARG(RLIMIT_NOFILE),
18954+ VX_LIMIT_ARG(VLIMIT_OPENFD),
18955+ VX_LIMIT_ARG(RLIMIT_LOCKS),
18956+ VX_LIMIT_ARG(VLIMIT_NSOCK),
18957+ VX_LIMIT_ARG(RLIMIT_MSGQUEUE),
18958+ VX_LIMIT_ARG(VLIMIT_SHMEM),
18959+ VX_LIMIT_ARG(VLIMIT_SEMARY),
18960+ VX_LIMIT_ARG(VLIMIT_NSEMS),
18961+ VX_LIMIT_ARG(VLIMIT_DENTRY));
d337f35e
JR
18962+}
18963+
4bf69007 18964+#endif /* _VX_LIMIT_PROC_H */
d337f35e 18965+
d337f35e 18966+
09a55596
AM
18967diff -NurpP --minimal linux-4.9.135/kernel/vserver/network.c linux-4.9.135-vs2.3.9.8/kernel/vserver/network.c
18968--- linux-4.9.135/kernel/vserver/network.c 1970-01-01 00:00:00.000000000 +0000
18969+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/network.c 2018-10-20 04:58:15.000000000 +0000
5cb1760b 18970@@ -0,0 +1,1053 @@
d337f35e 18971+/*
4bf69007 18972+ * linux/kernel/vserver/network.c
d337f35e 18973+ *
4bf69007
AM
18974+ * Virtual Server: Network Support
18975+ *
cc23e853 18976+ * Copyright (C) 2003-2007 Herbert Pötzl
4bf69007
AM
18977+ *
18978+ * V0.01 broken out from vcontext V0.05
18979+ * V0.02 cleaned up implementation
18980+ * V0.03 added equiv nx commands
18981+ * V0.04 switch to RCU based hash
18982+ * V0.05 and back to locking again
18983+ * V0.06 changed vcmds to nxi arg
18984+ * V0.07 have __create claim() the nxi
d337f35e 18985+ *
d337f35e 18986+ */
d337f35e 18987+
4bf69007
AM
18988+#include <linux/err.h>
18989+#include <linux/slab.h>
18990+#include <linux/rcupdate.h>
18991+#include <net/ipv6.h>
d337f35e 18992+
4bf69007
AM
18993+#include <linux/vs_network.h>
18994+#include <linux/vs_pid.h>
18995+#include <linux/vserver/network_cmd.h>
d337f35e
JR
18996+
18997+
4bf69007
AM
18998+atomic_t nx_global_ctotal = ATOMIC_INIT(0);
18999+atomic_t nx_global_cactive = ATOMIC_INIT(0);
d337f35e 19000+
4bf69007
AM
19001+static struct kmem_cache *nx_addr_v4_cachep = NULL;
19002+static struct kmem_cache *nx_addr_v6_cachep = NULL;
d337f35e 19003+
d337f35e 19004+
4bf69007 19005+static int __init init_network(void)
d337f35e 19006+{
4bf69007
AM
19007+ nx_addr_v4_cachep = kmem_cache_create("nx_v4_addr_cache",
19008+ sizeof(struct nx_addr_v4), 0,
19009+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
19010+ nx_addr_v6_cachep = kmem_cache_create("nx_v6_addr_cache",
19011+ sizeof(struct nx_addr_v6), 0,
19012+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
d337f35e
JR
19013+ return 0;
19014+}
19015+
19016+
4bf69007 19017+/* __alloc_nx_addr_v4() */
d337f35e 19018+
4bf69007 19019+static inline struct nx_addr_v4 *__alloc_nx_addr_v4(void)
d337f35e 19020+{
4bf69007
AM
19021+ struct nx_addr_v4 *nxa = kmem_cache_alloc(
19022+ nx_addr_v4_cachep, GFP_KERNEL);
92598135 19023+
4bf69007
AM
19024+ if (!IS_ERR(nxa))
19025+ memset(nxa, 0, sizeof(*nxa));
19026+ return nxa;
d337f35e
JR
19027+}
19028+
4bf69007 19029+/* __dealloc_nx_addr_v4() */
d337f35e 19030+
4bf69007
AM
19031+static inline void __dealloc_nx_addr_v4(struct nx_addr_v4 *nxa)
19032+{
19033+ kmem_cache_free(nx_addr_v4_cachep, nxa);
19034+}
d337f35e 19035+
4bf69007 19036+/* __dealloc_nx_addr_v4_all() */
d337f35e 19037+
4bf69007 19038+static inline void __dealloc_nx_addr_v4_all(struct nx_addr_v4 *nxa)
d337f35e 19039+{
4bf69007
AM
19040+ while (nxa) {
19041+ struct nx_addr_v4 *next = nxa->next;
d337f35e 19042+
4bf69007
AM
19043+ __dealloc_nx_addr_v4(nxa);
19044+ nxa = next;
19045+ }
19046+}
d337f35e 19047+
d337f35e 19048+
4bf69007 19049+#ifdef CONFIG_IPV6
d337f35e 19050+
4bf69007 19051+/* __alloc_nx_addr_v6() */
d337f35e 19052+
4bf69007
AM
19053+static inline struct nx_addr_v6 *__alloc_nx_addr_v6(void)
19054+{
19055+ struct nx_addr_v6 *nxa = kmem_cache_alloc(
19056+ nx_addr_v6_cachep, GFP_KERNEL);
d337f35e 19057+
4bf69007
AM
19058+ if (!IS_ERR(nxa))
19059+ memset(nxa, 0, sizeof(*nxa));
19060+ return nxa;
d337f35e
JR
19061+}
19062+
4bf69007
AM
19063+/* __dealloc_nx_addr_v6() */
19064+
19065+static inline void __dealloc_nx_addr_v6(struct nx_addr_v6 *nxa)
d337f35e 19066+{
4bf69007
AM
19067+ kmem_cache_free(nx_addr_v6_cachep, nxa);
19068+}
d337f35e 19069+
4bf69007 19070+/* __dealloc_nx_addr_v6_all() */
d337f35e 19071+
4bf69007
AM
19072+static inline void __dealloc_nx_addr_v6_all(struct nx_addr_v6 *nxa)
19073+{
19074+ while (nxa) {
19075+ struct nx_addr_v6 *next = nxa->next;
d337f35e 19076+
4bf69007
AM
19077+ __dealloc_nx_addr_v6(nxa);
19078+ nxa = next;
19079+ }
19080+}
d337f35e 19081+
4bf69007 19082+#endif /* CONFIG_IPV6 */
d337f35e 19083+
4bf69007 19084+/* __alloc_nx_info()
d337f35e 19085+
4bf69007
AM
19086+ * allocate an initialized nx_info struct
19087+ * doesn't make it visible (hash) */
d337f35e 19088+
61333608 19089+static struct nx_info *__alloc_nx_info(vnid_t nid)
d337f35e 19090+{
4bf69007 19091+ struct nx_info *new = NULL;
d337f35e 19092+
4bf69007 19093+ vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
d337f35e 19094+
4bf69007
AM
19095+ /* would this benefit from a slab cache? */
19096+ new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
19097+ if (!new)
19098+ return 0;
d337f35e 19099+
4bf69007
AM
19100+ memset(new, 0, sizeof(struct nx_info));
19101+ new->nx_id = nid;
19102+ INIT_HLIST_NODE(&new->nx_hlist);
19103+ atomic_set(&new->nx_usecnt, 0);
19104+ atomic_set(&new->nx_tasks, 0);
19105+ spin_lock_init(&new->addr_lock);
19106+ new->nx_state = 0;
d337f35e 19107+
4bf69007 19108+ new->nx_flags = NXF_INIT_SET;
d337f35e 19109+
4bf69007 19110+ /* rest of init goes here */
d337f35e 19111+
4bf69007
AM
19112+ new->v4_lback.s_addr = htonl(INADDR_LOOPBACK);
19113+ new->v4_bcast.s_addr = htonl(INADDR_BROADCAST);
19114+
19115+ vxdprintk(VXD_CBIT(nid, 0),
19116+ "alloc_nx_info(%d) = %p", nid, new);
19117+ atomic_inc(&nx_global_ctotal);
19118+ return new;
d337f35e
JR
19119+}
19120+
4bf69007 19121+/* __dealloc_nx_info()
d337f35e 19122+
4bf69007 19123+ * final disposal of nx_info */
d337f35e 19124+
4bf69007
AM
19125+static void __dealloc_nx_info(struct nx_info *nxi)
19126+{
19127+ vxdprintk(VXD_CBIT(nid, 0),
19128+ "dealloc_nx_info(%p)", nxi);
d337f35e 19129+
4bf69007
AM
19130+ nxi->nx_hlist.next = LIST_POISON1;
19131+ nxi->nx_id = -1;
d337f35e 19132+
4bf69007
AM
19133+ BUG_ON(atomic_read(&nxi->nx_usecnt));
19134+ BUG_ON(atomic_read(&nxi->nx_tasks));
19135+
19136+ __dealloc_nx_addr_v4_all(nxi->v4.next);
19137+#ifdef CONFIG_IPV6
19138+ __dealloc_nx_addr_v6_all(nxi->v6.next);
19139+#endif
19140+
19141+ nxi->nx_state |= NXS_RELEASED;
19142+ kfree(nxi);
19143+ atomic_dec(&nx_global_ctotal);
d337f35e
JR
19144+}
19145+
4bf69007
AM
19146+static void __shutdown_nx_info(struct nx_info *nxi)
19147+{
19148+ nxi->nx_state |= NXS_SHUTDOWN;
19149+ vs_net_change(nxi, VSC_NETDOWN);
19150+}
d337f35e 19151+
4bf69007 19152+/* exported stuff */
d337f35e 19153+
4bf69007
AM
19154+void free_nx_info(struct nx_info *nxi)
19155+{
19156+ /* context shutdown is mandatory */
19157+ BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
d337f35e 19158+
4bf69007
AM
19159+ /* context must not be hashed */
19160+ BUG_ON(nxi->nx_state & NXS_HASHED);
d337f35e 19161+
4bf69007
AM
19162+ BUG_ON(atomic_read(&nxi->nx_usecnt));
19163+ BUG_ON(atomic_read(&nxi->nx_tasks));
d337f35e 19164+
4bf69007
AM
19165+ __dealloc_nx_info(nxi);
19166+}
d337f35e 19167+
d337f35e 19168+
4bf69007
AM
19169+void __nx_set_lback(struct nx_info *nxi)
19170+{
19171+ int nid = nxi->nx_id;
19172+ __be32 lback = htonl(INADDR_LOOPBACK ^ ((nid & 0xFFFF) << 8));
d337f35e 19173+
4bf69007
AM
19174+ nxi->v4_lback.s_addr = lback;
19175+}
d337f35e 19176+
4bf69007
AM
19177+extern int __nx_inet_add_lback(__be32 addr);
19178+extern int __nx_inet_del_lback(__be32 addr);
d337f35e
JR
19179+
19180+
4bf69007 19181+/* hash table for nx_info hash */
d337f35e 19182+
4bf69007 19183+#define NX_HASH_SIZE 13
d337f35e 19184+
4bf69007
AM
19185+struct hlist_head nx_info_hash[NX_HASH_SIZE];
19186+
19187+static DEFINE_SPINLOCK(nx_info_hash_lock);
19188+
19189+
61333608 19190+static inline unsigned int __hashval(vnid_t nid)
d337f35e 19191+{
4bf69007 19192+ return (nid % NX_HASH_SIZE);
d337f35e
JR
19193+}
19194+
d337f35e 19195+
d337f35e 19196+
4bf69007 19197+/* __hash_nx_info()
d337f35e 19198+
4bf69007
AM
19199+ * add the nxi to the global hash table
19200+ * requires the hash_lock to be held */
19201+
19202+static inline void __hash_nx_info(struct nx_info *nxi)
d337f35e 19203+{
4bf69007 19204+ struct hlist_head *head;
d337f35e 19205+
4bf69007
AM
19206+ vxd_assert_lock(&nx_info_hash_lock);
19207+ vxdprintk(VXD_CBIT(nid, 4),
19208+ "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
d337f35e 19209+
4bf69007
AM
19210+ /* context must not be hashed */
19211+ BUG_ON(nx_info_state(nxi, NXS_HASHED));
d337f35e 19212+
4bf69007
AM
19213+ nxi->nx_state |= NXS_HASHED;
19214+ head = &nx_info_hash[__hashval(nxi->nx_id)];
19215+ hlist_add_head(&nxi->nx_hlist, head);
19216+ atomic_inc(&nx_global_cactive);
19217+}
d337f35e 19218+
4bf69007 19219+/* __unhash_nx_info()
d337f35e 19220+
4bf69007
AM
19221+ * remove the nxi from the global hash table
19222+ * requires the hash_lock to be held */
d337f35e 19223+
4bf69007
AM
19224+static inline void __unhash_nx_info(struct nx_info *nxi)
19225+{
19226+ vxd_assert_lock(&nx_info_hash_lock);
19227+ vxdprintk(VXD_CBIT(nid, 4),
19228+ "__unhash_nx_info: %p[#%d.%d.%d]", nxi, nxi->nx_id,
19229+ atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_tasks));
d337f35e 19230+
4bf69007
AM
19231+ /* context must be hashed */
19232+ BUG_ON(!nx_info_state(nxi, NXS_HASHED));
19233+ /* but without tasks */
19234+ BUG_ON(atomic_read(&nxi->nx_tasks));
d337f35e 19235+
4bf69007
AM
19236+ nxi->nx_state &= ~NXS_HASHED;
19237+ hlist_del(&nxi->nx_hlist);
19238+ atomic_dec(&nx_global_cactive);
d337f35e
JR
19239+}
19240+
d337f35e 19241+
4bf69007 19242+/* __lookup_nx_info()
d337f35e 19243+
4bf69007
AM
19244+ * requires the hash_lock to be held
19245+ * doesn't increment the nx_usecnt */
d337f35e 19246+
61333608 19247+static inline struct nx_info *__lookup_nx_info(vnid_t nid)
d337f35e 19248+{
4bf69007
AM
19249+ struct hlist_head *head = &nx_info_hash[__hashval(nid)];
19250+ struct hlist_node *pos;
19251+ struct nx_info *nxi;
d337f35e 19252+
4bf69007
AM
19253+ vxd_assert_lock(&nx_info_hash_lock);
19254+ hlist_for_each(pos, head) {
19255+ nxi = hlist_entry(pos, struct nx_info, nx_hlist);
19256+
19257+ if (nxi->nx_id == nid)
19258+ goto found;
d337f35e 19259+ }
4bf69007
AM
19260+ nxi = NULL;
19261+found:
19262+ vxdprintk(VXD_CBIT(nid, 0),
19263+ "__lookup_nx_info(#%u): %p[#%u]",
19264+ nid, nxi, nxi ? nxi->nx_id : 0);
19265+ return nxi;
d337f35e
JR
19266+}
19267+
19268+
4bf69007 19269+/* __create_nx_info()
d337f35e 19270+
4bf69007
AM
19271+ * create the requested context
19272+ * get(), claim() and hash it */
d337f35e 19273+
4bf69007
AM
19274+static struct nx_info *__create_nx_info(int id)
19275+{
19276+ struct nx_info *new, *nxi = NULL;
d337f35e 19277+
4bf69007 19278+ vxdprintk(VXD_CBIT(nid, 1), "create_nx_info(%d)*", id);
d337f35e 19279+
4bf69007
AM
19280+ if (!(new = __alloc_nx_info(id)))
19281+ return ERR_PTR(-ENOMEM);
d337f35e 19282+
4bf69007
AM
19283+ /* required to make dynamic xids unique */
19284+ spin_lock(&nx_info_hash_lock);
d337f35e 19285+
4bf69007
AM
19286+ /* static context requested */
19287+ if ((nxi = __lookup_nx_info(id))) {
19288+ vxdprintk(VXD_CBIT(nid, 0),
19289+ "create_nx_info(%d) = %p (already there)", id, nxi);
19290+ if (nx_info_flags(nxi, NXF_STATE_SETUP, 0))
19291+ nxi = ERR_PTR(-EBUSY);
19292+ else
19293+ nxi = ERR_PTR(-EEXIST);
19294+ goto out_unlock;
19295+ }
19296+ /* new context */
19297+ vxdprintk(VXD_CBIT(nid, 0),
19298+ "create_nx_info(%d) = %p (new)", id, new);
19299+ claim_nx_info(new, NULL);
19300+ __nx_set_lback(new);
19301+ __hash_nx_info(get_nx_info(new));
19302+ nxi = new, new = NULL;
d337f35e 19303+
4bf69007
AM
19304+out_unlock:
19305+ spin_unlock(&nx_info_hash_lock);
19306+ if (new)
19307+ __dealloc_nx_info(new);
19308+ return nxi;
19309+}
d337f35e
JR
19310+
19311+
d337f35e 19312+
4bf69007 19313+/* exported stuff */
d337f35e 19314+
d337f35e 19315+
4bf69007
AM
19316+void unhash_nx_info(struct nx_info *nxi)
19317+{
19318+ __shutdown_nx_info(nxi);
19319+ spin_lock(&nx_info_hash_lock);
19320+ __unhash_nx_info(nxi);
19321+ spin_unlock(&nx_info_hash_lock);
d337f35e
JR
19322+}
19323+
4bf69007 19324+/* lookup_nx_info()
d337f35e 19325+
4bf69007
AM
19326+ * search for a nx_info and get() it
19327+ * negative id means current */
d337f35e 19328+
4bf69007 19329+struct nx_info *lookup_nx_info(int id)
d337f35e 19330+{
4bf69007 19331+ struct nx_info *nxi = NULL;
d337f35e 19332+
4bf69007
AM
19333+ if (id < 0) {
19334+ nxi = get_nx_info(current_nx_info());
19335+ } else if (id > 1) {
19336+ spin_lock(&nx_info_hash_lock);
19337+ nxi = get_nx_info(__lookup_nx_info(id));
19338+ spin_unlock(&nx_info_hash_lock);
d337f35e 19339+ }
4bf69007
AM
19340+ return nxi;
19341+}
d337f35e 19342+
4bf69007 19343+/* nid_is_hashed()
d337f35e 19344+
4bf69007
AM
19345+ * verify that nid is still hashed */
19346+
61333608 19347+int nid_is_hashed(vnid_t nid)
4bf69007
AM
19348+{
19349+ int hashed;
19350+
19351+ spin_lock(&nx_info_hash_lock);
19352+ hashed = (__lookup_nx_info(nid) != NULL);
19353+ spin_unlock(&nx_info_hash_lock);
19354+ return hashed;
d337f35e
JR
19355+}
19356+
19357+
4bf69007 19358+#ifdef CONFIG_PROC_FS
d337f35e 19359+
4bf69007
AM
19360+/* get_nid_list()
19361+
19362+ * get a subset of hashed nids for proc
19363+ * assumes size is at least one */
19364+
19365+int get_nid_list(int index, unsigned int *nids, int size)
d337f35e 19366+{
4bf69007 19367+ int hindex, nr_nids = 0;
d337f35e 19368+
4bf69007
AM
19369+ /* only show current and children */
19370+ if (!nx_check(0, VS_ADMIN | VS_WATCH)) {
19371+ if (index > 0)
19372+ return 0;
19373+ nids[nr_nids] = nx_current_nid();
19374+ return 1;
19375+ }
d337f35e 19376+
4bf69007
AM
19377+ for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
19378+ struct hlist_head *head = &nx_info_hash[hindex];
19379+ struct hlist_node *pos;
d337f35e 19380+
4bf69007
AM
19381+ spin_lock(&nx_info_hash_lock);
19382+ hlist_for_each(pos, head) {
19383+ struct nx_info *nxi;
19384+
19385+ if (--index > 0)
19386+ continue;
19387+
19388+ nxi = hlist_entry(pos, struct nx_info, nx_hlist);
19389+ nids[nr_nids] = nxi->nx_id;
19390+ if (++nr_nids >= size) {
19391+ spin_unlock(&nx_info_hash_lock);
d337f35e 19392+ goto out;
4bf69007 19393+ }
d337f35e 19394+ }
4bf69007
AM
19395+ /* keep the lock time short */
19396+ spin_unlock(&nx_info_hash_lock);
d337f35e
JR
19397+ }
19398+out:
4bf69007 19399+ return nr_nids;
d337f35e 19400+}
4bf69007 19401+#endif
d337f35e 19402+
4bf69007
AM
19403+
19404+/*
19405+ * migrate task to new network
19406+ * gets nxi, puts old_nxi on change
19407+ */
19408+
19409+int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
2380c486 19410+{
4bf69007
AM
19411+ struct nx_info *old_nxi;
19412+ int ret = 0;
2380c486 19413+
4bf69007
AM
19414+ if (!p || !nxi)
19415+ BUG();
d337f35e 19416+
4bf69007
AM
19417+ vxdprintk(VXD_CBIT(nid, 5),
19418+ "nx_migrate_task(%p,%p[#%d.%d.%d])",
19419+ p, nxi, nxi->nx_id,
19420+ atomic_read(&nxi->nx_usecnt),
19421+ atomic_read(&nxi->nx_tasks));
d337f35e 19422+
4bf69007
AM
19423+ if (nx_info_flags(nxi, NXF_INFO_PRIVATE, 0) &&
19424+ !nx_info_flags(nxi, NXF_STATE_SETUP, 0))
19425+ return -EACCES;
d337f35e 19426+
4bf69007
AM
19427+ if (nx_info_state(nxi, NXS_SHUTDOWN))
19428+ return -EFAULT;
d337f35e 19429+
4bf69007
AM
19430+ /* maybe disallow this completely? */
19431+ old_nxi = task_get_nx_info(p);
19432+ if (old_nxi == nxi)
19433+ goto out;
d337f35e 19434+
4bf69007
AM
19435+ task_lock(p);
19436+ if (old_nxi)
19437+ clr_nx_info(&p->nx_info);
19438+ claim_nx_info(nxi, p);
19439+ set_nx_info(&p->nx_info, nxi);
19440+ p->nid = nxi->nx_id;
19441+ task_unlock(p);
d337f35e 19442+
4bf69007
AM
19443+ vxdprintk(VXD_CBIT(nid, 5),
19444+ "moved task %p into nxi:%p[#%d]",
19445+ p, nxi, nxi->nx_id);
d337f35e 19446+
4bf69007
AM
19447+ if (old_nxi)
19448+ release_nx_info(old_nxi, p);
19449+ ret = 0;
19450+out:
19451+ put_nx_info(old_nxi);
19452+ return ret;
19453+}
d337f35e 19454+
d337f35e 19455+
4bf69007
AM
19456+void nx_set_persistent(struct nx_info *nxi)
19457+{
19458+ vxdprintk(VXD_CBIT(nid, 6),
19459+ "nx_set_persistent(%p[#%d])", nxi, nxi->nx_id);
d337f35e 19460+
4bf69007
AM
19461+ get_nx_info(nxi);
19462+ claim_nx_info(nxi, NULL);
d337f35e
JR
19463+}
19464+
4bf69007 19465+void nx_clear_persistent(struct nx_info *nxi)
2380c486 19466+{
4bf69007
AM
19467+ vxdprintk(VXD_CBIT(nid, 6),
19468+ "nx_clear_persistent(%p[#%d])", nxi, nxi->nx_id);
2380c486 19469+
4bf69007
AM
19470+ release_nx_info(nxi, NULL);
19471+ put_nx_info(nxi);
2380c486 19472+}
d337f35e 19473+
4bf69007
AM
19474+void nx_update_persistent(struct nx_info *nxi)
19475+{
19476+ if (nx_info_flags(nxi, NXF_PERSISTENT, 0))
19477+ nx_set_persistent(nxi);
19478+ else
19479+ nx_clear_persistent(nxi);
19480+}
d337f35e 19481+
4bf69007
AM
19482+/* vserver syscall commands below here */
19483+
19484+/* task nid and nx_info functions */
d337f35e 19485+
4bf69007 19486+#include <asm/uaccess.h>
d337f35e
JR
19487+
19488+
4bf69007 19489+int vc_task_nid(uint32_t id)
d337f35e 19490+{
61333608 19491+ vnid_t nid;
d337f35e 19492+
4bf69007
AM
19493+ if (id) {
19494+ struct task_struct *tsk;
d337f35e 19495+
4bf69007
AM
19496+ rcu_read_lock();
19497+ tsk = find_task_by_real_pid(id);
19498+ nid = (tsk) ? tsk->nid : -ESRCH;
19499+ rcu_read_unlock();
19500+ } else
19501+ nid = nx_current_nid();
19502+ return nid;
d337f35e
JR
19503+}
19504+
19505+
4bf69007
AM
19506+int vc_nx_info(struct nx_info *nxi, void __user *data)
19507+{
19508+ struct vcmd_nx_info_v0 vc_data;
d337f35e 19509+
4bf69007 19510+ vc_data.nid = nxi->nx_id;
d337f35e 19511+
4bf69007
AM
19512+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19513+ return -EFAULT;
19514+ return 0;
19515+}
d337f35e 19516+
d337f35e 19517+
4bf69007 19518+/* network functions */
d337f35e 19519+
4bf69007
AM
19520+int vc_net_create(uint32_t nid, void __user *data)
19521+{
19522+ struct vcmd_net_create vc_data = { .flagword = NXF_INIT_SET };
19523+ struct nx_info *new_nxi;
19524+ int ret;
d337f35e 19525+
4bf69007
AM
19526+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19527+ return -EFAULT;
d337f35e 19528+
4bf69007
AM
19529+ if ((nid > MAX_S_CONTEXT) || (nid < 2))
19530+ return -EINVAL;
d337f35e 19531+
4bf69007
AM
19532+ new_nxi = __create_nx_info(nid);
19533+ if (IS_ERR(new_nxi))
19534+ return PTR_ERR(new_nxi);
d337f35e 19535+
4bf69007
AM
19536+ /* initial flags */
19537+ new_nxi->nx_flags = vc_data.flagword;
d337f35e 19538+
4bf69007
AM
19539+ ret = -ENOEXEC;
19540+ if (vs_net_change(new_nxi, VSC_NETUP))
19541+ goto out;
d337f35e 19542+
4bf69007
AM
19543+ ret = nx_migrate_task(current, new_nxi);
19544+ if (ret)
d337f35e
JR
19545+ goto out;
19546+
4bf69007
AM
19547+ /* return context id on success */
19548+ ret = new_nxi->nx_id;
d337f35e 19549+
4bf69007
AM
19550+ /* get a reference for persistent contexts */
19551+ if ((vc_data.flagword & NXF_PERSISTENT))
19552+ nx_set_persistent(new_nxi);
d337f35e 19553+out:
4bf69007
AM
19554+ release_nx_info(new_nxi, NULL);
19555+ put_nx_info(new_nxi);
19556+ return ret;
d337f35e
JR
19557+}
19558+
d337f35e 19559+
4bf69007
AM
19560+int vc_net_migrate(struct nx_info *nxi, void __user *data)
19561+{
19562+ return nx_migrate_task(current, nxi);
19563+}
d337f35e 19564+
2380c486 19565+
4bf69007
AM
19566+static inline
19567+struct nx_addr_v4 *__find_v4_addr(struct nx_info *nxi,
19568+ __be32 ip, __be32 ip2, __be32 mask, uint16_t type, uint16_t flags,
19569+ struct nx_addr_v4 **prev)
d337f35e 19570+{
4bf69007
AM
19571+ struct nx_addr_v4 *nxa = &nxi->v4;
19572+
19573+ for (; nxa; nxa = nxa->next) {
19574+ if ((nxa->ip[0].s_addr == ip) &&
19575+ (nxa->ip[1].s_addr == ip2) &&
19576+ (nxa->mask.s_addr == mask) &&
19577+ (nxa->type == type) &&
19578+ (nxa->flags == flags))
19579+ return nxa;
19580+
19581+ /* save previous entry */
19582+ if (prev)
19583+ *prev = nxa;
19584+ }
19585+ return NULL;
2380c486
JR
19586+}
19587+
4bf69007
AM
19588+int do_add_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
19589+ uint16_t type, uint16_t flags)
d337f35e 19590+{
4bf69007
AM
19591+ struct nx_addr_v4 *nxa = NULL;
19592+ struct nx_addr_v4 *new = __alloc_nx_addr_v4();
5cb1760b 19593+ unsigned long irqflags;
4bf69007 19594+ int ret = -EEXIST;
d337f35e 19595+
4bf69007
AM
19596+ if (IS_ERR(new))
19597+ return PTR_ERR(new);
d337f35e 19598+
5cb1760b 19599+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
4bf69007
AM
19600+ if (__find_v4_addr(nxi, ip, ip2, mask, type, flags, &nxa))
19601+ goto out_unlock;
2380c486 19602+
4bf69007
AM
19603+ if (NX_IPV4(nxi)) {
19604+ nxa->next = new;
19605+ nxa = new;
19606+ new = NULL;
19607+
19608+ /* remove single ip for ip list */
19609+ nxi->nx_flags &= ~NXF_SINGLE_IP;
19610+ }
19611+
19612+ nxa->ip[0].s_addr = ip;
19613+ nxa->ip[1].s_addr = ip2;
19614+ nxa->mask.s_addr = mask;
19615+ nxa->type = type;
19616+ nxa->flags = flags;
19617+ ret = 0;
19618+out_unlock:
5cb1760b 19619+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007
AM
19620+ if (new)
19621+ __dealloc_nx_addr_v4(new);
19622+ return ret;
d337f35e
JR
19623+}
19624+
4bf69007
AM
19625+int do_remove_v4_addr(struct nx_info *nxi, __be32 ip, __be32 ip2, __be32 mask,
19626+ uint16_t type, uint16_t flags)
2380c486 19627+{
4bf69007
AM
19628+ struct nx_addr_v4 *nxa = NULL;
19629+ struct nx_addr_v4 *old = NULL;
5cb1760b 19630+ unsigned long irqflags;
4bf69007 19631+ int ret = 0;
2380c486 19632+
5cb1760b 19633+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
4bf69007
AM
19634+ switch (type) {
19635+ case NXA_TYPE_ADDR:
19636+ old = __find_v4_addr(nxi, ip, ip2, mask, type, flags, &nxa);
19637+ if (old) {
19638+ if (nxa) {
19639+ nxa->next = old->next;
19640+ old->next = NULL;
19641+ } else {
19642+ if (old->next) {
19643+ nxa = old;
19644+ old = old->next;
19645+ *nxa = *old;
19646+ old->next = NULL;
19647+ } else {
19648+ memset(old, 0, sizeof(*old));
19649+ old = NULL;
19650+ }
19651+ }
19652+ } else
19653+ ret = -ESRCH;
19654+ break;
2380c486 19655+
4bf69007
AM
19656+ case NXA_TYPE_ANY:
19657+ nxa = &nxi->v4;
19658+ old = nxa->next;
19659+ memset(nxa, 0, sizeof(*nxa));
19660+ break;
19661+
19662+ default:
19663+ ret = -EINVAL;
19664+ }
5cb1760b 19665+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007
AM
19666+ __dealloc_nx_addr_v4_all(old);
19667+ return ret;
2380c486
JR
19668+}
19669+
4bf69007
AM
19670+
19671+int vc_net_add(struct nx_info *nxi, void __user *data)
2380c486 19672+{
4bf69007
AM
19673+ struct vcmd_net_addr_v0 vc_data;
19674+ int index, ret = 0;
2380c486 19675+
4bf69007 19676+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
2380c486
JR
19677+ return -EFAULT;
19678+
4bf69007
AM
19679+ switch (vc_data.type) {
19680+ case NXA_TYPE_IPV4:
19681+ if ((vc_data.count < 1) || (vc_data.count > 4))
19682+ return -EINVAL;
adc1caaa 19683+
4bf69007
AM
19684+ index = 0;
19685+ while (index < vc_data.count) {
19686+ ret = do_add_v4_addr(nxi, vc_data.ip[index].s_addr, 0,
19687+ vc_data.mask[index].s_addr, NXA_TYPE_ADDR, 0);
19688+ if (ret)
19689+ return ret;
19690+ index++;
19691+ }
19692+ ret = index;
19693+ break;
2380c486 19694+
4bf69007
AM
19695+ case NXA_TYPE_IPV4|NXA_MOD_BCAST:
19696+ nxi->v4_bcast = vc_data.ip[0];
19697+ ret = 1;
19698+ break;
2380c486 19699+
4bf69007
AM
19700+ case NXA_TYPE_IPV4|NXA_MOD_LBACK:
19701+ nxi->v4_lback = vc_data.ip[0];
19702+ ret = 1;
19703+ break;
19704+
19705+ default:
19706+ ret = -EINVAL;
19707+ break;
19708+ }
19709+ return ret;
19710+}
19711+
19712+int vc_net_remove(struct nx_info *nxi, void __user *data)
19713+{
19714+ struct vcmd_net_addr_v0 vc_data;
19715+
19716+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
2380c486 19717+ return -EFAULT;
4bf69007
AM
19718+
19719+ switch (vc_data.type) {
19720+ case NXA_TYPE_ANY:
19721+ return do_remove_v4_addr(nxi, 0, 0, 0, vc_data.type, 0);
19722+ default:
19723+ return -EINVAL;
19724+ }
2380c486
JR
19725+ return 0;
19726+}
19727+
d337f35e 19728+
4bf69007 19729+int vc_net_add_ipv4_v1(struct nx_info *nxi, void __user *data)
d337f35e 19730+{
4bf69007
AM
19731+ struct vcmd_net_addr_ipv4_v1 vc_data;
19732+
19733+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19734+ return -EFAULT;
19735+
19736+ switch (vc_data.type) {
19737+ case NXA_TYPE_ADDR:
19738+ case NXA_TYPE_MASK:
19739+ return do_add_v4_addr(nxi, vc_data.ip.s_addr, 0,
19740+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19741+
19742+ case NXA_TYPE_ADDR | NXA_MOD_BCAST:
19743+ nxi->v4_bcast = vc_data.ip;
19744+ break;
19745+
19746+ case NXA_TYPE_ADDR | NXA_MOD_LBACK:
19747+ nxi->v4_lback = vc_data.ip;
19748+ break;
19749+
19750+ default:
19751+ return -EINVAL;
19752+ }
19753+ return 0;
d337f35e
JR
19754+}
19755+
4bf69007 19756+int vc_net_add_ipv4(struct nx_info *nxi, void __user *data)
d337f35e 19757+{
4bf69007 19758+ struct vcmd_net_addr_ipv4_v2 vc_data;
d337f35e 19759+
4bf69007
AM
19760+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19761+ return -EFAULT;
19762+
19763+ switch (vc_data.type) {
19764+ case NXA_TYPE_ADDR:
19765+ case NXA_TYPE_MASK:
19766+ case NXA_TYPE_RANGE:
19767+ return do_add_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr,
19768+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
19769+
19770+ case NXA_TYPE_ADDR | NXA_MOD_BCAST:
19771+ nxi->v4_bcast = vc_data.ip;
19772+ break;
19773+
19774+ case NXA_TYPE_ADDR | NXA_MOD_LBACK:
19775+ nxi->v4_lback = vc_data.ip;
19776+ break;
19777+
19778+ default:
19779+ return -EINVAL;
19780+ }
19781+ return 0;
d337f35e
JR
19782+}
19783+
4bf69007 19784+int vc_net_rem_ipv4_v1(struct nx_info *nxi, void __user *data)
d337f35e 19785+{
4bf69007
AM
19786+ struct vcmd_net_addr_ipv4_v1 vc_data;
19787+
19788+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19789+ return -EFAULT;
19790+
19791+ return do_remove_v4_addr(nxi, vc_data.ip.s_addr, 0,
19792+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
d337f35e
JR
19793+}
19794+
4bf69007 19795+int vc_net_rem_ipv4(struct nx_info *nxi, void __user *data)
d337f35e 19796+{
4bf69007
AM
19797+ struct vcmd_net_addr_ipv4_v2 vc_data;
19798+
19799+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19800+ return -EFAULT;
19801+
19802+ return do_remove_v4_addr(nxi, vc_data.ip.s_addr, vc_data.ip2.s_addr,
19803+ vc_data.mask.s_addr, vc_data.type, vc_data.flags);
d337f35e 19804+}
d337f35e 19805+
4bf69007 19806+#ifdef CONFIG_IPV6
d337f35e
JR
19807+
19808+static inline
4bf69007
AM
19809+struct nx_addr_v6 *__find_v6_addr(struct nx_info *nxi,
19810+ struct in6_addr *ip, struct in6_addr *mask,
19811+ uint32_t prefix, uint16_t type, uint16_t flags,
19812+ struct nx_addr_v6 **prev)
d337f35e 19813+{
4bf69007 19814+ struct nx_addr_v6 *nxa = &nxi->v6;
d337f35e 19815+
4bf69007
AM
19816+ for (; nxa; nxa = nxa->next) {
19817+ if (ipv6_addr_equal(&nxa->ip, ip) &&
19818+ ipv6_addr_equal(&nxa->mask, mask) &&
19819+ (nxa->prefix == prefix) &&
19820+ (nxa->type == type) &&
19821+ (nxa->flags == flags))
19822+ return nxa;
19823+
19824+ /* save previous entry */
19825+ if (prev)
19826+ *prev = nxa;
19827+ }
19828+ return NULL;
d337f35e
JR
19829+}
19830+
d337f35e 19831+
4bf69007
AM
19832+int do_add_v6_addr(struct nx_info *nxi,
19833+ struct in6_addr *ip, struct in6_addr *mask,
19834+ uint32_t prefix, uint16_t type, uint16_t flags)
19835+{
19836+ struct nx_addr_v6 *nxa = NULL;
19837+ struct nx_addr_v6 *new = __alloc_nx_addr_v6();
5cb1760b 19838+ unsigned long irqflags;
4bf69007 19839+ int ret = -EEXIST;
d337f35e 19840+
4bf69007
AM
19841+ if (IS_ERR(new))
19842+ return PTR_ERR(new);
d337f35e 19843+
5cb1760b 19844+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
4bf69007
AM
19845+ if (__find_v6_addr(nxi, ip, mask, prefix, type, flags, &nxa))
19846+ goto out_unlock;
d337f35e 19847+
4bf69007
AM
19848+ if (NX_IPV6(nxi)) {
19849+ nxa->next = new;
19850+ nxa = new;
19851+ new = NULL;
19852+ }
d337f35e 19853+
4bf69007
AM
19854+ nxa->ip = *ip;
19855+ nxa->mask = *mask;
19856+ nxa->prefix = prefix;
19857+ nxa->type = type;
19858+ nxa->flags = flags;
19859+ ret = 0;
19860+out_unlock:
5cb1760b 19861+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007
AM
19862+ if (new)
19863+ __dealloc_nx_addr_v6(new);
19864+ return ret;
19865+}
d337f35e 19866+
4bf69007
AM
19867+int do_remove_v6_addr(struct nx_info *nxi,
19868+ struct in6_addr *ip, struct in6_addr *mask,
19869+ uint32_t prefix, uint16_t type, uint16_t flags)
d337f35e 19870+{
4bf69007
AM
19871+ struct nx_addr_v6 *nxa = NULL;
19872+ struct nx_addr_v6 *old = NULL;
5cb1760b 19873+ unsigned long irqflags;
4bf69007 19874+ int ret = 0;
d337f35e 19875+
5cb1760b 19876+ spin_lock_irqsave(&nxi->addr_lock, irqflags);
4bf69007
AM
19877+ switch (type) {
19878+ case NXA_TYPE_ADDR:
19879+ old = __find_v6_addr(nxi, ip, mask, prefix, type, flags, &nxa);
19880+ if (old) {
19881+ if (nxa) {
19882+ nxa->next = old->next;
19883+ old->next = NULL;
19884+ } else {
19885+ if (old->next) {
19886+ nxa = old;
19887+ old = old->next;
19888+ *nxa = *old;
19889+ old->next = NULL;
19890+ } else {
19891+ memset(old, 0, sizeof(*old));
19892+ old = NULL;
19893+ }
19894+ }
19895+ } else
19896+ ret = -ESRCH;
19897+ break;
d337f35e 19898+
4bf69007
AM
19899+ case NXA_TYPE_ANY:
19900+ nxa = &nxi->v6;
19901+ old = nxa->next;
19902+ memset(nxa, 0, sizeof(*nxa));
d337f35e
JR
19903+ break;
19904+
d337f35e 19905+ default:
4bf69007 19906+ ret = -EINVAL;
d337f35e 19907+ }
5cb1760b 19908+ spin_unlock_irqrestore(&nxi->addr_lock, irqflags);
4bf69007
AM
19909+ __dealloc_nx_addr_v6_all(old);
19910+ return ret;
d337f35e
JR
19911+}
19912+
4bf69007 19913+int vc_net_add_ipv6(struct nx_info *nxi, void __user *data)
d337f35e 19914+{
4bf69007 19915+ struct vcmd_net_addr_ipv6_v1 vc_data;
d337f35e 19916+
4bf69007 19917+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
d337f35e
JR
19918+ return -EFAULT;
19919+
4bf69007
AM
19920+ switch (vc_data.type) {
19921+ case NXA_TYPE_ADDR:
19922+ memset(&vc_data.mask, ~0, sizeof(vc_data.mask));
19923+ /* fallthrough */
19924+ case NXA_TYPE_MASK:
19925+ return do_add_v6_addr(nxi, &vc_data.ip, &vc_data.mask,
19926+ vc_data.prefix, vc_data.type, vc_data.flags);
19927+ default:
19928+ return -EINVAL;
19929+ }
19930+ return 0;
19931+}
d337f35e 19932+
4bf69007
AM
19933+int vc_net_remove_ipv6(struct nx_info *nxi, void __user *data)
19934+{
19935+ struct vcmd_net_addr_ipv6_v1 vc_data;
19936+
19937+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
19938+ return -EFAULT;
19939+
19940+ switch (vc_data.type) {
19941+ case NXA_TYPE_ADDR:
19942+ memset(&vc_data.mask, ~0, sizeof(vc_data.mask));
19943+ /* fallthrough */
19944+ case NXA_TYPE_MASK:
19945+ return do_remove_v6_addr(nxi, &vc_data.ip, &vc_data.mask,
19946+ vc_data.prefix, vc_data.type, vc_data.flags);
19947+ case NXA_TYPE_ANY:
19948+ return do_remove_v6_addr(nxi, NULL, NULL, 0, vc_data.type, 0);
19949+ default:
19950+ return -EINVAL;
19951+ }
19952+ return 0;
d337f35e
JR
19953+}
19954+
4bf69007 19955+#endif /* CONFIG_IPV6 */
d337f35e 19956+
4bf69007
AM
19957+
19958+int vc_get_nflags(struct nx_info *nxi, void __user *data)
d337f35e 19959+{
4bf69007 19960+ struct vcmd_net_flags_v0 vc_data;
d337f35e 19961+
4bf69007 19962+ vc_data.flagword = nxi->nx_flags;
d337f35e 19963+
4bf69007
AM
19964+ /* special STATE flag handling */
19965+ vc_data.mask = vs_mask_flags(~0ULL, nxi->nx_flags, NXF_ONE_TIME);
d337f35e 19966+
4bf69007
AM
19967+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
19968+ return -EFAULT;
19969+ return 0;
d337f35e
JR
19970+}
19971+
4bf69007
AM
19972+int vc_set_nflags(struct nx_info *nxi, void __user *data)
19973+{
19974+ struct vcmd_net_flags_v0 vc_data;
19975+ uint64_t mask, trigger;
19976+
19977+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
19978+ return -EFAULT;
d337f35e 19979+
4bf69007
AM
19980+ /* special STATE flag handling */
19981+ mask = vs_mask_mask(vc_data.mask, nxi->nx_flags, NXF_ONE_TIME);
19982+ trigger = (mask & nxi->nx_flags) ^ (mask & vc_data.flagword);
d337f35e 19983+
4bf69007
AM
19984+ nxi->nx_flags = vs_mask_flags(nxi->nx_flags,
19985+ vc_data.flagword, mask);
19986+ if (trigger & NXF_PERSISTENT)
19987+ nx_update_persistent(nxi);
19988+
19989+ return 0;
19990+}
19991+
19992+int vc_get_ncaps(struct nx_info *nxi, void __user *data)
d337f35e 19993+{
4bf69007 19994+ struct vcmd_net_caps_v0 vc_data;
d337f35e 19995+
4bf69007
AM
19996+ vc_data.ncaps = nxi->nx_ncaps;
19997+ vc_data.cmask = ~0ULL;
d337f35e 19998+
2380c486 19999+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
4bf69007
AM
20000+ return -EFAULT;
20001+ return 0;
d337f35e
JR
20002+}
20003+
4bf69007
AM
20004+int vc_set_ncaps(struct nx_info *nxi, void __user *data)
20005+{
20006+ struct vcmd_net_caps_v0 vc_data;
20007+
20008+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
20009+ return -EFAULT;
20010+
20011+ nxi->nx_ncaps = vs_mask_flags(nxi->nx_ncaps,
20012+ vc_data.ncaps, vc_data.cmask);
20013+ return 0;
20014+}
20015+
20016+
20017+#include <linux/module.h>
20018+
20019+module_init(init_network);
20020+
20021+EXPORT_SYMBOL_GPL(free_nx_info);
20022+EXPORT_SYMBOL_GPL(unhash_nx_info);
20023+
09a55596
AM
20024diff -NurpP --minimal linux-4.9.135/kernel/vserver/proc.c linux-4.9.135-vs2.3.9.8/kernel/vserver/proc.c
20025--- linux-4.9.135/kernel/vserver/proc.c 1970-01-01 00:00:00.000000000 +0000
20026+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/proc.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 20027@@ -0,0 +1,1040 @@
d337f35e 20028+/*
4bf69007 20029+ * linux/kernel/vserver/proc.c
d337f35e 20030+ *
4bf69007 20031+ * Virtual Context Support
d337f35e 20032+ *
cc23e853 20033+ * Copyright (C) 2003-2011 Herbert P?tzl
d337f35e 20034+ *
4bf69007
AM
20035+ * V0.01 basic structure
20036+ * V0.02 adaptation vs1.3.0
20037+ * V0.03 proc permissions
20038+ * V0.04 locking/generic
20039+ * V0.05 next generation procfs
20040+ * V0.06 inode validation
20041+ * V0.07 generic rewrite vid
20042+ * V0.08 remove inode type
20043+ * V0.09 added u/wmask info
d337f35e
JR
20044+ *
20045+ */
20046+
4bf69007 20047+#include <linux/proc_fs.h>
ec22aa5c 20048+#include <linux/fs_struct.h>
4bf69007
AM
20049+#include <linux/mount.h>
20050+#include <linux/namei.h>
20051+#include <asm/unistd.h>
2380c486 20052+
d337f35e 20053+#include <linux/vs_context.h>
4bf69007
AM
20054+#include <linux/vs_network.h>
20055+#include <linux/vs_cvirt.h>
d337f35e 20056+
4bf69007
AM
20057+#include <linux/in.h>
20058+#include <linux/inetdevice.h>
20059+#include <linux/vs_inet.h>
20060+#include <linux/vs_inet6.h>
d337f35e 20061+
4bf69007 20062+#include <linux/vserver/global.h>
d337f35e 20063+
4bf69007
AM
20064+#include "cvirt_proc.h"
20065+#include "cacct_proc.h"
20066+#include "limit_proc.h"
20067+#include "sched_proc.h"
20068+#include "vci_config.h"
d337f35e 20069+
09be7631
JR
20070+#include <../../fs/proc/internal.h>
20071+
2380c486 20072+
4bf69007
AM
20073+static inline char *print_cap_t(char *buffer, kernel_cap_t *c)
20074+{
20075+ unsigned __capi;
2380c486 20076+
4bf69007
AM
20077+ CAP_FOR_EACH_U32(__capi) {
20078+ buffer += sprintf(buffer, "%08x",
20079+ c->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
20080+ }
20081+ return buffer;
20082+}
2380c486 20083+
d337f35e 20084+
4bf69007 20085+static struct proc_dir_entry *proc_virtual;
d337f35e 20086+
4bf69007 20087+static struct proc_dir_entry *proc_virtnet;
d337f35e 20088+
d337f35e 20089+
4bf69007 20090+/* first the actual feeds */
d337f35e 20091+
d337f35e 20092+
4bf69007
AM
20093+static int proc_vci(char *buffer)
20094+{
20095+ return sprintf(buffer,
20096+ "VCIVersion:\t%04x:%04x\n"
20097+ "VCISyscall:\t%d\n"
20098+ "VCIKernel:\t%08x\n",
20099+ VCI_VERSION >> 16,
20100+ VCI_VERSION & 0xFFFF,
20101+ __NR_vserver,
20102+ vci_kernel_config());
20103+}
d337f35e 20104+
4bf69007
AM
20105+static int proc_virtual_info(char *buffer)
20106+{
20107+ return proc_vci(buffer);
d337f35e
JR
20108+}
20109+
4bf69007
AM
20110+static int proc_virtual_status(char *buffer)
20111+{
20112+ return sprintf(buffer,
20113+ "#CTotal:\t%d\n"
20114+ "#CActive:\t%d\n"
20115+ "#NSProxy:\t%d\t%d %d %d %d %d %d\n"
20116+ "#InitTask:\t%d\t%d %d\n",
20117+ atomic_read(&vx_global_ctotal),
20118+ atomic_read(&vx_global_cactive),
20119+ atomic_read(&vs_global_nsproxy),
20120+ atomic_read(&vs_global_fs),
20121+ atomic_read(&vs_global_mnt_ns),
20122+ atomic_read(&vs_global_uts_ns),
cc23e853 20123+ atomic_read(&vs_global_ipc_ns),
4bf69007
AM
20124+ atomic_read(&vs_global_user_ns),
20125+ atomic_read(&vs_global_pid_ns),
20126+ atomic_read(&init_task.usage),
20127+ atomic_read(&init_task.nsproxy->count),
20128+ init_task.fs->users);
20129+}
2380c486 20130+
2380c486 20131+
4bf69007 20132+int proc_vxi_info(struct vx_info *vxi, char *buffer)
d337f35e 20133+{
4bf69007 20134+ int length;
d337f35e 20135+
4bf69007
AM
20136+ length = sprintf(buffer,
20137+ "ID:\t%d\n"
20138+ "Info:\t%p\n"
20139+ "Init:\t%d\n"
20140+ "OOM:\t%lld\n",
20141+ vxi->vx_id,
20142+ vxi,
20143+ vxi->vx_initpid,
20144+ vxi->vx_badness_bias);
20145+ return length;
d337f35e
JR
20146+}
20147+
4bf69007 20148+int proc_vxi_status(struct vx_info *vxi, char *buffer)
d337f35e 20149+{
4bf69007 20150+ char *orig = buffer;
d337f35e 20151+
4bf69007
AM
20152+ buffer += sprintf(buffer,
20153+ "UseCnt:\t%d\n"
20154+ "Tasks:\t%d\n"
20155+ "Flags:\t%016llx\n",
20156+ atomic_read(&vxi->vx_usecnt),
20157+ atomic_read(&vxi->vx_tasks),
20158+ (unsigned long long)vxi->vx_flags);
d337f35e 20159+
4bf69007
AM
20160+ buffer += sprintf(buffer, "BCaps:\t");
20161+ buffer = print_cap_t(buffer, &vxi->vx_bcaps);
20162+ buffer += sprintf(buffer, "\n");
ab30d09f 20163+
4bf69007
AM
20164+ buffer += sprintf(buffer,
20165+ "CCaps:\t%016llx\n"
20166+ "Umask:\t%16llx\n"
20167+ "Wmask:\t%16llx\n"
20168+ "Spaces:\t%08lx %08lx\n",
20169+ (unsigned long long)vxi->vx_ccaps,
20170+ (unsigned long long)vxi->vx_umask,
20171+ (unsigned long long)vxi->vx_wmask,
20172+ vxi->space[0].vx_nsmask, vxi->space[1].vx_nsmask);
20173+ return buffer - orig;
20174+}
ab30d09f 20175+
4bf69007
AM
20176+int proc_vxi_limit(struct vx_info *vxi, char *buffer)
20177+{
20178+ return vx_info_proc_limit(&vxi->limit, buffer);
20179+}
d337f35e 20180+
4bf69007
AM
20181+int proc_vxi_sched(struct vx_info *vxi, char *buffer)
20182+{
20183+ int cpu, length;
d337f35e 20184+
4bf69007
AM
20185+ length = vx_info_proc_sched(&vxi->sched, buffer);
20186+ for_each_online_cpu(cpu) {
20187+ length += vx_info_proc_sched_pc(
20188+ &vx_per_cpu(vxi, sched_pc, cpu),
20189+ buffer + length, cpu);
ec22aa5c 20190+ }
4bf69007
AM
20191+ return length;
20192+}
ec22aa5c 20193+
4bf69007
AM
20194+int proc_vxi_nsproxy0(struct vx_info *vxi, char *buffer)
20195+{
20196+ return vx_info_proc_nsproxy(vxi->space[0].vx_nsproxy, buffer);
20197+}
d337f35e 20198+
4bf69007
AM
20199+int proc_vxi_nsproxy1(struct vx_info *vxi, char *buffer)
20200+{
20201+ return vx_info_proc_nsproxy(vxi->space[1].vx_nsproxy, buffer);
20202+}
ec22aa5c 20203+
4bf69007
AM
20204+int proc_vxi_cvirt(struct vx_info *vxi, char *buffer)
20205+{
20206+ int cpu, length;
d33d7b00 20207+
4bf69007
AM
20208+ vx_update_load(vxi);
20209+ length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
20210+ for_each_online_cpu(cpu) {
20211+ length += vx_info_proc_cvirt_pc(
20212+ &vx_per_cpu(vxi, cvirt_pc, cpu),
20213+ buffer + length, cpu);
3bac966d 20214+ }
4bf69007
AM
20215+ return length;
20216+}
3bac966d 20217+
4bf69007
AM
20218+int proc_vxi_cacct(struct vx_info *vxi, char *buffer)
20219+{
20220+ return vx_info_proc_cacct(&vxi->cacct, buffer);
d337f35e
JR
20221+}
20222+
20223+
4bf69007 20224+static int proc_virtnet_info(char *buffer)
d337f35e 20225+{
4bf69007
AM
20226+ return proc_vci(buffer);
20227+}
ab30d09f 20228+
4bf69007
AM
20229+static int proc_virtnet_status(char *buffer)
20230+{
20231+ return sprintf(buffer,
20232+ "#CTotal:\t%d\n"
20233+ "#CActive:\t%d\n",
20234+ atomic_read(&nx_global_ctotal),
20235+ atomic_read(&nx_global_cactive));
20236+}
d337f35e 20237+
4bf69007
AM
20238+int proc_nxi_info(struct nx_info *nxi, char *buffer)
20239+{
20240+ struct nx_addr_v4 *v4a;
20241+#ifdef CONFIG_IPV6
20242+ struct nx_addr_v6 *v6a;
20243+#endif
20244+ int length, i;
ab30d09f 20245+
4bf69007
AM
20246+ length = sprintf(buffer,
20247+ "ID:\t%d\n"
20248+ "Info:\t%p\n"
20249+ "Bcast:\t" NIPQUAD_FMT "\n"
20250+ "Lback:\t" NIPQUAD_FMT "\n",
20251+ nxi->nx_id,
20252+ nxi,
20253+ NIPQUAD(nxi->v4_bcast.s_addr),
20254+ NIPQUAD(nxi->v4_lback.s_addr));
ab30d09f 20255+
4bf69007
AM
20256+ if (!NX_IPV4(nxi))
20257+ goto skip_v4;
20258+ for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
20259+ length += sprintf(buffer + length, "%d:\t" NXAV4_FMT "\n",
20260+ i, NXAV4(v4a));
20261+skip_v4:
20262+#ifdef CONFIG_IPV6
20263+ if (!NX_IPV6(nxi))
20264+ goto skip_v6;
20265+ for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
20266+ length += sprintf(buffer + length, "%d:\t" NXAV6_FMT "\n",
20267+ i, NXAV6(v6a));
20268+skip_v6:
20269+#endif
20270+ return length;
20271+}
2380c486 20272+
4bf69007
AM
20273+int proc_nxi_status(struct nx_info *nxi, char *buffer)
20274+{
20275+ int length;
ec22aa5c 20276+
4bf69007
AM
20277+ length = sprintf(buffer,
20278+ "UseCnt:\t%d\n"
20279+ "Tasks:\t%d\n"
20280+ "Flags:\t%016llx\n"
20281+ "NCaps:\t%016llx\n",
20282+ atomic_read(&nxi->nx_usecnt),
20283+ atomic_read(&nxi->nx_tasks),
20284+ (unsigned long long)nxi->nx_flags,
20285+ (unsigned long long)nxi->nx_ncaps);
20286+ return length;
20287+}
ec22aa5c 20288+
ec22aa5c 20289+
d337f35e 20290+
4bf69007 20291+/* here the inode helpers */
d337f35e 20292+
4bf69007
AM
20293+struct vs_entry {
20294+ int len;
20295+ char *name;
20296+ mode_t mode;
20297+ struct inode_operations *iop;
20298+ struct file_operations *fop;
20299+ union proc_op op;
20300+};
d337f35e 20301+
4bf69007
AM
20302+static struct inode *vs_proc_make_inode(struct super_block *sb, struct vs_entry *p)
20303+{
20304+ struct inode *inode = new_inode(sb);
3bac966d 20305+
4bf69007
AM
20306+ if (!inode)
20307+ goto out;
3bac966d 20308+
4bf69007
AM
20309+ inode->i_mode = p->mode;
20310+ if (p->iop)
20311+ inode->i_op = p->iop;
20312+ if (p->fop)
20313+ inode->i_fop = p->fop;
3bac966d 20314+
4bf69007
AM
20315+ set_nlink(inode, (p->mode & S_IFDIR) ? 2 : 1);
20316+ inode->i_flags |= S_IMMUTABLE;
3bac966d 20317+
4bf69007 20318+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2380c486 20319+
8ce283e1
AM
20320+ i_uid_write(inode, 0);
20321+ i_gid_write(inode, 0);
20322+ i_tag_write(inode, 0);
4bf69007
AM
20323+out:
20324+ return inode;
d337f35e
JR
20325+}
20326+
4bf69007
AM
20327+static struct dentry *vs_proc_instantiate(struct inode *dir,
20328+ struct dentry *dentry, int id, void *ptr)
2380c486 20329+{
4bf69007
AM
20330+ struct vs_entry *p = ptr;
20331+ struct inode *inode = vs_proc_make_inode(dir->i_sb, p);
20332+ struct dentry *error = ERR_PTR(-EINVAL);
2380c486 20333+
4bf69007
AM
20334+ if (!inode)
20335+ goto out;
2380c486 20336+
4bf69007
AM
20337+ PROC_I(inode)->op = p->op;
20338+ PROC_I(inode)->fd = id;
20339+ d_add(dentry, inode);
20340+ error = NULL;
20341+out:
20342+ return error;
2380c486
JR
20343+}
20344+
4bf69007 20345+/* Lookups */
2380c486 20346+
09be7631
JR
20347+typedef struct dentry *vx_instantiate_t(struct inode *, struct dentry *, int, void *);
20348+
2380c486 20349+
4bf69007
AM
20350+/*
20351+ * Fill a directory entry.
20352+ *
20353+ * If possible create the dcache entry and derive our inode number and
20354+ * file type from dcache entry.
20355+ *
20356+ * Since all of the proc inode numbers are dynamically generated, the inode
20357+ * numbers do not exist until the inode is cache. This means creating the
c2e5f7c8
JR
20358+ * the dcache entry in iterate is necessary to keep the inode numbers
20359+ * reported by iterate in sync with the inode numbers reported
4bf69007
AM
20360+ * by stat.
20361+ */
c2e5f7c8 20362+static int vx_proc_fill_cache(struct file *filp, struct dir_context *ctx,
09be7631 20363+ char *name, int len, vx_instantiate_t instantiate, int id, void *ptr)
2380c486 20364+{
cc23e853 20365+ struct dentry *child, *dir = filp->f_path.dentry;
4bf69007
AM
20366+ struct inode *inode;
20367+ struct qstr qname;
20368+ ino_t ino = 0;
20369+ unsigned type = DT_UNKNOWN;
d337f35e 20370+
4bf69007
AM
20371+ qname.name = name;
20372+ qname.len = len;
cc23e853 20373+ qname.hash = full_name_hash(NULL, name, len);
d337f35e 20374+
4bf69007
AM
20375+ child = d_lookup(dir, &qname);
20376+ if (!child) {
20377+ struct dentry *new;
20378+ new = d_alloc(dir, &qname);
20379+ if (new) {
20380+ child = instantiate(dir->d_inode, new, id, ptr);
20381+ if (child)
20382+ dput(new);
20383+ else
20384+ child = new;
20385+ }
20386+ }
20387+ if (!child || IS_ERR(child) || !child->d_inode)
20388+ goto end_instantiate;
20389+ inode = child->d_inode;
20390+ if (inode) {
20391+ ino = inode->i_ino;
20392+ type = inode->i_mode >> 12;
20393+ }
20394+ dput(child);
20395+end_instantiate:
20396+ if (!ino)
4bf69007 20397+ ino = 1;
c2e5f7c8 20398+ return !dir_emit(ctx, name, len, ino, type);
4bf69007 20399+}
d337f35e 20400+
d337f35e 20401+
d337f35e 20402+
4bf69007 20403+/* get and revalidate vx_info/xid */
2380c486 20404+
4bf69007
AM
20405+static inline
20406+struct vx_info *get_proc_vx_info(struct inode *inode)
20407+{
20408+ return lookup_vx_info(PROC_I(inode)->fd);
d337f35e
JR
20409+}
20410+
4bf69007 20411+static int proc_xid_revalidate(struct dentry *dentry, unsigned int flags)
d337f35e 20412+{
4bf69007 20413+ struct inode *inode = dentry->d_inode;
61333608 20414+ vxid_t xid = PROC_I(inode)->fd;
2380c486 20415+
4bf69007
AM
20416+ if (flags & LOOKUP_RCU) /* FIXME: can be dropped? */
20417+ return -ECHILD;
2380c486 20418+
4bf69007
AM
20419+ if (!xid || xid_is_hashed(xid))
20420+ return 1;
20421+ d_drop(dentry);
d337f35e
JR
20422+ return 0;
20423+}
20424+
d337f35e 20425+
4bf69007 20426+/* get and revalidate nx_info/nid */
d337f35e 20427+
4bf69007
AM
20428+static int proc_nid_revalidate(struct dentry *dentry, unsigned int flags)
20429+{
20430+ struct inode *inode = dentry->d_inode;
61333608 20431+ vnid_t nid = PROC_I(inode)->fd;
2380c486 20432+
4bf69007
AM
20433+ if (flags & LOOKUP_RCU) /* FIXME: can be dropped? */
20434+ return -ECHILD;
2380c486 20435+
4bf69007
AM
20436+ if (!nid || nid_is_hashed(nid))
20437+ return 1;
20438+ d_drop(dentry);
20439+ return 0;
d337f35e
JR
20440+}
20441+
4bf69007
AM
20442+
20443+
20444+#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
20445+
20446+static ssize_t proc_vs_info_read(struct file *file, char __user *buf,
20447+ size_t count, loff_t *ppos)
d337f35e 20448+{
cc23e853 20449+ struct inode *inode = file->f_path.dentry->d_inode;
4bf69007
AM
20450+ unsigned long page;
20451+ ssize_t length = 0;
20452+
20453+ if (count > PROC_BLOCK_SIZE)
20454+ count = PROC_BLOCK_SIZE;
20455+
20456+ /* fade that out as soon as stable */
20457+ WARN_ON(PROC_I(inode)->fd);
20458+
20459+ if (!(page = __get_free_page(GFP_KERNEL)))
20460+ return -ENOMEM;
20461+
20462+ BUG_ON(!PROC_I(inode)->op.proc_vs_read);
20463+ length = PROC_I(inode)->op.proc_vs_read((char *)page);
20464+
20465+ if (length >= 0)
20466+ length = simple_read_from_buffer(buf, count, ppos,
20467+ (char *)page, length);
20468+
20469+ free_page(page);
20470+ return length;
d337f35e
JR
20471+}
20472+
4bf69007
AM
20473+static ssize_t proc_vx_info_read(struct file *file, char __user *buf,
20474+ size_t count, loff_t *ppos)
20475+{
cc23e853 20476+ struct inode *inode = file->f_path.dentry->d_inode;
4bf69007 20477+ struct vx_info *vxi = NULL;
61333608 20478+ vxid_t xid = PROC_I(inode)->fd;
4bf69007
AM
20479+ unsigned long page;
20480+ ssize_t length = 0;
d337f35e 20481+
4bf69007
AM
20482+ if (count > PROC_BLOCK_SIZE)
20483+ count = PROC_BLOCK_SIZE;
20484+
20485+ /* fade that out as soon as stable */
20486+ WARN_ON(!xid);
20487+ vxi = lookup_vx_info(xid);
20488+ if (!vxi)
20489+ goto out;
d337f35e 20490+
4bf69007
AM
20491+ length = -ENOMEM;
20492+ if (!(page = __get_free_page(GFP_KERNEL)))
20493+ goto out_put;
d337f35e 20494+
4bf69007
AM
20495+ BUG_ON(!PROC_I(inode)->op.proc_vxi_read);
20496+ length = PROC_I(inode)->op.proc_vxi_read(vxi, (char *)page);
d337f35e 20497+
4bf69007
AM
20498+ if (length >= 0)
20499+ length = simple_read_from_buffer(buf, count, ppos,
20500+ (char *)page, length);
d337f35e 20501+
4bf69007
AM
20502+ free_page(page);
20503+out_put:
20504+ put_vx_info(vxi);
20505+out:
20506+ return length;
20507+}
20508+
20509+static ssize_t proc_nx_info_read(struct file *file, char __user *buf,
20510+ size_t count, loff_t *ppos)
d337f35e 20511+{
cc23e853 20512+ struct inode *inode = file->f_path.dentry->d_inode;
4bf69007 20513+ struct nx_info *nxi = NULL;
61333608 20514+ vnid_t nid = PROC_I(inode)->fd;
4bf69007
AM
20515+ unsigned long page;
20516+ ssize_t length = 0;
d337f35e 20517+
4bf69007
AM
20518+ if (count > PROC_BLOCK_SIZE)
20519+ count = PROC_BLOCK_SIZE;
d337f35e 20520+
4bf69007
AM
20521+ /* fade that out as soon as stable */
20522+ WARN_ON(!nid);
20523+ nxi = lookup_nx_info(nid);
20524+ if (!nxi)
20525+ goto out;
d337f35e 20526+
4bf69007
AM
20527+ length = -ENOMEM;
20528+ if (!(page = __get_free_page(GFP_KERNEL)))
20529+ goto out_put;
d337f35e 20530+
4bf69007
AM
20531+ BUG_ON(!PROC_I(inode)->op.proc_nxi_read);
20532+ length = PROC_I(inode)->op.proc_nxi_read(nxi, (char *)page);
2380c486 20533+
4bf69007
AM
20534+ if (length >= 0)
20535+ length = simple_read_from_buffer(buf, count, ppos,
20536+ (char *)page, length);
d337f35e 20537+
4bf69007
AM
20538+ free_page(page);
20539+out_put:
20540+ put_nx_info(nxi);
20541+out:
20542+ return length;
20543+}
2380c486 20544+
d337f35e 20545+
763640ca 20546+
4bf69007 20547+/* here comes the lower level */
763640ca 20548+
265d6dcc 20549+
4bf69007
AM
20550+#define NOD(NAME, MODE, IOP, FOP, OP) { \
20551+ .len = sizeof(NAME) - 1, \
20552+ .name = (NAME), \
20553+ .mode = MODE, \
20554+ .iop = IOP, \
20555+ .fop = FOP, \
20556+ .op = OP, \
20557+}
d337f35e 20558+
d337f35e 20559+
4bf69007
AM
20560+#define DIR(NAME, MODE, OTYPE) \
20561+ NOD(NAME, (S_IFDIR | (MODE)), \
20562+ &proc_ ## OTYPE ## _inode_operations, \
20563+ &proc_ ## OTYPE ## _file_operations, { } )
d337f35e 20564+
4bf69007
AM
20565+#define INF(NAME, MODE, OTYPE) \
20566+ NOD(NAME, (S_IFREG | (MODE)), NULL, \
20567+ &proc_vs_info_file_operations, \
20568+ { .proc_vs_read = &proc_##OTYPE } )
d337f35e 20569+
4bf69007
AM
20570+#define VINF(NAME, MODE, OTYPE) \
20571+ NOD(NAME, (S_IFREG | (MODE)), NULL, \
20572+ &proc_vx_info_file_operations, \
20573+ { .proc_vxi_read = &proc_##OTYPE } )
2380c486 20574+
4bf69007
AM
20575+#define NINF(NAME, MODE, OTYPE) \
20576+ NOD(NAME, (S_IFREG | (MODE)), NULL, \
20577+ &proc_nx_info_file_operations, \
20578+ { .proc_nxi_read = &proc_##OTYPE } )
d337f35e 20579+
d337f35e 20580+
4bf69007
AM
20581+static struct file_operations proc_vs_info_file_operations = {
20582+ .read = proc_vs_info_read,
20583+};
d337f35e 20584+
4bf69007
AM
20585+static struct file_operations proc_vx_info_file_operations = {
20586+ .read = proc_vx_info_read,
20587+};
d337f35e 20588+
4bf69007
AM
20589+static struct dentry_operations proc_xid_dentry_operations = {
20590+ .d_revalidate = proc_xid_revalidate,
20591+};
d337f35e 20592+
4bf69007
AM
20593+static struct vs_entry vx_base_stuff[] = {
20594+ VINF("info", S_IRUGO, vxi_info),
20595+ VINF("status", S_IRUGO, vxi_status),
20596+ VINF("limit", S_IRUGO, vxi_limit),
20597+ VINF("sched", S_IRUGO, vxi_sched),
20598+ VINF("nsproxy", S_IRUGO, vxi_nsproxy0),
20599+ VINF("nsproxy1",S_IRUGO, vxi_nsproxy1),
20600+ VINF("cvirt", S_IRUGO, vxi_cvirt),
20601+ VINF("cacct", S_IRUGO, vxi_cacct),
20602+ {}
20603+};
2380c486 20604+
d337f35e 20605+
d337f35e 20606+
d337f35e 20607+
4bf69007
AM
20608+static struct dentry *proc_xid_instantiate(struct inode *dir,
20609+ struct dentry *dentry, int id, void *ptr)
20610+{
20611+ dentry->d_op = &proc_xid_dentry_operations;
20612+ return vs_proc_instantiate(dir, dentry, id, ptr);
20613+}
2380c486 20614+
4bf69007
AM
20615+static struct dentry *proc_xid_lookup(struct inode *dir,
20616+ struct dentry *dentry, unsigned int flags)
20617+{
20618+ struct vs_entry *p = vx_base_stuff;
20619+ struct dentry *error = ERR_PTR(-ENOENT);
2380c486 20620+
4bf69007
AM
20621+ for (; p->name; p++) {
20622+ if (p->len != dentry->d_name.len)
20623+ continue;
20624+ if (!memcmp(dentry->d_name.name, p->name, p->len))
20625+ break;
20626+ }
20627+ if (!p->name)
20628+ goto out;
d337f35e 20629+
4bf69007
AM
20630+ error = proc_xid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
20631+out:
20632+ return error;
20633+}
9f7054f1 20634+
c2e5f7c8 20635+static int proc_xid_iterate(struct file *filp, struct dir_context *ctx)
4bf69007 20636+{
cc23e853 20637+ struct dentry *dentry = filp->f_path.dentry;
4bf69007
AM
20638+ struct inode *inode = dentry->d_inode;
20639+ struct vs_entry *p = vx_base_stuff;
20640+ int size = sizeof(vx_base_stuff) / sizeof(struct vs_entry);
c2e5f7c8 20641+ int index;
2380c486 20642+
cc23e853
AM
20643+ if (!dir_emit_dots(filp, ctx))
20644+ return 0;
20645+
20646+ index = ctx->pos - 2;
20647+ if (index < size) {
4bf69007 20648+ for (p += index; p->name; p++) {
c2e5f7c8 20649+ if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
4bf69007 20650+ vs_proc_instantiate, PROC_I(inode)->fd, p))
cc23e853 20651+ return 0;
c2e5f7c8 20652+ ctx->pos++;
4bf69007 20653+ }
d337f35e 20654+ }
4bf69007 20655+ return 1;
d337f35e
JR
20656+}
20657+
20658+
d337f35e 20659+
4bf69007
AM
20660+static struct file_operations proc_nx_info_file_operations = {
20661+ .read = proc_nx_info_read,
20662+};
d337f35e 20663+
4bf69007
AM
20664+static struct dentry_operations proc_nid_dentry_operations = {
20665+ .d_revalidate = proc_nid_revalidate,
20666+};
d337f35e 20667+
4bf69007
AM
20668+static struct vs_entry nx_base_stuff[] = {
20669+ NINF("info", S_IRUGO, nxi_info),
20670+ NINF("status", S_IRUGO, nxi_status),
20671+ {}
20672+};
2380c486 20673+
d337f35e 20674+
4bf69007
AM
20675+static struct dentry *proc_nid_instantiate(struct inode *dir,
20676+ struct dentry *dentry, int id, void *ptr)
d337f35e 20677+{
4bf69007
AM
20678+ dentry->d_op = &proc_nid_dentry_operations;
20679+ return vs_proc_instantiate(dir, dentry, id, ptr);
20680+}
d337f35e 20681+
4bf69007
AM
20682+static struct dentry *proc_nid_lookup(struct inode *dir,
20683+ struct dentry *dentry, unsigned int flags)
20684+{
20685+ struct vs_entry *p = nx_base_stuff;
20686+ struct dentry *error = ERR_PTR(-ENOENT);
d337f35e 20687+
4bf69007
AM
20688+ for (; p->name; p++) {
20689+ if (p->len != dentry->d_name.len)
20690+ continue;
20691+ if (!memcmp(dentry->d_name.name, p->name, p->len))
20692+ break;
20693+ }
20694+ if (!p->name)
20695+ goto out;
d337f35e 20696+
4bf69007
AM
20697+ error = proc_nid_instantiate(dir, dentry, PROC_I(dir)->fd, p);
20698+out:
20699+ return error;
20700+}
d337f35e 20701+
c2e5f7c8 20702+static int proc_nid_iterate(struct file *filp, struct dir_context *ctx)
4bf69007 20703+{
cc23e853 20704+ struct dentry *dentry = filp->f_path.dentry;
4bf69007
AM
20705+ struct inode *inode = dentry->d_inode;
20706+ struct vs_entry *p = nx_base_stuff;
20707+ int size = sizeof(nx_base_stuff) / sizeof(struct vs_entry);
c2e5f7c8 20708+ int index;
d337f35e 20709+
cc23e853
AM
20710+ if (!dir_emit_dots(filp, ctx))
20711+ return 0;
20712+
20713+ index = ctx->pos - 2;
20714+ if (index < size) {
4bf69007 20715+ for (p += index; p->name; p++) {
c2e5f7c8 20716+ if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
4bf69007 20717+ vs_proc_instantiate, PROC_I(inode)->fd, p))
cc23e853 20718+ return 0;
c2e5f7c8 20719+ ctx->pos++;
4bf69007
AM
20720+ }
20721+ }
4bf69007
AM
20722+ return 1;
20723+}
2380c486 20724+
d337f35e 20725+
4bf69007 20726+#define MAX_MULBY10 ((~0U - 9) / 10)
d337f35e 20727+
4bf69007
AM
20728+static inline int atovid(const char *str, int len)
20729+{
20730+ int vid, c;
d337f35e 20731+
4bf69007
AM
20732+ vid = 0;
20733+ while (len-- > 0) {
20734+ c = *str - '0';
20735+ str++;
20736+ if (c > 9)
20737+ return -1;
20738+ if (vid >= MAX_MULBY10)
20739+ return -1;
20740+ vid *= 10;
20741+ vid += c;
20742+ if (!vid)
20743+ return -1;
20744+ }
20745+ return vid;
20746+}
2380c486 20747+
4bf69007 20748+/* now the upper level (virtual) */
2380c486 20749+
2380c486 20750+
4bf69007
AM
20751+static struct file_operations proc_xid_file_operations = {
20752+ .read = generic_read_dir,
c2e5f7c8 20753+ .iterate = proc_xid_iterate,
4bf69007 20754+};
2380c486 20755+
4bf69007
AM
20756+static struct inode_operations proc_xid_inode_operations = {
20757+ .lookup = proc_xid_lookup,
20758+};
d337f35e 20759+
4bf69007
AM
20760+static struct vs_entry vx_virtual_stuff[] = {
20761+ INF("info", S_IRUGO, virtual_info),
20762+ INF("status", S_IRUGO, virtual_status),
20763+ DIR(NULL, S_IRUGO | S_IXUGO, xid),
20764+};
2380c486 20765+
d337f35e 20766+
4bf69007
AM
20767+static struct dentry *proc_virtual_lookup(struct inode *dir,
20768+ struct dentry *dentry, unsigned int flags)
20769+{
20770+ struct vs_entry *p = vx_virtual_stuff;
20771+ struct dentry *error = ERR_PTR(-ENOENT);
20772+ int id = 0;
d337f35e 20773+
4bf69007
AM
20774+ for (; p->name; p++) {
20775+ if (p->len != dentry->d_name.len)
20776+ continue;
20777+ if (!memcmp(dentry->d_name.name, p->name, p->len))
20778+ break;
20779+ }
20780+ if (p->name)
20781+ goto instantiate;
d337f35e 20782+
4bf69007
AM
20783+ id = atovid(dentry->d_name.name, dentry->d_name.len);
20784+ if ((id < 0) || !xid_is_hashed(id))
d337f35e
JR
20785+ goto out;
20786+
4bf69007
AM
20787+instantiate:
20788+ error = proc_xid_instantiate(dir, dentry, id, p);
20789+out:
20790+ return error;
20791+}
d337f35e 20792+
4bf69007
AM
20793+static struct file_operations proc_nid_file_operations = {
20794+ .read = generic_read_dir,
c2e5f7c8 20795+ .iterate = proc_nid_iterate,
4bf69007 20796+};
d337f35e 20797+
4bf69007
AM
20798+static struct inode_operations proc_nid_inode_operations = {
20799+ .lookup = proc_nid_lookup,
20800+};
d337f35e 20801+
4bf69007
AM
20802+static struct vs_entry nx_virtnet_stuff[] = {
20803+ INF("info", S_IRUGO, virtnet_info),
20804+ INF("status", S_IRUGO, virtnet_status),
20805+ DIR(NULL, S_IRUGO | S_IXUGO, nid),
20806+};
d337f35e 20807+
d337f35e 20808+
4bf69007
AM
20809+static struct dentry *proc_virtnet_lookup(struct inode *dir,
20810+ struct dentry *dentry, unsigned int flags)
20811+{
20812+ struct vs_entry *p = nx_virtnet_stuff;
20813+ struct dentry *error = ERR_PTR(-ENOENT);
20814+ int id = 0;
d337f35e 20815+
4bf69007
AM
20816+ for (; p->name; p++) {
20817+ if (p->len != dentry->d_name.len)
20818+ continue;
20819+ if (!memcmp(dentry->d_name.name, p->name, p->len))
20820+ break;
20821+ }
20822+ if (p->name)
20823+ goto instantiate;
d337f35e 20824+
4bf69007
AM
20825+ id = atovid(dentry->d_name.name, dentry->d_name.len);
20826+ if ((id < 0) || !nid_is_hashed(id))
d337f35e
JR
20827+ goto out;
20828+
4bf69007
AM
20829+instantiate:
20830+ error = proc_nid_instantiate(dir, dentry, id, p);
20831+out:
20832+ return error;
20833+}
2380c486 20834+
d337f35e 20835+
4bf69007
AM
20836+#define PROC_MAXVIDS 32
20837+
c2e5f7c8 20838+int proc_virtual_iterate(struct file *filp, struct dir_context *ctx)
4bf69007 20839+{
4bf69007
AM
20840+ struct vs_entry *p = vx_virtual_stuff;
20841+ int size = sizeof(vx_virtual_stuff) / sizeof(struct vs_entry);
c2e5f7c8 20842+ int index;
4bf69007
AM
20843+ unsigned int xid_array[PROC_MAXVIDS];
20844+ char buf[PROC_NUMBUF];
20845+ unsigned int nr_xids, i;
4bf69007 20846+
cc23e853
AM
20847+ if (!dir_emit_dots(filp, ctx))
20848+ return 0;
20849+
20850+ index = ctx->pos - 2;
20851+ if (index < size) {
4bf69007 20852+ for (p += index; p->name; p++) {
c2e5f7c8 20853+ if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
4bf69007 20854+ vs_proc_instantiate, 0, p))
cc23e853 20855+ return 0;
c2e5f7c8 20856+ ctx->pos++;
d337f35e
JR
20857+ }
20858+ }
cc23e853
AM
20859+
20860+ index = ctx->pos - size;
20861+ p = &vx_virtual_stuff[size - 1];
20862+ nr_xids = get_xid_list(index, xid_array, PROC_MAXVIDS);
20863+ for (i = 0; i < nr_xids; i++) {
20864+ int n, xid = xid_array[i];
20865+ unsigned int j = PROC_NUMBUF;
20866+
20867+ n = xid;
20868+ do
20869+ buf[--j] = '0' + (n % 10);
20870+ while (n /= 10);
20871+
20872+ if (vx_proc_fill_cache(filp, ctx,
20873+ buf + j, PROC_NUMBUF - j,
20874+ vs_proc_instantiate, xid, p))
20875+ return 0;
20876+ ctx->pos++;
20877+ }
4bf69007 20878+ return 0;
d337f35e
JR
20879+}
20880+
4bf69007
AM
20881+static int proc_virtual_getattr(struct vfsmount *mnt,
20882+ struct dentry *dentry, struct kstat *stat)
d337f35e 20883+{
4bf69007 20884+ struct inode *inode = dentry->d_inode;
d337f35e 20885+
4bf69007
AM
20886+ generic_fillattr(inode, stat);
20887+ stat->nlink = 2 + atomic_read(&vx_global_cactive);
20888+ return 0;
d337f35e
JR
20889+}
20890+
4bf69007
AM
20891+static struct file_operations proc_virtual_dir_operations = {
20892+ .read = generic_read_dir,
c2e5f7c8 20893+ .iterate = proc_virtual_iterate,
d337f35e
JR
20894+};
20895+
4bf69007
AM
20896+static struct inode_operations proc_virtual_dir_inode_operations = {
20897+ .getattr = proc_virtual_getattr,
20898+ .lookup = proc_virtual_lookup,
20899+};
d337f35e 20900+
d337f35e
JR
20901+
20902+
c2e5f7c8 20903+int proc_virtnet_iterate(struct file *filp, struct dir_context *ctx)
d337f35e 20904+{
4bf69007
AM
20905+ struct vs_entry *p = nx_virtnet_stuff;
20906+ int size = sizeof(nx_virtnet_stuff) / sizeof(struct vs_entry);
c2e5f7c8 20907+ int index;
4bf69007
AM
20908+ unsigned int nid_array[PROC_MAXVIDS];
20909+ char buf[PROC_NUMBUF];
20910+ unsigned int nr_nids, i;
d337f35e 20911+
cc23e853
AM
20912+ if (!dir_emit_dots(filp, ctx))
20913+ return 0;
20914+
20915+ index = ctx->pos - 2;
20916+ if (index < size) {
4bf69007 20917+ for (p += index; p->name; p++) {
c2e5f7c8 20918+ if (vx_proc_fill_cache(filp, ctx, p->name, p->len,
4bf69007 20919+ vs_proc_instantiate, 0, p))
cc23e853 20920+ return 0;
c2e5f7c8 20921+ ctx->pos++;
d337f35e
JR
20922+ }
20923+ }
cc23e853
AM
20924+
20925+ index = ctx->pos - size;
20926+ p = &nx_virtnet_stuff[size - 1];
20927+ nr_nids = get_nid_list(index, nid_array, PROC_MAXVIDS);
20928+ for (i = 0; i < nr_nids; i++) {
20929+ int n, nid = nid_array[i];
20930+ unsigned int j = PROC_NUMBUF;
20931+
20932+ n = nid;
20933+ do
20934+ buf[--j] = '0' + (n % 10);
20935+ while (n /= 10);
20936+
20937+ if (vx_proc_fill_cache(filp, ctx,
20938+ buf + j, PROC_NUMBUF - j,
20939+ vs_proc_instantiate, nid, p))
20940+ return 0;
20941+ ctx->pos++;
20942+ }
d337f35e
JR
20943+ return 0;
20944+}
20945+
4bf69007
AM
20946+static int proc_virtnet_getattr(struct vfsmount *mnt,
20947+ struct dentry *dentry, struct kstat *stat)
20948+{
20949+ struct inode *inode = dentry->d_inode;
d337f35e 20950+
4bf69007
AM
20951+ generic_fillattr(inode, stat);
20952+ stat->nlink = 2 + atomic_read(&nx_global_cactive);
20953+ return 0;
20954+}
d337f35e 20955+
4bf69007
AM
20956+static struct file_operations proc_virtnet_dir_operations = {
20957+ .read = generic_read_dir,
c2e5f7c8 20958+ .iterate = proc_virtnet_iterate,
d337f35e
JR
20959+};
20960+
4bf69007
AM
20961+static struct inode_operations proc_virtnet_dir_inode_operations = {
20962+ .getattr = proc_virtnet_getattr,
20963+ .lookup = proc_virtnet_lookup,
d337f35e
JR
20964+};
20965+
d337f35e
JR
20966+
20967+
4bf69007 20968+void proc_vx_init(void)
d337f35e 20969+{
4bf69007 20970+ struct proc_dir_entry *ent;
d337f35e 20971+
4bf69007
AM
20972+ ent = proc_mkdir("virtual", 0);
20973+ if (ent) {
20974+ ent->proc_fops = &proc_virtual_dir_operations;
20975+ ent->proc_iops = &proc_virtual_dir_inode_operations;
20976+ }
20977+ proc_virtual = ent;
d337f35e 20978+
4bf69007
AM
20979+ ent = proc_mkdir("virtnet", 0);
20980+ if (ent) {
20981+ ent->proc_fops = &proc_virtnet_dir_operations;
20982+ ent->proc_iops = &proc_virtnet_dir_inode_operations;
d337f35e 20983+ }
4bf69007 20984+ proc_virtnet = ent;
d337f35e
JR
20985+}
20986+
d337f35e 20987+
2380c486 20988+
2380c486 20989+
4bf69007 20990+/* per pid info */
2380c486 20991+
bb20add7
AM
20992+void render_cap_t(struct seq_file *, const char *,
20993+ struct vx_info *, kernel_cap_t *);
20994+
2380c486 20995+
bb20add7
AM
20996+int proc_pid_vx_info(
20997+ struct seq_file *m,
20998+ struct pid_namespace *ns,
20999+ struct pid *pid,
21000+ struct task_struct *p)
2380c486 21001+{
4bf69007 21002+ struct vx_info *vxi;
2380c486 21003+
bb20add7 21004+ seq_printf(m, "XID:\t%d\n", vx_task_xid(p));
2380c486 21005+
4bf69007
AM
21006+ vxi = task_get_vx_info(p);
21007+ if (!vxi)
bb20add7 21008+ return 0;
2380c486 21009+
bb20add7
AM
21010+ render_cap_t(m, "BCaps:\t", vxi, &vxi->vx_bcaps);
21011+ seq_printf(m, "CCaps:\t%016llx\n",
4bf69007 21012+ (unsigned long long)vxi->vx_ccaps);
bb20add7 21013+ seq_printf(m, "CFlags:\t%016llx\n",
4bf69007 21014+ (unsigned long long)vxi->vx_flags);
bb20add7 21015+ seq_printf(m, "CIPid:\t%d\n", vxi->vx_initpid);
4bf69007
AM
21016+
21017+ put_vx_info(vxi);
bb20add7 21018+ return 0;
2380c486
JR
21019+}
21020+
2380c486 21021+
bb20add7
AM
21022+int proc_pid_nx_info(
21023+ struct seq_file *m,
21024+ struct pid_namespace *ns,
21025+ struct pid *pid,
21026+ struct task_struct *p)
4bf69007
AM
21027+{
21028+ struct nx_info *nxi;
21029+ struct nx_addr_v4 *v4a;
21030+#ifdef CONFIG_IPV6
21031+ struct nx_addr_v6 *v6a;
21032+#endif
4bf69007 21033+ int i;
2380c486 21034+
bb20add7 21035+ seq_printf(m, "NID:\t%d\n", nx_task_nid(p));
2380c486 21036+
4bf69007
AM
21037+ nxi = task_get_nx_info(p);
21038+ if (!nxi)
bb20add7 21039+ return 0;
2380c486 21040+
bb20add7 21041+ seq_printf(m, "NCaps:\t%016llx\n",
4bf69007 21042+ (unsigned long long)nxi->nx_ncaps);
bb20add7 21043+ seq_printf(m, "NFlags:\t%016llx\n",
4bf69007
AM
21044+ (unsigned long long)nxi->nx_flags);
21045+
bb20add7 21046+ seq_printf(m, "V4Root[bcast]:\t" NIPQUAD_FMT "\n",
4bf69007 21047+ NIPQUAD(nxi->v4_bcast.s_addr));
bb20add7 21048+ seq_printf(m, "V4Root[lback]:\t" NIPQUAD_FMT "\n",
4bf69007
AM
21049+ NIPQUAD(nxi->v4_lback.s_addr));
21050+ if (!NX_IPV4(nxi))
21051+ goto skip_v4;
21052+ for (i = 0, v4a = &nxi->v4; v4a; i++, v4a = v4a->next)
bb20add7 21053+ seq_printf(m, "V4Root[%d]:\t" NXAV4_FMT "\n",
4bf69007
AM
21054+ i, NXAV4(v4a));
21055+skip_v4:
21056+#ifdef CONFIG_IPV6
21057+ if (!NX_IPV6(nxi))
21058+ goto skip_v6;
21059+ for (i = 0, v6a = &nxi->v6; v6a; i++, v6a = v6a->next)
bb20add7 21060+ seq_printf(m, "V6Root[%d]:\t" NXAV6_FMT "\n",
4bf69007
AM
21061+ i, NXAV6(v6a));
21062+skip_v6:
21063+#endif
21064+ put_nx_info(nxi);
bb20add7 21065+ return 0;
2380c486
JR
21066+}
21067+
09a55596
AM
21068diff -NurpP --minimal linux-4.9.135/kernel/vserver/sched.c linux-4.9.135-vs2.3.9.8/kernel/vserver/sched.c
21069--- linux-4.9.135/kernel/vserver/sched.c 1970-01-01 00:00:00.000000000 +0000
21070+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/sched.c 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
21071@@ -0,0 +1,83 @@
21072+/*
21073+ * linux/kernel/vserver/sched.c
21074+ *
21075+ * Virtual Server: Scheduler Support
21076+ *
cc23e853 21077+ * Copyright (C) 2004-2010 Herbert Pötzl
4bf69007
AM
21078+ *
21079+ * V0.01 adapted Sam Vilains version to 2.6.3
21080+ * V0.02 removed legacy interface
21081+ * V0.03 changed vcmds to vxi arg
21082+ * V0.04 removed older and legacy interfaces
21083+ * V0.05 removed scheduler code/commands
21084+ *
21085+ */
21086+
21087+#include <linux/vs_context.h>
21088+#include <linux/vs_sched.h>
21089+#include <linux/cpumask.h>
21090+#include <linux/vserver/sched_cmd.h>
2380c486 21091+
4bf69007
AM
21092+#include <asm/uaccess.h>
21093+
21094+
21095+void vx_update_sched_param(struct _vx_sched *sched,
21096+ struct _vx_sched_pc *sched_pc)
2380c486 21097+{
4bf69007 21098+ sched_pc->prio_bias = sched->prio_bias;
2380c486
JR
21099+}
21100+
4bf69007
AM
21101+static int do_set_prio_bias(struct vx_info *vxi, struct vcmd_prio_bias *data)
21102+{
21103+ int cpu;
2380c486 21104+
4bf69007
AM
21105+ if (data->prio_bias > MAX_PRIO_BIAS)
21106+ data->prio_bias = MAX_PRIO_BIAS;
21107+ if (data->prio_bias < MIN_PRIO_BIAS)
21108+ data->prio_bias = MIN_PRIO_BIAS;
2380c486 21109+
4bf69007 21110+ if (data->cpu_id != ~0) {
cc23e853 21111+ vxi->sched.update = *get_cpu_mask(data->cpu_id);
4bf69007
AM
21112+ cpumask_and(&vxi->sched.update, &vxi->sched.update,
21113+ cpu_online_mask);
21114+ } else
21115+ cpumask_copy(&vxi->sched.update, cpu_online_mask);
2380c486 21116+
cc23e853 21117+ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)vxi->sched.update)
4bf69007
AM
21118+ vx_update_sched_param(&vxi->sched,
21119+ &vx_per_cpu(vxi, sched_pc, cpu));
21120+ return 0;
21121+}
2380c486 21122+
4bf69007
AM
21123+int vc_set_prio_bias(struct vx_info *vxi, void __user *data)
21124+{
21125+ struct vcmd_prio_bias vc_data;
d337f35e 21126+
4bf69007
AM
21127+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21128+ return -EFAULT;
d337f35e 21129+
4bf69007
AM
21130+ return do_set_prio_bias(vxi, &vc_data);
21131+}
d337f35e 21132+
4bf69007
AM
21133+int vc_get_prio_bias(struct vx_info *vxi, void __user *data)
21134+{
21135+ struct vcmd_prio_bias vc_data;
21136+ struct _vx_sched_pc *pcd;
21137+ int cpu;
d337f35e 21138+
4bf69007
AM
21139+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21140+ return -EFAULT;
2380c486 21141+
4bf69007 21142+ cpu = vc_data.cpu_id;
d337f35e 21143+
4bf69007
AM
21144+ if (!cpu_possible(cpu))
21145+ return -EINVAL;
d337f35e 21146+
4bf69007
AM
21147+ pcd = &vx_per_cpu(vxi, sched_pc, cpu);
21148+ vc_data.prio_bias = pcd->prio_bias;
d337f35e 21149+
4bf69007
AM
21150+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21151+ return -EFAULT;
21152+ return 0;
21153+}
d337f35e 21154+
09a55596
AM
21155diff -NurpP --minimal linux-4.9.135/kernel/vserver/sched_init.h linux-4.9.135-vs2.3.9.8/kernel/vserver/sched_init.h
21156--- linux-4.9.135/kernel/vserver/sched_init.h 1970-01-01 00:00:00.000000000 +0000
21157+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/sched_init.h 2018-10-20 04:58:15.000000000 +0000
4bf69007 21158@@ -0,0 +1,27 @@
2380c486 21159+
4bf69007
AM
21160+static inline void vx_info_init_sched(struct _vx_sched *sched)
21161+{
21162+ /* scheduling; hard code starting values as constants */
21163+ sched->prio_bias = 0;
d337f35e
JR
21164+}
21165+
4bf69007
AM
21166+static inline
21167+void vx_info_init_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
e3afe727 21168+{
4bf69007
AM
21169+ sched_pc->prio_bias = 0;
21170+
21171+ sched_pc->user_ticks = 0;
21172+ sched_pc->sys_ticks = 0;
21173+ sched_pc->hold_ticks = 0;
e3afe727
AM
21174+}
21175+
4bf69007 21176+static inline void vx_info_exit_sched(struct _vx_sched *sched)
e3afe727 21177+{
4bf69007 21178+ return;
e3afe727
AM
21179+}
21180+
4bf69007
AM
21181+static inline
21182+void vx_info_exit_sched_pc(struct _vx_sched_pc *sched_pc, int cpu)
e3afe727 21183+{
4bf69007 21184+ return;
e3afe727 21185+}
09a55596
AM
21186diff -NurpP --minimal linux-4.9.135/kernel/vserver/sched_proc.h linux-4.9.135-vs2.3.9.8/kernel/vserver/sched_proc.h
21187--- linux-4.9.135/kernel/vserver/sched_proc.h 1970-01-01 00:00:00.000000000 +0000
21188+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/sched_proc.h 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
21189@@ -0,0 +1,32 @@
21190+#ifndef _VX_SCHED_PROC_H
21191+#define _VX_SCHED_PROC_H
e3afe727 21192+
4bf69007
AM
21193+
21194+static inline
21195+int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
e3afe727 21196+{
4bf69007
AM
21197+ int length = 0;
21198+
21199+ length += sprintf(buffer,
21200+ "PrioBias:\t%8d\n",
21201+ sched->prio_bias);
21202+ return length;
e3afe727
AM
21203+}
21204+
4bf69007
AM
21205+static inline
21206+int vx_info_proc_sched_pc(struct _vx_sched_pc *sched_pc,
21207+ char *buffer, int cpu)
e3afe727 21208+{
4bf69007 21209+ int length = 0;
e3afe727 21210+
4bf69007
AM
21211+ length += sprintf(buffer + length,
21212+ "cpu %d: %lld %lld %lld", cpu,
21213+ (unsigned long long)sched_pc->user_ticks,
21214+ (unsigned long long)sched_pc->sys_ticks,
21215+ (unsigned long long)sched_pc->hold_ticks);
21216+ length += sprintf(buffer + length,
21217+ " %d\n", sched_pc->prio_bias);
21218+ return length;
21219+}
93de0823 21220+
4bf69007 21221+#endif /* _VX_SCHED_PROC_H */
09a55596
AM
21222diff -NurpP --minimal linux-4.9.135/kernel/vserver/signal.c linux-4.9.135-vs2.3.9.8/kernel/vserver/signal.c
21223--- linux-4.9.135/kernel/vserver/signal.c 1970-01-01 00:00:00.000000000 +0000
21224+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/signal.c 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
21225@@ -0,0 +1,134 @@
21226+/*
21227+ * linux/kernel/vserver/signal.c
21228+ *
21229+ * Virtual Server: Signal Support
21230+ *
cc23e853 21231+ * Copyright (C) 2003-2007 Herbert Pötzl
4bf69007
AM
21232+ *
21233+ * V0.01 broken out from vcontext V0.05
21234+ * V0.02 changed vcmds to vxi arg
21235+ * V0.03 adjusted siginfo for kill
21236+ *
21237+ */
99a884b4 21238+
4bf69007 21239+#include <asm/uaccess.h>
93de0823 21240+
4bf69007
AM
21241+#include <linux/vs_context.h>
21242+#include <linux/vs_pid.h>
21243+#include <linux/vserver/signal_cmd.h>
d337f35e 21244+
d337f35e 21245+
4bf69007
AM
21246+int vx_info_kill(struct vx_info *vxi, int pid, int sig)
21247+{
21248+ int retval, count = 0;
21249+ struct task_struct *p;
21250+ struct siginfo *sip = SEND_SIG_PRIV;
d33d7b00 21251+
4bf69007
AM
21252+ retval = -ESRCH;
21253+ vxdprintk(VXD_CBIT(misc, 4),
21254+ "vx_info_kill(%p[#%d],%d,%d)*",
21255+ vxi, vxi->vx_id, pid, sig);
21256+ read_lock(&tasklist_lock);
21257+ switch (pid) {
21258+ case 0:
21259+ case -1:
21260+ for_each_process(p) {
21261+ int err = 0;
d337f35e 21262+
4bf69007
AM
21263+ if (vx_task_xid(p) != vxi->vx_id || p->pid <= 1 ||
21264+ (pid && vxi->vx_initpid == p->pid))
21265+ continue;
d337f35e 21266+
4bf69007
AM
21267+ err = group_send_sig_info(sig, sip, p);
21268+ ++count;
21269+ if (err != -EPERM)
21270+ retval = err;
21271+ }
21272+ break;
d337f35e 21273+
4bf69007
AM
21274+ case 1:
21275+ if (vxi->vx_initpid) {
21276+ pid = vxi->vx_initpid;
21277+ /* for now, only SIGINT to private init ... */
21278+ if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
21279+ /* ... as long as there are tasks left */
21280+ (atomic_read(&vxi->vx_tasks) > 1))
21281+ sig = SIGINT;
21282+ }
21283+ /* fallthrough */
21284+ default:
21285+ rcu_read_lock();
21286+ p = find_task_by_real_pid(pid);
21287+ rcu_read_unlock();
21288+ if (p) {
21289+ if (vx_task_xid(p) == vxi->vx_id)
21290+ retval = group_send_sig_info(sig, sip, p);
21291+ }
21292+ break;
21293+ }
21294+ read_unlock(&tasklist_lock);
21295+ vxdprintk(VXD_CBIT(misc, 4),
21296+ "vx_info_kill(%p[#%d],%d,%d,%ld) = %d",
21297+ vxi, vxi->vx_id, pid, sig, (long)sip, retval);
21298+ return retval;
21299+}
d337f35e 21300+
4bf69007 21301+int vc_ctx_kill(struct vx_info *vxi, void __user *data)
d337f35e 21302+{
4bf69007 21303+ struct vcmd_ctx_kill_v0 vc_data;
d337f35e 21304+
4bf69007
AM
21305+ if (copy_from_user(&vc_data, data, sizeof(vc_data)))
21306+ return -EFAULT;
d337f35e 21307+
4bf69007
AM
21308+ /* special check to allow guest shutdown */
21309+ if (!vx_info_flags(vxi, VXF_STATE_ADMIN, 0) &&
21310+ /* forbid killall pid=0 when init is present */
21311+ (((vc_data.pid < 1) && vxi->vx_initpid) ||
21312+ (vc_data.pid > 1)))
21313+ return -EACCES;
21314+
21315+ return vx_info_kill(vxi, vc_data.pid, vc_data.sig);
d337f35e
JR
21316+}
21317+
4bf69007
AM
21318+
21319+static int __wait_exit(struct vx_info *vxi)
d337f35e 21320+{
4bf69007
AM
21321+ DECLARE_WAITQUEUE(wait, current);
21322+ int ret = 0;
d337f35e 21323+
4bf69007
AM
21324+ add_wait_queue(&vxi->vx_wait, &wait);
21325+ set_current_state(TASK_INTERRUPTIBLE);
d337f35e 21326+
4bf69007
AM
21327+wait:
21328+ if (vx_info_state(vxi,
21329+ VXS_SHUTDOWN | VXS_HASHED | VXS_HELPER) == VXS_SHUTDOWN)
21330+ goto out;
21331+ if (signal_pending(current)) {
21332+ ret = -ERESTARTSYS;
21333+ goto out;
21334+ }
21335+ schedule();
21336+ goto wait;
21337+
21338+out:
21339+ set_current_state(TASK_RUNNING);
21340+ remove_wait_queue(&vxi->vx_wait, &wait);
21341+ return ret;
d337f35e
JR
21342+}
21343+
4a036bed 21344+
7b17263b 21345+
4bf69007 21346+int vc_wait_exit(struct vx_info *vxi, void __user *data)
7b17263b 21347+{
4bf69007
AM
21348+ struct vcmd_wait_exit_v0 vc_data;
21349+ int ret;
7b17263b 21350+
4bf69007
AM
21351+ ret = __wait_exit(vxi);
21352+ vc_data.reboot_cmd = vxi->reboot_cmd;
21353+ vc_data.exit_code = vxi->exit_code;
21354+
21355+ if (copy_to_user(data, &vc_data, sizeof(vc_data)))
21356+ ret = -EFAULT;
21357+ return ret;
7b17263b 21358+}
2380c486 21359+
09a55596
AM
21360diff -NurpP --minimal linux-4.9.135/kernel/vserver/space.c linux-4.9.135-vs2.3.9.8/kernel/vserver/space.c
21361--- linux-4.9.135/kernel/vserver/space.c 1970-01-01 00:00:00.000000000 +0000
21362+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/space.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 21363@@ -0,0 +1,437 @@
4bf69007
AM
21364+/*
21365+ * linux/kernel/vserver/space.c
21366+ *
21367+ * Virtual Server: Context Space Support
21368+ *
cc23e853 21369+ * Copyright (C) 2003-2010 Herbert Pötzl
4bf69007
AM
21370+ *
21371+ * V0.01 broken out from context.c 0.07
21372+ * V0.02 added task locking for namespace
21373+ * V0.03 broken out vx_enter_namespace
21374+ * V0.04 added *space support and commands
21375+ * V0.05 added credential support
21376+ *
21377+ */
21378+
21379+#include <linux/utsname.h>
21380+#include <linux/nsproxy.h>
21381+#include <linux/err.h>
21382+#include <linux/fs_struct.h>
21383+#include <linux/cred.h>
21384+#include <asm/uaccess.h>
d337f35e 21385+
d337f35e 21386+#include <linux/vs_context.h>
4bf69007
AM
21387+#include <linux/vserver/space.h>
21388+#include <linux/vserver/space_cmd.h>
2380c486 21389+
4bf69007
AM
21390+atomic_t vs_global_nsproxy = ATOMIC_INIT(0);
21391+atomic_t vs_global_fs = ATOMIC_INIT(0);
21392+atomic_t vs_global_mnt_ns = ATOMIC_INIT(0);
21393+atomic_t vs_global_uts_ns = ATOMIC_INIT(0);
cc23e853 21394+atomic_t vs_global_ipc_ns = ATOMIC_INIT(0);
4bf69007
AM
21395+atomic_t vs_global_user_ns = ATOMIC_INIT(0);
21396+atomic_t vs_global_pid_ns = ATOMIC_INIT(0);
d337f35e 21397+
2380c486 21398+
4bf69007 21399+/* namespace functions */
2380c486 21400+
4bf69007
AM
21401+#include <linux/mnt_namespace.h>
21402+#include <linux/user_namespace.h>
21403+#include <linux/pid_namespace.h>
21404+#include <linux/ipc_namespace.h>
21405+#include <net/net_namespace.h>
21406+#include "../fs/mount.h"
2380c486 21407+
2380c486 21408+
4bf69007
AM
21409+static const struct vcmd_space_mask_v1 space_mask_v0 = {
21410+ .mask = CLONE_FS |
21411+ CLONE_NEWNS |
21412+#ifdef CONFIG_UTS_NS
21413+ CLONE_NEWUTS |
21414+#endif
21415+#ifdef CONFIG_IPC_NS
21416+ CLONE_NEWIPC |
21417+#endif
21418+#ifdef CONFIG_USER_NS
21419+ CLONE_NEWUSER |
21420+#endif
21421+ 0
21422+};
2380c486 21423+
4bf69007
AM
21424+static const struct vcmd_space_mask_v1 space_mask = {
21425+ .mask = CLONE_FS |
21426+ CLONE_NEWNS |
21427+#ifdef CONFIG_UTS_NS
21428+ CLONE_NEWUTS |
21429+#endif
21430+#ifdef CONFIG_IPC_NS
21431+ CLONE_NEWIPC |
21432+#endif
21433+#ifdef CONFIG_USER_NS
21434+ CLONE_NEWUSER |
21435+#endif
21436+#ifdef CONFIG_PID_NS
21437+ CLONE_NEWPID |
21438+#endif
21439+#ifdef CONFIG_NET_NS
21440+ CLONE_NEWNET |
21441+#endif
21442+ 0
21443+};
2380c486 21444+
4bf69007
AM
21445+static const struct vcmd_space_mask_v1 default_space_mask = {
21446+ .mask = CLONE_FS |
21447+ CLONE_NEWNS |
21448+#ifdef CONFIG_UTS_NS
21449+ CLONE_NEWUTS |
21450+#endif
21451+#ifdef CONFIG_IPC_NS
21452+ CLONE_NEWIPC |
21453+#endif
21454+#ifdef CONFIG_USER_NS
bb20add7 21455+// CLONE_NEWUSER |
4bf69007
AM
21456+#endif
21457+#ifdef CONFIG_PID_NS
21458+// CLONE_NEWPID |
21459+#endif
21460+ 0
21461+};
2380c486 21462+
4bf69007
AM
21463+/*
21464+ * build a new nsproxy mix
21465+ * assumes that both proxies are 'const'
21466+ * does not touch nsproxy refcounts
21467+ * will hold a reference on the result.
21468+ */
7b17263b 21469+
4bf69007
AM
21470+struct nsproxy *vs_mix_nsproxy(struct nsproxy *old_nsproxy,
21471+ struct nsproxy *new_nsproxy, unsigned long mask)
21472+{
21473+ struct mnt_namespace *old_ns;
21474+ struct uts_namespace *old_uts;
21475+ struct ipc_namespace *old_ipc;
21476+#ifdef CONFIG_PID_NS
21477+ struct pid_namespace *old_pid;
21478+#endif
21479+#ifdef CONFIG_NET_NS
21480+ struct net *old_net;
21481+#endif
21482+ struct nsproxy *nsproxy;
d337f35e 21483+
4bf69007
AM
21484+ nsproxy = copy_nsproxy(old_nsproxy);
21485+ if (!nsproxy)
21486+ goto out;
bd0a9c15 21487+
4bf69007
AM
21488+ if (mask & CLONE_NEWNS) {
21489+ old_ns = nsproxy->mnt_ns;
21490+ nsproxy->mnt_ns = new_nsproxy->mnt_ns;
21491+ if (nsproxy->mnt_ns)
21492+ get_mnt_ns(nsproxy->mnt_ns);
21493+ } else
21494+ old_ns = NULL;
d337f35e 21495+
4bf69007
AM
21496+ if (mask & CLONE_NEWUTS) {
21497+ old_uts = nsproxy->uts_ns;
21498+ nsproxy->uts_ns = new_nsproxy->uts_ns;
21499+ if (nsproxy->uts_ns)
21500+ get_uts_ns(nsproxy->uts_ns);
21501+ } else
21502+ old_uts = NULL;
2380c486 21503+
4bf69007
AM
21504+ if (mask & CLONE_NEWIPC) {
21505+ old_ipc = nsproxy->ipc_ns;
21506+ nsproxy->ipc_ns = new_nsproxy->ipc_ns;
21507+ if (nsproxy->ipc_ns)
21508+ get_ipc_ns(nsproxy->ipc_ns);
21509+ } else
21510+ old_ipc = NULL;
ec22aa5c 21511+
4bf69007
AM
21512+#ifdef CONFIG_PID_NS
21513+ if (mask & CLONE_NEWPID) {
5f23d63e
AM
21514+ old_pid = nsproxy->pid_ns_for_children;
21515+ nsproxy->pid_ns_for_children = new_nsproxy->pid_ns_for_children;
21516+ if (nsproxy->pid_ns_for_children)
21517+ get_pid_ns(nsproxy->pid_ns_for_children);
4bf69007
AM
21518+ } else
21519+ old_pid = NULL;
21520+#endif
21521+#ifdef CONFIG_NET_NS
21522+ if (mask & CLONE_NEWNET) {
21523+ old_net = nsproxy->net_ns;
21524+ nsproxy->net_ns = new_nsproxy->net_ns;
21525+ if (nsproxy->net_ns)
21526+ get_net(nsproxy->net_ns);
21527+ } else
21528+ old_net = NULL;
21529+#endif
21530+ if (old_ns)
21531+ put_mnt_ns(old_ns);
21532+ if (old_uts)
21533+ put_uts_ns(old_uts);
21534+ if (old_ipc)
21535+ put_ipc_ns(old_ipc);
21536+#ifdef CONFIG_PID_NS
21537+ if (old_pid)
21538+ put_pid_ns(old_pid);
21539+#endif
21540+#ifdef CONFIG_NET_NS
21541+ if (old_net)
21542+ put_net(old_net);
21543+#endif
21544+out:
21545+ return nsproxy;
21546+}
2380c486 21547+
bd0a9c15 21548+
4bf69007
AM
21549+/*
21550+ * merge two nsproxy structs into a new one.
21551+ * will hold a reference on the result.
21552+ */
d337f35e 21553+
4bf69007
AM
21554+static inline
21555+struct nsproxy *__vs_merge_nsproxy(struct nsproxy *old,
21556+ struct nsproxy *proxy, unsigned long mask)
21557+{
21558+ struct nsproxy null_proxy = { .mnt_ns = NULL };
2380c486 21559+
4bf69007
AM
21560+ if (!proxy)
21561+ return NULL;
d337f35e 21562+
4bf69007
AM
21563+ if (mask) {
21564+ /* vs_mix_nsproxy returns with reference */
21565+ return vs_mix_nsproxy(old ? old : &null_proxy,
21566+ proxy, mask);
21567+ }
21568+ get_nsproxy(proxy);
21569+ return proxy;
21570+}
2380c486 21571+
ec22aa5c 21572+
4bf69007
AM
21573+int vx_enter_space(struct vx_info *vxi, unsigned long mask, unsigned index)
21574+{
21575+ struct nsproxy *proxy, *proxy_cur, *proxy_new;
21576+ struct fs_struct *fs_cur, *fs = NULL;
21577+ struct _vx_space *space;
21578+ int ret, kill = 0;
2380c486 21579+
4bf69007
AM
21580+ vxdprintk(VXD_CBIT(space, 8), "vx_enter_space(%p[#%u],0x%08lx,%d)",
21581+ vxi, vxi->vx_id, mask, index);
2380c486 21582+
4bf69007
AM
21583+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
21584+ return -EACCES;
2380c486 21585+
4bf69007
AM
21586+ if (index >= VX_SPACES)
21587+ return -EINVAL;
2380c486 21588+
4bf69007
AM
21589+ space = &vxi->space[index];
21590+
21591+ if (!mask)
21592+ mask = space->vx_nsmask;
21593+
21594+ if ((mask & space->vx_nsmask) != mask)
21595+ return -EINVAL;
21596+
21597+ if (mask & CLONE_FS) {
21598+ fs = copy_fs_struct(space->vx_fs);
21599+ if (!fs)
21600+ return -ENOMEM;
2380c486 21601+ }
4bf69007
AM
21602+ proxy = space->vx_nsproxy;
21603+
21604+ vxdprintk(VXD_CBIT(space, 9),
21605+ "vx_enter_space(%p[#%u],0x%08lx,%d) -> (%p,%p)",
21606+ vxi, vxi->vx_id, mask, index, proxy, fs);
21607+
21608+ task_lock(current);
21609+ fs_cur = current->fs;
21610+
21611+ if (mask & CLONE_FS) {
21612+ spin_lock(&fs_cur->lock);
21613+ current->fs = fs;
21614+ kill = !--fs_cur->users;
21615+ spin_unlock(&fs_cur->lock);
ec22aa5c 21616+ }
ec22aa5c 21617+
4bf69007
AM
21618+ proxy_cur = current->nsproxy;
21619+ get_nsproxy(proxy_cur);
21620+ task_unlock(current);
21621+
21622+ if (kill)
21623+ free_fs_struct(fs_cur);
21624+
21625+ proxy_new = __vs_merge_nsproxy(proxy_cur, proxy, mask);
21626+ if (IS_ERR(proxy_new)) {
21627+ ret = PTR_ERR(proxy_new);
21628+ goto out_put;
eab5a9a6 21629+ }
4bf69007
AM
21630+
21631+ proxy_new = xchg(&current->nsproxy, proxy_new);
21632+
21633+ if (mask & CLONE_NEWUSER) {
21634+ struct cred *cred;
21635+
21636+ vxdprintk(VXD_CBIT(space, 10),
21637+ "vx_enter_space(%p[#%u],%p) cred (%p,%p)",
21638+ vxi, vxi->vx_id, space->vx_cred,
21639+ current->real_cred, current->cred);
21640+
21641+ if (space->vx_cred) {
21642+ cred = __prepare_creds(space->vx_cred);
21643+ if (cred)
21644+ commit_creds(cred);
21645+ }
d337f35e 21646+ }
4bf69007
AM
21647+
21648+ ret = 0;
21649+
21650+ if (proxy_new)
21651+ put_nsproxy(proxy_new);
21652+out_put:
21653+ if (proxy_cur)
21654+ put_nsproxy(proxy_cur);
21655+ return ret;
21656+}
21657+
21658+
21659+int vx_set_space(struct vx_info *vxi, unsigned long mask, unsigned index)
21660+{
21661+ struct nsproxy *proxy_vxi, *proxy_cur, *proxy_new;
21662+ struct fs_struct *fs_vxi, *fs = NULL;
21663+ struct _vx_space *space;
21664+ int ret, kill = 0;
21665+
21666+ vxdprintk(VXD_CBIT(space, 8), "vx_set_space(%p[#%u],0x%08lx,%d)",
21667+ vxi, vxi->vx_id, mask, index);
21668+
21669+ if ((mask & space_mask.mask) != mask)
21670+ return -EINVAL;
21671+
21672+ if (index >= VX_SPACES)
21673+ return -EINVAL;
21674+
21675+ space = &vxi->space[index];
21676+
21677+ proxy_vxi = space->vx_nsproxy;
21678+ fs_vxi = space->vx_fs;
21679+
21680+ if (mask & CLONE_FS) {
21681+ fs = copy_fs_struct(current->fs);
21682+ if (!fs)
21683+ return -ENOMEM;
2380c486 21684+ }
d337f35e 21685+
4bf69007 21686+ task_lock(current);
2ba6f0dd 21687+
4bf69007
AM
21688+ if (mask & CLONE_FS) {
21689+ spin_lock(&fs_vxi->lock);
21690+ space->vx_fs = fs;
21691+ kill = !--fs_vxi->users;
21692+ spin_unlock(&fs_vxi->lock);
21693+ }
2ba6f0dd 21694+
4bf69007
AM
21695+ proxy_cur = current->nsproxy;
21696+ get_nsproxy(proxy_cur);
21697+ task_unlock(current);
2ba6f0dd 21698+
4bf69007
AM
21699+ if (kill)
21700+ free_fs_struct(fs_vxi);
2ba6f0dd 21701+
4bf69007
AM
21702+ proxy_new = __vs_merge_nsproxy(proxy_vxi, proxy_cur, mask);
21703+ if (IS_ERR(proxy_new)) {
21704+ ret = PTR_ERR(proxy_new);
21705+ goto out_put;
21706+ }
2ba6f0dd 21707+
4bf69007
AM
21708+ proxy_new = xchg(&space->vx_nsproxy, proxy_new);
21709+ space->vx_nsmask |= mask;
2ba6f0dd 21710+
4bf69007
AM
21711+ if (mask & CLONE_NEWUSER) {
21712+ struct cred *cred;
2ba6f0dd 21713+
4bf69007
AM
21714+ vxdprintk(VXD_CBIT(space, 10),
21715+ "vx_set_space(%p[#%u],%p) cred (%p,%p)",
21716+ vxi, vxi->vx_id, space->vx_cred,
21717+ current->real_cred, current->cred);
2ba6f0dd 21718+
4bf69007
AM
21719+ cred = prepare_creds();
21720+ cred = (struct cred *)xchg(&space->vx_cred, cred);
21721+ if (cred)
21722+ abort_creds(cred);
21723+ }
2ba6f0dd 21724+
4bf69007 21725+ ret = 0;
2ba6f0dd 21726+
4bf69007
AM
21727+ if (proxy_new)
21728+ put_nsproxy(proxy_new);
21729+out_put:
21730+ if (proxy_cur)
21731+ put_nsproxy(proxy_cur);
21732+ return ret;
21733+}
2ba6f0dd
AM
21734+
21735+
4bf69007
AM
21736+int vc_enter_space_v1(struct vx_info *vxi, void __user *data)
21737+{
21738+ struct vcmd_space_mask_v1 vc_data = { .mask = 0 };
2ba6f0dd 21739+
4bf69007
AM
21740+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21741+ return -EFAULT;
2ba6f0dd 21742+
4bf69007
AM
21743+ return vx_enter_space(vxi, vc_data.mask, 0);
21744+}
2ba6f0dd 21745+
4bf69007
AM
21746+int vc_enter_space(struct vx_info *vxi, void __user *data)
21747+{
21748+ struct vcmd_space_mask_v2 vc_data = { .mask = 0 };
2ba6f0dd 21749+
4bf69007
AM
21750+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21751+ return -EFAULT;
2ba6f0dd 21752+
4bf69007
AM
21753+ if (vc_data.index >= VX_SPACES)
21754+ return -EINVAL;
2ba6f0dd 21755+
4bf69007
AM
21756+ return vx_enter_space(vxi, vc_data.mask, vc_data.index);
21757+}
2ba6f0dd 21758+
4bf69007
AM
21759+int vc_set_space_v1(struct vx_info *vxi, void __user *data)
21760+{
21761+ struct vcmd_space_mask_v1 vc_data = { .mask = 0 };
2ba6f0dd 21762+
4bf69007
AM
21763+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21764+ return -EFAULT;
2ba6f0dd 21765+
4bf69007
AM
21766+ return vx_set_space(vxi, vc_data.mask, 0);
21767+}
2ba6f0dd 21768+
4bf69007
AM
21769+int vc_set_space(struct vx_info *vxi, void __user *data)
21770+{
21771+ struct vcmd_space_mask_v2 vc_data = { .mask = 0 };
2ba6f0dd 21772+
4bf69007
AM
21773+ if (data && copy_from_user(&vc_data, data, sizeof(vc_data)))
21774+ return -EFAULT;
2ba6f0dd 21775+
4bf69007
AM
21776+ if (vc_data.index >= VX_SPACES)
21777+ return -EINVAL;
2ba6f0dd 21778+
4bf69007
AM
21779+ return vx_set_space(vxi, vc_data.mask, vc_data.index);
21780+}
2ba6f0dd 21781+
4bf69007
AM
21782+int vc_get_space_mask(void __user *data, int type)
21783+{
21784+ const struct vcmd_space_mask_v1 *mask;
2ba6f0dd 21785+
4bf69007
AM
21786+ if (type == 0)
21787+ mask = &space_mask_v0;
21788+ else if (type == 1)
21789+ mask = &space_mask;
21790+ else
21791+ mask = &default_space_mask;
2ba6f0dd 21792+
4bf69007
AM
21793+ vxdprintk(VXD_CBIT(space, 10),
21794+ "vc_get_space_mask(%d) = %08llx", type, mask->mask);
2ba6f0dd 21795+
4bf69007
AM
21796+ if (copy_to_user(data, mask, sizeof(*mask)))
21797+ return -EFAULT;
21798+ return 0;
21799+}
2ba6f0dd 21800+
09a55596
AM
21801diff -NurpP --minimal linux-4.9.135/kernel/vserver/switch.c linux-4.9.135-vs2.3.9.8/kernel/vserver/switch.c
21802--- linux-4.9.135/kernel/vserver/switch.c 1970-01-01 00:00:00.000000000 +0000
21803+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/switch.c 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
21804@@ -0,0 +1,556 @@
21805+/*
21806+ * linux/kernel/vserver/switch.c
21807+ *
21808+ * Virtual Server: Syscall Switch
21809+ *
cc23e853 21810+ * Copyright (C) 2003-2011 Herbert Pötzl
4bf69007
AM
21811+ *
21812+ * V0.01 syscall switch
21813+ * V0.02 added signal to context
21814+ * V0.03 added rlimit functions
21815+ * V0.04 added iattr, task/xid functions
21816+ * V0.05 added debug/history stuff
21817+ * V0.06 added compat32 layer
21818+ * V0.07 vcmd args and perms
21819+ * V0.08 added status commands
21820+ * V0.09 added tag commands
21821+ * V0.10 added oom bias
21822+ * V0.11 added device commands
21823+ * V0.12 added warn mask
21824+ *
21825+ */
2ba6f0dd 21826+
4bf69007
AM
21827+#include <linux/vs_context.h>
21828+#include <linux/vs_network.h>
21829+#include <linux/vserver/switch.h>
2ba6f0dd 21830+
4bf69007 21831+#include "vci_config.h"
2ba6f0dd 21832+
2ba6f0dd 21833+
4bf69007
AM
21834+static inline
21835+int vc_get_version(uint32_t id)
21836+{
21837+ return VCI_VERSION;
21838+}
2ba6f0dd 21839+
4bf69007
AM
21840+static inline
21841+int vc_get_vci(uint32_t id)
21842+{
21843+ return vci_kernel_config();
21844+}
2ba6f0dd 21845+
4bf69007
AM
21846+#include <linux/vserver/context_cmd.h>
21847+#include <linux/vserver/cvirt_cmd.h>
21848+#include <linux/vserver/cacct_cmd.h>
21849+#include <linux/vserver/limit_cmd.h>
21850+#include <linux/vserver/network_cmd.h>
21851+#include <linux/vserver/sched_cmd.h>
21852+#include <linux/vserver/debug_cmd.h>
21853+#include <linux/vserver/inode_cmd.h>
21854+#include <linux/vserver/dlimit_cmd.h>
21855+#include <linux/vserver/signal_cmd.h>
21856+#include <linux/vserver/space_cmd.h>
21857+#include <linux/vserver/tag_cmd.h>
21858+#include <linux/vserver/device_cmd.h>
2ba6f0dd 21859+
4bf69007
AM
21860+#include <linux/vserver/inode.h>
21861+#include <linux/vserver/dlimit.h>
2ba6f0dd 21862+
2ba6f0dd 21863+
4bf69007
AM
21864+#ifdef CONFIG_COMPAT
21865+#define __COMPAT(name, id, data, compat) \
21866+ (compat) ? name ## _x32(id, data) : name(id, data)
21867+#define __COMPAT_NO_ID(name, data, compat) \
21868+ (compat) ? name ## _x32(data) : name(data)
21869+#else
21870+#define __COMPAT(name, id, data, compat) \
21871+ name(id, data)
21872+#define __COMPAT_NO_ID(name, data, compat) \
21873+ name(data)
21874+#endif
2ba6f0dd 21875+
2ba6f0dd 21876+
4bf69007
AM
21877+static inline
21878+long do_vcmd(uint32_t cmd, uint32_t id,
21879+ struct vx_info *vxi, struct nx_info *nxi,
21880+ void __user *data, int compat)
21881+{
21882+ switch (cmd) {
2ba6f0dd 21883+
4bf69007
AM
21884+ case VCMD_get_version:
21885+ return vc_get_version(id);
21886+ case VCMD_get_vci:
21887+ return vc_get_vci(id);
2ba6f0dd 21888+
4bf69007
AM
21889+ case VCMD_task_xid:
21890+ return vc_task_xid(id);
21891+ case VCMD_vx_info:
21892+ return vc_vx_info(vxi, data);
2ba6f0dd 21893+
4bf69007
AM
21894+ case VCMD_task_nid:
21895+ return vc_task_nid(id);
21896+ case VCMD_nx_info:
21897+ return vc_nx_info(nxi, data);
2ba6f0dd 21898+
4bf69007
AM
21899+ case VCMD_task_tag:
21900+ return vc_task_tag(id);
2ba6f0dd 21901+
4bf69007
AM
21902+ case VCMD_set_space_v1:
21903+ return vc_set_space_v1(vxi, data);
21904+ /* this is version 2 */
21905+ case VCMD_set_space:
21906+ return vc_set_space(vxi, data);
2ba6f0dd 21907+
4bf69007
AM
21908+ case VCMD_get_space_mask_v0:
21909+ return vc_get_space_mask(data, 0);
21910+ /* this is version 1 */
21911+ case VCMD_get_space_mask:
21912+ return vc_get_space_mask(data, 1);
2ba6f0dd 21913+
4bf69007
AM
21914+ case VCMD_get_space_default:
21915+ return vc_get_space_mask(data, -1);
2ba6f0dd 21916+
4bf69007
AM
21917+ case VCMD_set_umask:
21918+ return vc_set_umask(vxi, data);
2ba6f0dd 21919+
4bf69007
AM
21920+ case VCMD_get_umask:
21921+ return vc_get_umask(vxi, data);
2ba6f0dd 21922+
4bf69007
AM
21923+ case VCMD_set_wmask:
21924+ return vc_set_wmask(vxi, data);
2ba6f0dd 21925+
4bf69007
AM
21926+ case VCMD_get_wmask:
21927+ return vc_get_wmask(vxi, data);
21928+#ifdef CONFIG_IA32_EMULATION
21929+ case VCMD_get_rlimit:
21930+ return __COMPAT(vc_get_rlimit, vxi, data, compat);
21931+ case VCMD_set_rlimit:
21932+ return __COMPAT(vc_set_rlimit, vxi, data, compat);
21933+#else
21934+ case VCMD_get_rlimit:
21935+ return vc_get_rlimit(vxi, data);
21936+ case VCMD_set_rlimit:
21937+ return vc_set_rlimit(vxi, data);
21938+#endif
21939+ case VCMD_get_rlimit_mask:
21940+ return vc_get_rlimit_mask(id, data);
21941+ case VCMD_reset_hits:
21942+ return vc_reset_hits(vxi, data);
21943+ case VCMD_reset_minmax:
21944+ return vc_reset_minmax(vxi, data);
2ba6f0dd 21945+
4bf69007
AM
21946+ case VCMD_get_vhi_name:
21947+ return vc_get_vhi_name(vxi, data);
21948+ case VCMD_set_vhi_name:
21949+ return vc_set_vhi_name(vxi, data);
2ba6f0dd 21950+
4bf69007
AM
21951+ case VCMD_ctx_stat:
21952+ return vc_ctx_stat(vxi, data);
21953+ case VCMD_virt_stat:
21954+ return vc_virt_stat(vxi, data);
21955+ case VCMD_sock_stat:
21956+ return vc_sock_stat(vxi, data);
21957+ case VCMD_rlimit_stat:
21958+ return vc_rlimit_stat(vxi, data);
2ba6f0dd 21959+
4bf69007
AM
21960+ case VCMD_set_cflags:
21961+ return vc_set_cflags(vxi, data);
21962+ case VCMD_get_cflags:
21963+ return vc_get_cflags(vxi, data);
2ba6f0dd 21964+
4bf69007
AM
21965+ /* this is version 1 */
21966+ case VCMD_set_ccaps:
21967+ return vc_set_ccaps(vxi, data);
21968+ /* this is version 1 */
21969+ case VCMD_get_ccaps:
21970+ return vc_get_ccaps(vxi, data);
21971+ case VCMD_set_bcaps:
21972+ return vc_set_bcaps(vxi, data);
21973+ case VCMD_get_bcaps:
21974+ return vc_get_bcaps(vxi, data);
2ba6f0dd 21975+
4bf69007
AM
21976+ case VCMD_set_badness:
21977+ return vc_set_badness(vxi, data);
21978+ case VCMD_get_badness:
21979+ return vc_get_badness(vxi, data);
2ba6f0dd 21980+
4bf69007
AM
21981+ case VCMD_set_nflags:
21982+ return vc_set_nflags(nxi, data);
21983+ case VCMD_get_nflags:
21984+ return vc_get_nflags(nxi, data);
2ba6f0dd 21985+
4bf69007
AM
21986+ case VCMD_set_ncaps:
21987+ return vc_set_ncaps(nxi, data);
21988+ case VCMD_get_ncaps:
21989+ return vc_get_ncaps(nxi, data);
2ba6f0dd 21990+
4bf69007
AM
21991+ case VCMD_set_prio_bias:
21992+ return vc_set_prio_bias(vxi, data);
21993+ case VCMD_get_prio_bias:
21994+ return vc_get_prio_bias(vxi, data);
21995+ case VCMD_add_dlimit:
21996+ return __COMPAT(vc_add_dlimit, id, data, compat);
21997+ case VCMD_rem_dlimit:
21998+ return __COMPAT(vc_rem_dlimit, id, data, compat);
21999+ case VCMD_set_dlimit:
22000+ return __COMPAT(vc_set_dlimit, id, data, compat);
22001+ case VCMD_get_dlimit:
22002+ return __COMPAT(vc_get_dlimit, id, data, compat);
2ba6f0dd 22003+
4bf69007
AM
22004+ case VCMD_ctx_kill:
22005+ return vc_ctx_kill(vxi, data);
2ba6f0dd 22006+
4bf69007
AM
22007+ case VCMD_wait_exit:
22008+ return vc_wait_exit(vxi, data);
2ba6f0dd 22009+
4bf69007
AM
22010+ case VCMD_get_iattr:
22011+ return __COMPAT_NO_ID(vc_get_iattr, data, compat);
22012+ case VCMD_set_iattr:
22013+ return __COMPAT_NO_ID(vc_set_iattr, data, compat);
2ba6f0dd 22014+
4bf69007
AM
22015+ case VCMD_fget_iattr:
22016+ return vc_fget_iattr(id, data);
22017+ case VCMD_fset_iattr:
22018+ return vc_fset_iattr(id, data);
2ba6f0dd 22019+
4bf69007
AM
22020+ case VCMD_enter_space_v0:
22021+ return vc_enter_space_v1(vxi, NULL);
22022+ case VCMD_enter_space_v1:
22023+ return vc_enter_space_v1(vxi, data);
22024+ /* this is version 2 */
22025+ case VCMD_enter_space:
22026+ return vc_enter_space(vxi, data);
2ba6f0dd 22027+
4bf69007
AM
22028+ case VCMD_ctx_create_v0:
22029+ return vc_ctx_create(id, NULL);
22030+ case VCMD_ctx_create:
22031+ return vc_ctx_create(id, data);
22032+ case VCMD_ctx_migrate_v0:
22033+ return vc_ctx_migrate(vxi, NULL);
22034+ case VCMD_ctx_migrate:
22035+ return vc_ctx_migrate(vxi, data);
2ba6f0dd 22036+
4bf69007
AM
22037+ case VCMD_net_create_v0:
22038+ return vc_net_create(id, NULL);
22039+ case VCMD_net_create:
22040+ return vc_net_create(id, data);
22041+ case VCMD_net_migrate:
22042+ return vc_net_migrate(nxi, data);
2ba6f0dd 22043+
4bf69007
AM
22044+ case VCMD_tag_migrate:
22045+ return vc_tag_migrate(id);
2ba6f0dd 22046+
4bf69007
AM
22047+ case VCMD_net_add:
22048+ return vc_net_add(nxi, data);
22049+ case VCMD_net_remove:
22050+ return vc_net_remove(nxi, data);
2ba6f0dd 22051+
4bf69007
AM
22052+ case VCMD_net_add_ipv4_v1:
22053+ return vc_net_add_ipv4_v1(nxi, data);
22054+ /* this is version 2 */
22055+ case VCMD_net_add_ipv4:
22056+ return vc_net_add_ipv4(nxi, data);
2ba6f0dd 22057+
4bf69007
AM
22058+ case VCMD_net_rem_ipv4_v1:
22059+ return vc_net_rem_ipv4_v1(nxi, data);
22060+ /* this is version 2 */
22061+ case VCMD_net_rem_ipv4:
22062+ return vc_net_rem_ipv4(nxi, data);
22063+#ifdef CONFIG_IPV6
22064+ case VCMD_net_add_ipv6:
22065+ return vc_net_add_ipv6(nxi, data);
22066+ case VCMD_net_remove_ipv6:
22067+ return vc_net_remove_ipv6(nxi, data);
22068+#endif
22069+/* case VCMD_add_match_ipv4:
22070+ return vc_add_match_ipv4(nxi, data);
22071+ case VCMD_get_match_ipv4:
22072+ return vc_get_match_ipv4(nxi, data);
22073+#ifdef CONFIG_IPV6
22074+ case VCMD_add_match_ipv6:
22075+ return vc_add_match_ipv6(nxi, data);
22076+ case VCMD_get_match_ipv6:
22077+ return vc_get_match_ipv6(nxi, data);
22078+#endif */
2ba6f0dd 22079+
4bf69007
AM
22080+#ifdef CONFIG_VSERVER_DEVICE
22081+ case VCMD_set_mapping:
22082+ return __COMPAT(vc_set_mapping, vxi, data, compat);
22083+ case VCMD_unset_mapping:
22084+ return __COMPAT(vc_unset_mapping, vxi, data, compat);
22085+#endif
22086+#ifdef CONFIG_VSERVER_HISTORY
22087+ case VCMD_dump_history:
22088+ return vc_dump_history(id);
22089+ case VCMD_read_history:
22090+ return __COMPAT(vc_read_history, id, data, compat);
22091+#endif
22092+ default:
22093+ vxwprintk_task(1, "unimplemented VCMD_%02d_%d[%d]",
22094+ VC_CATEGORY(cmd), VC_COMMAND(cmd), VC_VERSION(cmd));
22095+ }
22096+ return -ENOSYS;
22097+}
2ba6f0dd 22098+
2ba6f0dd 22099+
4bf69007
AM
22100+#define __VCMD(vcmd, _perm, _args, _flags) \
22101+ case VCMD_ ## vcmd: perm = _perm; \
22102+ args = _args; flags = _flags; break
2ba6f0dd 22103+
2ba6f0dd 22104+
4bf69007
AM
22105+#define VCA_NONE 0x00
22106+#define VCA_VXI 0x01
22107+#define VCA_NXI 0x02
2ba6f0dd 22108+
4bf69007
AM
22109+#define VCF_NONE 0x00
22110+#define VCF_INFO 0x01
22111+#define VCF_ADMIN 0x02
22112+#define VCF_ARES 0x06 /* includes admin */
22113+#define VCF_SETUP 0x08
2ba6f0dd 22114+
4bf69007 22115+#define VCF_ZIDOK 0x10 /* zero id okay */
2ba6f0dd 22116+
2ba6f0dd
AM
22117+
22118+static inline
4bf69007 22119+long do_vserver(uint32_t cmd, uint32_t id, void __user *data, int compat)
2ba6f0dd 22120+{
4bf69007
AM
22121+ long ret;
22122+ int permit = -1, state = 0;
22123+ int perm = -1, args = 0, flags = 0;
22124+ struct vx_info *vxi = NULL;
22125+ struct nx_info *nxi = NULL;
2ba6f0dd 22126+
4bf69007
AM
22127+ switch (cmd) {
22128+ /* unprivileged commands */
22129+ __VCMD(get_version, 0, VCA_NONE, 0);
22130+ __VCMD(get_vci, 0, VCA_NONE, 0);
22131+ __VCMD(get_rlimit_mask, 0, VCA_NONE, 0);
22132+ __VCMD(get_space_mask_v0,0, VCA_NONE, 0);
22133+ __VCMD(get_space_mask, 0, VCA_NONE, 0);
22134+ __VCMD(get_space_default,0, VCA_NONE, 0);
2ba6f0dd 22135+
4bf69007
AM
22136+ /* info commands */
22137+ __VCMD(task_xid, 2, VCA_NONE, 0);
22138+ __VCMD(reset_hits, 2, VCA_VXI, 0);
22139+ __VCMD(reset_minmax, 2, VCA_VXI, 0);
22140+ __VCMD(vx_info, 3, VCA_VXI, VCF_INFO);
22141+ __VCMD(get_bcaps, 3, VCA_VXI, VCF_INFO);
22142+ __VCMD(get_ccaps, 3, VCA_VXI, VCF_INFO);
22143+ __VCMD(get_cflags, 3, VCA_VXI, VCF_INFO);
22144+ __VCMD(get_umask, 3, VCA_VXI, VCF_INFO);
22145+ __VCMD(get_wmask, 3, VCA_VXI, VCF_INFO);
22146+ __VCMD(get_badness, 3, VCA_VXI, VCF_INFO);
22147+ __VCMD(get_vhi_name, 3, VCA_VXI, VCF_INFO);
22148+ __VCMD(get_rlimit, 3, VCA_VXI, VCF_INFO);
2ba6f0dd 22149+
4bf69007
AM
22150+ __VCMD(ctx_stat, 3, VCA_VXI, VCF_INFO);
22151+ __VCMD(virt_stat, 3, VCA_VXI, VCF_INFO);
22152+ __VCMD(sock_stat, 3, VCA_VXI, VCF_INFO);
22153+ __VCMD(rlimit_stat, 3, VCA_VXI, VCF_INFO);
2ba6f0dd 22154+
4bf69007
AM
22155+ __VCMD(task_nid, 2, VCA_NONE, 0);
22156+ __VCMD(nx_info, 3, VCA_NXI, VCF_INFO);
22157+ __VCMD(get_ncaps, 3, VCA_NXI, VCF_INFO);
22158+ __VCMD(get_nflags, 3, VCA_NXI, VCF_INFO);
2ba6f0dd 22159+
4bf69007 22160+ __VCMD(task_tag, 2, VCA_NONE, 0);
2ba6f0dd 22161+
4bf69007
AM
22162+ __VCMD(get_iattr, 2, VCA_NONE, 0);
22163+ __VCMD(fget_iattr, 2, VCA_NONE, 0);
22164+ __VCMD(get_dlimit, 3, VCA_NONE, VCF_INFO);
22165+ __VCMD(get_prio_bias, 3, VCA_VXI, VCF_INFO);
2ba6f0dd 22166+
4bf69007
AM
22167+ /* lower admin commands */
22168+ __VCMD(wait_exit, 4, VCA_VXI, VCF_INFO);
22169+ __VCMD(ctx_create_v0, 5, VCA_NONE, 0);
22170+ __VCMD(ctx_create, 5, VCA_NONE, 0);
22171+ __VCMD(ctx_migrate_v0, 5, VCA_VXI, VCF_ADMIN);
22172+ __VCMD(ctx_migrate, 5, VCA_VXI, VCF_ADMIN);
22173+ __VCMD(enter_space_v0, 5, VCA_VXI, VCF_ADMIN);
22174+ __VCMD(enter_space_v1, 5, VCA_VXI, VCF_ADMIN);
22175+ __VCMD(enter_space, 5, VCA_VXI, VCF_ADMIN);
2ba6f0dd 22176+
4bf69007
AM
22177+ __VCMD(net_create_v0, 5, VCA_NONE, 0);
22178+ __VCMD(net_create, 5, VCA_NONE, 0);
22179+ __VCMD(net_migrate, 5, VCA_NXI, VCF_ADMIN);
2ba6f0dd 22180+
4bf69007 22181+ __VCMD(tag_migrate, 5, VCA_NONE, VCF_ADMIN);
2ba6f0dd 22182+
4bf69007
AM
22183+ /* higher admin commands */
22184+ __VCMD(ctx_kill, 6, VCA_VXI, VCF_ARES);
22185+ __VCMD(set_space_v1, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22186+ __VCMD(set_space, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
2ba6f0dd 22187+
4bf69007
AM
22188+ __VCMD(set_ccaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22189+ __VCMD(set_bcaps, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22190+ __VCMD(set_cflags, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22191+ __VCMD(set_umask, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22192+ __VCMD(set_wmask, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22193+ __VCMD(set_badness, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
2ba6f0dd 22194+
4bf69007
AM
22195+ __VCMD(set_vhi_name, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22196+ __VCMD(set_rlimit, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
22197+ __VCMD(set_prio_bias, 7, VCA_VXI, VCF_ARES | VCF_SETUP);
2ba6f0dd 22198+
4bf69007
AM
22199+ __VCMD(set_ncaps, 7, VCA_NXI, VCF_ARES | VCF_SETUP);
22200+ __VCMD(set_nflags, 7, VCA_NXI, VCF_ARES | VCF_SETUP);
22201+ __VCMD(net_add, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22202+ __VCMD(net_remove, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22203+ __VCMD(net_add_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22204+ __VCMD(net_rem_ipv4_v1, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22205+ __VCMD(net_add_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22206+ __VCMD(net_rem_ipv4, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22207+#ifdef CONFIG_IPV6
22208+ __VCMD(net_add_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22209+ __VCMD(net_remove_ipv6, 8, VCA_NXI, VCF_ARES | VCF_SETUP);
22210+#endif
22211+ __VCMD(set_iattr, 7, VCA_NONE, 0);
22212+ __VCMD(fset_iattr, 7, VCA_NONE, 0);
22213+ __VCMD(set_dlimit, 7, VCA_NONE, VCF_ARES);
22214+ __VCMD(add_dlimit, 8, VCA_NONE, VCF_ARES);
22215+ __VCMD(rem_dlimit, 8, VCA_NONE, VCF_ARES);
2ba6f0dd 22216+
4bf69007
AM
22217+#ifdef CONFIG_VSERVER_DEVICE
22218+ __VCMD(set_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK);
22219+ __VCMD(unset_mapping, 8, VCA_VXI, VCF_ARES|VCF_ZIDOK);
22220+#endif
22221+ /* debug level admin commands */
22222+#ifdef CONFIG_VSERVER_HISTORY
22223+ __VCMD(dump_history, 9, VCA_NONE, 0);
22224+ __VCMD(read_history, 9, VCA_NONE, 0);
22225+#endif
2ba6f0dd 22226+
4bf69007
AM
22227+ default:
22228+ perm = -1;
22229+ }
2ba6f0dd 22230+
4bf69007
AM
22231+ vxdprintk(VXD_CBIT(switch, 0),
22232+ "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]",
22233+ VC_CATEGORY(cmd), VC_COMMAND(cmd),
22234+ VC_VERSION(cmd), id, data, compat,
22235+ perm, args, flags);
2ba6f0dd 22236+
4bf69007
AM
22237+ ret = -ENOSYS;
22238+ if (perm < 0)
22239+ goto out;
2ba6f0dd 22240+
4bf69007
AM
22241+ state = 1;
22242+ if (!capable(CAP_CONTEXT))
22243+ goto out;
2ba6f0dd 22244+
4bf69007
AM
22245+ state = 2;
22246+ /* moved here from the individual commands */
22247+ ret = -EPERM;
22248+ if ((perm > 1) && !capable(CAP_SYS_ADMIN))
22249+ goto out;
2ba6f0dd 22250+
4bf69007
AM
22251+ state = 3;
22252+ /* vcmd involves resource management */
22253+ ret = -EPERM;
22254+ if ((flags & VCF_ARES) && !capable(CAP_SYS_RESOURCE))
22255+ goto out;
2ba6f0dd 22256+
4bf69007
AM
22257+ state = 4;
22258+ /* various legacy exceptions */
22259+ switch (cmd) {
22260+ /* will go away when spectator is a cap */
22261+ case VCMD_ctx_migrate_v0:
22262+ case VCMD_ctx_migrate:
22263+ if (id == 1) {
22264+ current->xid = 1;
22265+ ret = 1;
22266+ goto out;
22267+ }
22268+ break;
2ba6f0dd 22269+
4bf69007
AM
22270+ /* will go away when spectator is a cap */
22271+ case VCMD_net_migrate:
22272+ if (id == 1) {
22273+ current->nid = 1;
22274+ ret = 1;
22275+ goto out;
22276+ }
22277+ break;
22278+ }
2ba6f0dd 22279+
4bf69007
AM
22280+ /* vcmds are fine by default */
22281+ permit = 1;
2ba6f0dd 22282+
4bf69007
AM
22283+ /* admin type vcmds require admin ... */
22284+ if (flags & VCF_ADMIN)
22285+ permit = vx_check(0, VS_ADMIN) ? 1 : 0;
2ba6f0dd 22286+
4bf69007
AM
22287+ /* ... but setup type vcmds override that */
22288+ if (!permit && (flags & VCF_SETUP))
22289+ permit = vx_flags(VXF_STATE_SETUP, 0) ? 2 : 0;
2ba6f0dd 22290+
4bf69007
AM
22291+ state = 5;
22292+ ret = -EPERM;
22293+ if (!permit)
22294+ goto out;
2ba6f0dd 22295+
4bf69007
AM
22296+ state = 6;
22297+ if (!id && (flags & VCF_ZIDOK))
22298+ goto skip_id;
2ba6f0dd 22299+
4bf69007
AM
22300+ ret = -ESRCH;
22301+ if (args & VCA_VXI) {
22302+ vxi = lookup_vx_info(id);
22303+ if (!vxi)
22304+ goto out;
2ba6f0dd 22305+
4bf69007
AM
22306+ if ((flags & VCF_ADMIN) &&
22307+ /* special case kill for shutdown */
22308+ (cmd != VCMD_ctx_kill) &&
22309+ /* can context be administrated? */
22310+ !vx_info_flags(vxi, VXF_STATE_ADMIN, 0)) {
22311+ ret = -EACCES;
22312+ goto out_vxi;
22313+ }
22314+ }
22315+ state = 7;
22316+ if (args & VCA_NXI) {
22317+ nxi = lookup_nx_info(id);
22318+ if (!nxi)
22319+ goto out_vxi;
2ba6f0dd 22320+
4bf69007
AM
22321+ if ((flags & VCF_ADMIN) &&
22322+ /* can context be administrated? */
22323+ !nx_info_flags(nxi, NXF_STATE_ADMIN, 0)) {
22324+ ret = -EACCES;
22325+ goto out_nxi;
22326+ }
22327+ }
22328+skip_id:
22329+ state = 8;
22330+ ret = do_vcmd(cmd, id, vxi, nxi, data, compat);
2ba6f0dd 22331+
4bf69007
AM
22332+out_nxi:
22333+ if ((args & VCA_NXI) && nxi)
22334+ put_nx_info(nxi);
22335+out_vxi:
22336+ if ((args & VCA_VXI) && vxi)
22337+ put_vx_info(vxi);
22338+out:
22339+ vxdprintk(VXD_CBIT(switch, 1),
22340+ "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]",
22341+ VC_CATEGORY(cmd), VC_COMMAND(cmd),
22342+ VC_VERSION(cmd), ret, ret, state, permit);
22343+ return ret;
22344+}
2ba6f0dd 22345+
4bf69007
AM
22346+asmlinkage long
22347+sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
22348+{
22349+ return do_vserver(cmd, id, data, 0);
22350+}
2ba6f0dd 22351+
4bf69007 22352+#ifdef CONFIG_COMPAT
2ba6f0dd 22353+
4bf69007
AM
22354+asmlinkage long
22355+sys32_vserver(uint32_t cmd, uint32_t id, void __user *data)
22356+{
22357+ return do_vserver(cmd, id, data, 1);
22358+}
2ba6f0dd 22359+
4bf69007 22360+#endif /* CONFIG_COMPAT */
09a55596
AM
22361diff -NurpP --minimal linux-4.9.135/kernel/vserver/sysctl.c linux-4.9.135-vs2.3.9.8/kernel/vserver/sysctl.c
22362--- linux-4.9.135/kernel/vserver/sysctl.c 1970-01-01 00:00:00.000000000 +0000
22363+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/sysctl.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 22364@@ -0,0 +1,249 @@
4bf69007
AM
22365+/*
22366+ * kernel/vserver/sysctl.c
22367+ *
22368+ * Virtual Context Support
22369+ *
cc23e853 22370+ * Copyright (C) 2004-2007 Herbert Pötzl
4bf69007
AM
22371+ *
22372+ * V0.01 basic structure
22373+ *
22374+ */
2ba6f0dd 22375+
4bf69007
AM
22376+#include <linux/module.h>
22377+#include <linux/ctype.h>
22378+#include <linux/sysctl.h>
22379+#include <linux/parser.h>
cc23e853
AM
22380+#include <linux/utsname.h>
22381+
4bf69007 22382+#include <asm/uaccess.h>
cc23e853 22383+#include <asm/sections.h>
2ba6f0dd 22384+
4bf69007
AM
22385+enum {
22386+ CTL_DEBUG_ERROR = 0,
22387+ CTL_DEBUG_SWITCH = 1,
22388+ CTL_DEBUG_XID,
22389+ CTL_DEBUG_NID,
22390+ CTL_DEBUG_TAG,
22391+ CTL_DEBUG_NET,
22392+ CTL_DEBUG_LIMIT,
22393+ CTL_DEBUG_CRES,
22394+ CTL_DEBUG_DLIM,
22395+ CTL_DEBUG_QUOTA,
22396+ CTL_DEBUG_CVIRT,
22397+ CTL_DEBUG_SPACE,
22398+ CTL_DEBUG_PERM,
22399+ CTL_DEBUG_MISC,
2ba6f0dd
AM
22400+};
22401+
2ba6f0dd 22402+
4bf69007
AM
22403+unsigned int vs_debug_switch = 0;
22404+unsigned int vs_debug_xid = 0;
22405+unsigned int vs_debug_nid = 0;
22406+unsigned int vs_debug_tag = 0;
22407+unsigned int vs_debug_net = 0;
22408+unsigned int vs_debug_limit = 0;
22409+unsigned int vs_debug_cres = 0;
22410+unsigned int vs_debug_dlim = 0;
22411+unsigned int vs_debug_quota = 0;
22412+unsigned int vs_debug_cvirt = 0;
22413+unsigned int vs_debug_space = 0;
22414+unsigned int vs_debug_perm = 0;
22415+unsigned int vs_debug_misc = 0;
2ba6f0dd 22416+
2ba6f0dd 22417+
4bf69007 22418+static struct ctl_table_header *vserver_table_header;
bb20add7 22419+static struct ctl_table vserver_root_table[];
4bf69007 22420+
2ba6f0dd 22421+
4bf69007
AM
22422+void vserver_register_sysctl(void)
22423+{
22424+ if (!vserver_table_header) {
22425+ vserver_table_header = register_sysctl_table(vserver_root_table);
22426+ }
2ba6f0dd 22427+
4bf69007 22428+}
2ba6f0dd 22429+
4bf69007
AM
22430+void vserver_unregister_sysctl(void)
22431+{
22432+ if (vserver_table_header) {
22433+ unregister_sysctl_table(vserver_table_header);
22434+ vserver_table_header = NULL;
22435+ }
22436+}
2ba6f0dd 22437+
bb20add7 22438+static int proc_dodebug(struct ctl_table *table, int write,
4bf69007
AM
22439+ void __user *buffer, size_t *lenp, loff_t *ppos)
22440+{
22441+ char tmpbuf[20], *p, c;
22442+ unsigned int value;
22443+ size_t left, len;
2ba6f0dd 22444+
4bf69007
AM
22445+ if ((*ppos && !write) || !*lenp) {
22446+ *lenp = 0;
22447+ return 0;
22448+ }
2ba6f0dd 22449+
4bf69007 22450+ left = *lenp;
2ba6f0dd 22451+
4bf69007
AM
22452+ if (write) {
22453+ if (!access_ok(VERIFY_READ, buffer, left))
22454+ return -EFAULT;
22455+ p = (char *)buffer;
22456+ while (left && __get_user(c, p) >= 0 && isspace(c))
22457+ left--, p++;
22458+ if (!left)
22459+ goto done;
2ba6f0dd 22460+
4bf69007
AM
22461+ if (left > sizeof(tmpbuf) - 1)
22462+ return -EINVAL;
22463+ if (copy_from_user(tmpbuf, p, left))
22464+ return -EFAULT;
22465+ tmpbuf[left] = '\0';
2ba6f0dd 22466+
4bf69007
AM
22467+ for (p = tmpbuf, value = 0; '0' <= *p && *p <= '9'; p++, left--)
22468+ value = 10 * value + (*p - '0');
22469+ if (*p && !isspace(*p))
22470+ return -EINVAL;
22471+ while (left && isspace(*p))
22472+ left--, p++;
22473+ *(unsigned int *)table->data = value;
22474+ } else {
22475+ if (!access_ok(VERIFY_WRITE, buffer, left))
22476+ return -EFAULT;
22477+ len = sprintf(tmpbuf, "%d", *(unsigned int *)table->data);
22478+ if (len > left)
22479+ len = left;
22480+ if (__copy_to_user(buffer, tmpbuf, len))
22481+ return -EFAULT;
22482+ if ((left -= len) > 0) {
22483+ if (put_user('\n', (char *)buffer + len))
22484+ return -EFAULT;
22485+ left--;
22486+ }
22487+ }
2ba6f0dd 22488+
4bf69007
AM
22489+done:
22490+ *lenp -= left;
22491+ *ppos += *lenp;
22492+ return 0;
22493+}
2ba6f0dd 22494+
4bf69007 22495+static int zero;
2ba6f0dd 22496+
4bf69007
AM
22497+#define CTL_ENTRY(ctl, name) \
22498+ { \
22499+ .procname = #name, \
22500+ .data = &vs_ ## name, \
22501+ .maxlen = sizeof(int), \
22502+ .mode = 0644, \
22503+ .proc_handler = &proc_dodebug, \
22504+ .extra1 = &zero, \
22505+ .extra2 = &zero, \
22506+ }
2ba6f0dd 22507+
bb20add7 22508+static struct ctl_table vserver_debug_table[] = {
4bf69007
AM
22509+ CTL_ENTRY(CTL_DEBUG_SWITCH, debug_switch),
22510+ CTL_ENTRY(CTL_DEBUG_XID, debug_xid),
22511+ CTL_ENTRY(CTL_DEBUG_NID, debug_nid),
22512+ CTL_ENTRY(CTL_DEBUG_TAG, debug_tag),
22513+ CTL_ENTRY(CTL_DEBUG_NET, debug_net),
22514+ CTL_ENTRY(CTL_DEBUG_LIMIT, debug_limit),
22515+ CTL_ENTRY(CTL_DEBUG_CRES, debug_cres),
22516+ CTL_ENTRY(CTL_DEBUG_DLIM, debug_dlim),
22517+ CTL_ENTRY(CTL_DEBUG_QUOTA, debug_quota),
22518+ CTL_ENTRY(CTL_DEBUG_CVIRT, debug_cvirt),
22519+ CTL_ENTRY(CTL_DEBUG_SPACE, debug_space),
22520+ CTL_ENTRY(CTL_DEBUG_PERM, debug_perm),
22521+ CTL_ENTRY(CTL_DEBUG_MISC, debug_misc),
22522+ { 0 }
22523+};
2ba6f0dd 22524+
bb20add7 22525+static struct ctl_table vserver_root_table[] = {
4bf69007
AM
22526+ {
22527+ .procname = "vserver",
22528+ .mode = 0555,
22529+ .child = vserver_debug_table
22530+ },
22531+ { 0 }
22532+};
2ba6f0dd 22533+
2ba6f0dd 22534+
4bf69007
AM
22535+static match_table_t tokens = {
22536+ { CTL_DEBUG_SWITCH, "switch=%x" },
22537+ { CTL_DEBUG_XID, "xid=%x" },
22538+ { CTL_DEBUG_NID, "nid=%x" },
22539+ { CTL_DEBUG_TAG, "tag=%x" },
22540+ { CTL_DEBUG_NET, "net=%x" },
22541+ { CTL_DEBUG_LIMIT, "limit=%x" },
22542+ { CTL_DEBUG_CRES, "cres=%x" },
22543+ { CTL_DEBUG_DLIM, "dlim=%x" },
22544+ { CTL_DEBUG_QUOTA, "quota=%x" },
22545+ { CTL_DEBUG_CVIRT, "cvirt=%x" },
22546+ { CTL_DEBUG_SPACE, "space=%x" },
22547+ { CTL_DEBUG_PERM, "perm=%x" },
22548+ { CTL_DEBUG_MISC, "misc=%x" },
22549+ { CTL_DEBUG_ERROR, NULL }
22550+};
2ba6f0dd 22551+
4bf69007
AM
22552+#define HANDLE_CASE(id, name, val) \
22553+ case CTL_DEBUG_ ## id: \
22554+ vs_debug_ ## name = val; \
22555+ printk("vs_debug_" #name "=0x%x\n", val); \
22556+ break
2ba6f0dd 22557+
2ba6f0dd 22558+
4bf69007
AM
22559+static int __init vs_debug_setup(char *str)
22560+{
22561+ char *p;
22562+ int token;
2ba6f0dd 22563+
4bf69007
AM
22564+ printk("vs_debug_setup(%s)\n", str);
22565+ while ((p = strsep(&str, ",")) != NULL) {
22566+ substring_t args[MAX_OPT_ARGS];
22567+ unsigned int value;
2ba6f0dd 22568+
4bf69007
AM
22569+ if (!*p)
22570+ continue;
2ba6f0dd 22571+
4bf69007
AM
22572+ token = match_token(p, tokens, args);
22573+ value = (token > 0) ? simple_strtoul(args[0].from, NULL, 0) : 0;
2ba6f0dd 22574+
4bf69007
AM
22575+ switch (token) {
22576+ HANDLE_CASE(SWITCH, switch, value);
22577+ HANDLE_CASE(XID, xid, value);
22578+ HANDLE_CASE(NID, nid, value);
22579+ HANDLE_CASE(TAG, tag, value);
22580+ HANDLE_CASE(NET, net, value);
22581+ HANDLE_CASE(LIMIT, limit, value);
22582+ HANDLE_CASE(CRES, cres, value);
22583+ HANDLE_CASE(DLIM, dlim, value);
22584+ HANDLE_CASE(QUOTA, quota, value);
22585+ HANDLE_CASE(CVIRT, cvirt, value);
22586+ HANDLE_CASE(SPACE, space, value);
22587+ HANDLE_CASE(PERM, perm, value);
22588+ HANDLE_CASE(MISC, misc, value);
22589+ default:
22590+ return -EINVAL;
22591+ break;
22592+ }
22593+ }
22594+ return 1;
22595+}
2ba6f0dd 22596+
4bf69007 22597+__setup("vsdebug=", vs_debug_setup);
2ba6f0dd 22598+
2ba6f0dd 22599+
2ba6f0dd 22600+
4bf69007
AM
22601+EXPORT_SYMBOL_GPL(vs_debug_switch);
22602+EXPORT_SYMBOL_GPL(vs_debug_xid);
22603+EXPORT_SYMBOL_GPL(vs_debug_nid);
22604+EXPORT_SYMBOL_GPL(vs_debug_net);
22605+EXPORT_SYMBOL_GPL(vs_debug_limit);
22606+EXPORT_SYMBOL_GPL(vs_debug_cres);
22607+EXPORT_SYMBOL_GPL(vs_debug_dlim);
22608+EXPORT_SYMBOL_GPL(vs_debug_quota);
22609+EXPORT_SYMBOL_GPL(vs_debug_cvirt);
22610+EXPORT_SYMBOL_GPL(vs_debug_space);
22611+EXPORT_SYMBOL_GPL(vs_debug_perm);
22612+EXPORT_SYMBOL_GPL(vs_debug_misc);
2ba6f0dd 22613+
09a55596
AM
22614diff -NurpP --minimal linux-4.9.135/kernel/vserver/tag.c linux-4.9.135-vs2.3.9.8/kernel/vserver/tag.c
22615--- linux-4.9.135/kernel/vserver/tag.c 1970-01-01 00:00:00.000000000 +0000
22616+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/tag.c 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
22617@@ -0,0 +1,63 @@
22618+/*
22619+ * linux/kernel/vserver/tag.c
22620+ *
22621+ * Virtual Server: Shallow Tag Space
22622+ *
cc23e853 22623+ * Copyright (C) 2007 Herbert Pötzl
4bf69007
AM
22624+ *
22625+ * V0.01 basic implementation
22626+ *
22627+ */
2ba6f0dd 22628+
4bf69007
AM
22629+#include <linux/sched.h>
22630+#include <linux/vserver/debug.h>
22631+#include <linux/vs_pid.h>
22632+#include <linux/vs_tag.h>
2ba6f0dd 22633+
4bf69007 22634+#include <linux/vserver/tag_cmd.h>
2ba6f0dd 22635+
2ba6f0dd 22636+
61333608 22637+int dx_migrate_task(struct task_struct *p, vtag_t tag)
4bf69007
AM
22638+{
22639+ if (!p)
22640+ BUG();
2ba6f0dd 22641+
4bf69007
AM
22642+ vxdprintk(VXD_CBIT(tag, 5),
22643+ "dx_migrate_task(%p[#%d],#%d)", p, p->tag, tag);
2ba6f0dd 22644+
4bf69007
AM
22645+ task_lock(p);
22646+ p->tag = tag;
22647+ task_unlock(p);
2ba6f0dd 22648+
4bf69007
AM
22649+ vxdprintk(VXD_CBIT(tag, 5),
22650+ "moved task %p into [#%d]", p, tag);
22651+ return 0;
22652+}
2ba6f0dd 22653+
4bf69007 22654+/* vserver syscall commands below here */
2ba6f0dd 22655+
4bf69007 22656+/* task tag functions */
2ba6f0dd 22657+
2ba6f0dd 22658+
4bf69007
AM
22659+int vc_task_tag(uint32_t id)
22660+{
61333608 22661+ vtag_t tag;
2ba6f0dd 22662+
4bf69007
AM
22663+ if (id) {
22664+ struct task_struct *tsk;
22665+ rcu_read_lock();
22666+ tsk = find_task_by_real_pid(id);
22667+ tag = (tsk) ? tsk->tag : -ESRCH;
22668+ rcu_read_unlock();
22669+ } else
22670+ tag = dx_current_tag();
22671+ return tag;
22672+}
2ba6f0dd 22673+
2ba6f0dd 22674+
4bf69007
AM
22675+int vc_tag_migrate(uint32_t tag)
22676+{
22677+ return dx_migrate_task(current, tag & 0xFFFF);
22678+}
2ba6f0dd 22679+
2ba6f0dd 22680+
09a55596
AM
22681diff -NurpP --minimal linux-4.9.135/kernel/vserver/vci_config.h linux-4.9.135-vs2.3.9.8/kernel/vserver/vci_config.h
22682--- linux-4.9.135/kernel/vserver/vci_config.h 1970-01-01 00:00:00.000000000 +0000
22683+++ linux-4.9.135-vs2.3.9.8/kernel/vserver/vci_config.h 2018-10-20 04:58:15.000000000 +0000
4bf69007 22684@@ -0,0 +1,80 @@
2ba6f0dd 22685+
4bf69007 22686+/* interface version */
2ba6f0dd 22687+
4bf69007 22688+#define VCI_VERSION 0x00020308
2ba6f0dd 22689+
2ba6f0dd 22690+
4bf69007
AM
22691+enum {
22692+ VCI_KCBIT_NO_DYNAMIC = 0,
2ba6f0dd 22693+
4bf69007
AM
22694+ VCI_KCBIT_PROC_SECURE = 4,
22695+ /* VCI_KCBIT_HARDCPU = 5, */
22696+ /* VCI_KCBIT_IDLELIMIT = 6, */
22697+ /* VCI_KCBIT_IDLETIME = 7, */
2ba6f0dd 22698+
4bf69007
AM
22699+ VCI_KCBIT_COWBL = 8,
22700+ VCI_KCBIT_FULLCOWBL = 9,
22701+ VCI_KCBIT_SPACES = 10,
22702+ VCI_KCBIT_NETV2 = 11,
22703+ VCI_KCBIT_MEMCG = 12,
22704+ VCI_KCBIT_MEMCG_SWAP = 13,
2ba6f0dd 22705+
4bf69007
AM
22706+ VCI_KCBIT_DEBUG = 16,
22707+ VCI_KCBIT_HISTORY = 20,
22708+ VCI_KCBIT_TAGGED = 24,
22709+ VCI_KCBIT_PPTAG = 28,
2ba6f0dd 22710+
4bf69007 22711+ VCI_KCBIT_MORE = 31,
2ba6f0dd
AM
22712+};
22713+
2ba6f0dd 22714+
4bf69007
AM
22715+static inline uint32_t vci_kernel_config(void)
22716+{
22717+ return
22718+ (1 << VCI_KCBIT_NO_DYNAMIC) |
2ba6f0dd 22719+
4bf69007
AM
22720+ /* configured features */
22721+#ifdef CONFIG_VSERVER_PROC_SECURE
22722+ (1 << VCI_KCBIT_PROC_SECURE) |
22723+#endif
22724+#ifdef CONFIG_VSERVER_COWBL
22725+ (1 << VCI_KCBIT_COWBL) |
22726+ (1 << VCI_KCBIT_FULLCOWBL) |
22727+#endif
22728+ (1 << VCI_KCBIT_SPACES) |
22729+ (1 << VCI_KCBIT_NETV2) |
22730+#ifdef CONFIG_MEMCG
22731+ (1 << VCI_KCBIT_MEMCG) |
22732+#endif
22733+#ifdef CONFIG_MEMCG_SWAP
22734+ (1 << VCI_KCBIT_MEMCG_SWAP) |
22735+#endif
2ba6f0dd 22736+
4bf69007
AM
22737+ /* debug options */
22738+#ifdef CONFIG_VSERVER_DEBUG
22739+ (1 << VCI_KCBIT_DEBUG) |
22740+#endif
22741+#ifdef CONFIG_VSERVER_HISTORY
22742+ (1 << VCI_KCBIT_HISTORY) |
22743+#endif
2ba6f0dd 22744+
4bf69007
AM
22745+ /* inode context tagging */
22746+#if defined(CONFIG_TAGGING_NONE)
22747+ (0 << VCI_KCBIT_TAGGED) |
22748+#elif defined(CONFIG_TAGGING_UID16)
22749+ (1 << VCI_KCBIT_TAGGED) |
22750+#elif defined(CONFIG_TAGGING_GID16)
22751+ (2 << VCI_KCBIT_TAGGED) |
22752+#elif defined(CONFIG_TAGGING_ID24)
22753+ (3 << VCI_KCBIT_TAGGED) |
22754+#elif defined(CONFIG_TAGGING_INTERN)
22755+ (4 << VCI_KCBIT_TAGGED) |
22756+#elif defined(CONFIG_TAGGING_RUNTIME)
22757+ (5 << VCI_KCBIT_TAGGED) |
22758+#else
22759+ (7 << VCI_KCBIT_TAGGED) |
22760+#endif
22761+ (1 << VCI_KCBIT_PPTAG) |
22762+ 0;
22763+}
2ba6f0dd 22764+
09a55596
AM
22765diff -NurpP --minimal linux-4.9.135/mm/memcontrol.c linux-4.9.135-vs2.3.9.8/mm/memcontrol.c
22766--- linux-4.9.135/mm/memcontrol.c 2018-10-20 10:39:23.000000000 +0000
22767+++ linux-4.9.135-vs2.3.9.8/mm/memcontrol.c 2018-10-20 05:55:43.000000000 +0000
369dbd59 22768@@ -2825,6 +2825,41 @@ static u64 mem_cgroup_read_u64(struct cg
cc23e853 22769 }
4bf69007
AM
22770 }
22771
369dbd59
AM
22772+unsigned long mem_cgroup_mem_usage_pages(struct mem_cgroup *memcg)
22773+{
22774+ return mem_cgroup_usage(memcg, false);
22775+}
22776+
22777+unsigned long mem_cgroup_mem_limit_pages(struct mem_cgroup *memcg)
4bf69007 22778+{
369dbd59 22779+ return memcg->memory.limit;
4bf69007 22780+}
2ba6f0dd 22781+
369dbd59 22782+unsigned long mem_cgroup_memsw_usage_pages(struct mem_cgroup *memcg)
4bf69007 22783+{
369dbd59 22784+ return mem_cgroup_usage(memcg, true);
4bf69007 22785+}
2ba6f0dd 22786+
369dbd59 22787+unsigned long mem_cgroup_memsw_limit_pages(struct mem_cgroup *memcg)
4bf69007 22788+{
369dbd59 22789+ return memcg->memsw.limit;
4bf69007 22790+}
2ba6f0dd 22791+
369dbd59 22792+void dump_mem_cgroup(struct mem_cgroup *memcg)
4bf69007 22793+{
369dbd59
AM
22794+ printk(KERN_INFO "memcg: %p/%d:\n"
22795+ "\tmemory:\t%lu/%lu %lu/%lu\n"
22796+ "\tmemsw:\t%lu/%lu %lu/%lu\n"
22797+ "\tkmem:\t%lu/%lu %lu/%lu\n",
22798+ memcg, memcg->id.id,
22799+ page_counter_read(&memcg->memory), memcg->memory.limit,
22800+ memcg->memory.watermark, memcg->memory.failcnt,
22801+ page_counter_read(&memcg->memsw), memcg->memsw.limit,
22802+ memcg->memsw.watermark, memcg->memsw.failcnt,
22803+ page_counter_read(&memcg->kmem), memcg->kmem.limit,
22804+ memcg->kmem.watermark, memcg->kmem.failcnt);
4bf69007 22805+}
2ba6f0dd 22806+
cc23e853
AM
22807 #ifndef CONFIG_SLOB
22808 static int memcg_online_kmem(struct mem_cgroup *memcg)
22809 {
09a55596
AM
22810diff -NurpP --minimal linux-4.9.135/mm/oom_kill.c linux-4.9.135-vs2.3.9.8/mm/oom_kill.c
22811--- linux-4.9.135/mm/oom_kill.c 2018-10-20 10:39:23.000000000 +0000
22812+++ linux-4.9.135-vs2.3.9.8/mm/oom_kill.c 2018-10-20 04:58:15.000000000 +0000
cc23e853
AM
22813@@ -38,6 +38,8 @@
22814 #include <linux/kthread.h>
22815 #include <linux/init.h>
22816 #include <linux/mmu_notifier.h>
4bf69007
AM
22817+#include <linux/reboot.h>
22818+#include <linux/vs_context.h>
22819
cc23e853
AM
22820 #include <asm/tlb.h>
22821 #include "internal.h"
22822@@ -142,11 +144,18 @@ static inline bool is_memcg_oom(struct o
4bf69007 22823 static bool oom_unkillable_task(struct task_struct *p,
cc23e853 22824 struct mem_cgroup *memcg, const nodemask_t *nodemask)
4bf69007
AM
22825 {
22826- if (is_global_init(p))
22827+ unsigned xid = vx_current_xid();
2ba6f0dd 22828+
4bf69007
AM
22829+ /* skip the init task, global and per guest */
22830+ if (task_is_init(p))
22831 return true;
22832 if (p->flags & PF_KTHREAD)
22833 return true;
22834
22835+ /* skip other guest and host processes if oom in guest */
22836+ if (xid && vx_task_xid(p) != xid)
22837+ return true;
2ba6f0dd 22838+
4bf69007
AM
22839 /* When mem_cgroup_out_of_memory() and p is not member of the group */
22840 if (memcg && !task_in_mem_cgroup(p, memcg))
22841 return true;
cc23e853
AM
22842@@ -851,8 +860,8 @@ static void oom_kill_process(struct oom_
22843 if (__ratelimit(&oom_rs))
22844 dump_header(oc, p);
4bf69007 22845
cc23e853 22846- pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
4bf69007
AM
22847- message, task_pid_nr(p), p->comm, points);
22848+ pr_err("%s: Kill process %d:#%u (%s) score %d or sacrifice child\n",
22849+ message, task_pid_nr(p), p->xid, p->comm, points);
4bf69007
AM
22850
22851 /*
cc23e853
AM
22852 * If any of p's children has a different mm and is eligible for kill,
22853@@ -902,8 +911,8 @@ static void oom_kill_process(struct oom_
22854 */
22855 do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
22856 mark_oom_victim(victim);
22857- pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
4bf69007 22858- task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
cc23e853 22859+ pr_err("Killed process %d:%u (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
4bf69007
AM
22860+ task_pid_nr(victim), victim->xid, victim->comm, K(victim->mm->total_vm),
22861 K(get_mm_counter(victim->mm, MM_ANONPAGES)),
cc23e853
AM
22862 K(get_mm_counter(victim->mm, MM_FILEPAGES)),
22863 K(get_mm_counter(victim->mm, MM_SHMEMPAGES)));
22864@@ -950,6 +959,8 @@ static void oom_kill_process(struct oom_
4bf69007 22865 }
cc23e853 22866 #undef K
4bf69007
AM
22867
22868+long vs_oom_action(unsigned int);
2ba6f0dd 22869+
4bf69007 22870 /*
cc23e853
AM
22871 * Determines whether the kernel must panic because of the panic_on_oom sysctl.
22872 */
22873@@ -1055,7 +1066,12 @@ bool out_of_memory(struct oom_control *o
4bf69007 22874 /* Found nothing?!?! Either we hang forever, or we panic. */
cc23e853
AM
22875 if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) {
22876 dump_header(oc, NULL);
4bf69007 22877- panic("Out of memory and no killable processes...\n");
2ba6f0dd 22878+
4bf69007
AM
22879+ /* avoid panic for guest OOM */
22880+ if (vx_current_xid())
22881+ vs_oom_action(LINUX_REBOOT_CMD_OOM);
22882+ else
22883+ panic("Out of memory and no killable processes...\n");
22884 }
cc23e853
AM
22885 if (oc->chosen && oc->chosen != (void *)-1UL) {
22886 oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
09a55596
AM
22887diff -NurpP --minimal linux-4.9.135/mm/page_alloc.c linux-4.9.135-vs2.3.9.8/mm/page_alloc.c
22888--- linux-4.9.135/mm/page_alloc.c 2018-10-20 10:39:23.000000000 +0000
22889+++ linux-4.9.135-vs2.3.9.8/mm/page_alloc.c 2018-10-20 04:58:15.000000000 +0000
cc23e853
AM
22890@@ -64,6 +64,8 @@
22891 #include <linux/page_owner.h>
22892 #include <linux/kthread.h>
22893 #include <linux/memcontrol.h>
4bf69007
AM
22894+#include <linux/vs_base.h>
22895+#include <linux/vs_limit.h>
22896
c2e5f7c8 22897 #include <asm/sections.h>
4bf69007 22898 #include <asm/tlbflush.h>
09a55596 22899@@ -4198,6 +4200,9 @@ void si_meminfo(struct sysinfo *val)
4bf69007
AM
22900 val->totalhigh = totalhigh_pages;
22901 val->freehigh = nr_free_highpages();
22902 val->mem_unit = PAGE_SIZE;
2ba6f0dd 22903+
4bf69007
AM
22904+ if (vx_flags(VXF_VIRT_MEM, 0))
22905+ vx_vsi_meminfo(val);
22906 }
22907
22908 EXPORT_SYMBOL(si_meminfo);
09a55596 22909@@ -4232,6 +4237,9 @@ void si_meminfo_node(struct sysinfo *val
cc23e853 22910 val->freehigh = free_highpages;
4bf69007
AM
22911 #endif
22912 val->mem_unit = PAGE_SIZE;
2ba6f0dd 22913+
4bf69007
AM
22914+ if (vx_flags(VXF_VIRT_MEM, 0))
22915+ vx_vsi_meminfo(val);
22916 }
22917 #endif
22918
09a55596
AM
22919diff -NurpP --minimal linux-4.9.135/mm/pgtable-generic.c linux-4.9.135-vs2.3.9.8/mm/pgtable-generic.c
22920--- linux-4.9.135/mm/pgtable-generic.c 2016-12-11 19:17:54.000000000 +0000
22921+++ linux-4.9.135-vs2.3.9.8/mm/pgtable-generic.c 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
22922@@ -6,6 +6,8 @@
22923 * Copyright (C) 2010 Linus Torvalds
22924 */
22925
22926+#include <linux/mm.h>
2ba6f0dd 22927+
4bf69007
AM
22928 #include <linux/pagemap.h>
22929 #include <asm/tlb.h>
22930 #include <asm-generic/pgtable.h>
09a55596
AM
22931diff -NurpP --minimal linux-4.9.135/mm/shmem.c linux-4.9.135-vs2.3.9.8/mm/shmem.c
22932--- linux-4.9.135/mm/shmem.c 2018-10-20 10:39:23.000000000 +0000
22933+++ linux-4.9.135-vs2.3.9.8/mm/shmem.c 2018-10-20 05:55:43.000000000 +0000
22934@@ -2796,7 +2796,7 @@ static int shmem_statfs(struct dentry *d
4bf69007
AM
22935 {
22936 struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
22937
22938- buf->f_type = TMPFS_MAGIC;
22939+ buf->f_type = TMPFS_SUPER_MAGIC;
cc23e853 22940 buf->f_bsize = PAGE_SIZE;
4bf69007
AM
22941 buf->f_namelen = NAME_MAX;
22942 if (sbinfo->max_blocks) {
09a55596 22943@@ -3617,7 +3617,7 @@ int shmem_fill_super(struct super_block
4bf69007 22944 sb->s_maxbytes = MAX_LFS_FILESIZE;
cc23e853
AM
22945 sb->s_blocksize = PAGE_SIZE;
22946 sb->s_blocksize_bits = PAGE_SHIFT;
4bf69007
AM
22947- sb->s_magic = TMPFS_MAGIC;
22948+ sb->s_magic = TMPFS_SUPER_MAGIC;
22949 sb->s_op = &shmem_ops;
22950 sb->s_time_gran = 1;
22951 #ifdef CONFIG_TMPFS_XATTR
09a55596
AM
22952diff -NurpP --minimal linux-4.9.135/mm/slab.c linux-4.9.135-vs2.3.9.8/mm/slab.c
22953--- linux-4.9.135/mm/slab.c 2018-10-20 10:39:23.000000000 +0000
22954+++ linux-4.9.135-vs2.3.9.8/mm/slab.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 22955@@ -307,6 +307,8 @@ static void kmem_cache_node_init(struct
4bf69007
AM
22956 #define STATS_INC_FREEMISS(x) do { } while (0)
22957 #endif
22958
22959+#include "slab_vs.h"
2ba6f0dd 22960+
4bf69007
AM
22961 #if DEBUG
22962
22963 /*
cc23e853 22964@@ -3341,6 +3343,7 @@ slab_alloc_node(struct kmem_cache *cache
4bf69007
AM
22965 /* ___cache_alloc_node can fall back to other nodes */
22966 ptr = ____cache_alloc_node(cachep, flags, nodeid);
22967 out:
22968+ vx_slab_alloc(cachep, flags);
22969 local_irq_restore(save_flags);
22970 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
cc23e853
AM
22971
22972@@ -3522,6 +3525,7 @@ void ___cache_free(struct kmem_cache *ca
4bf69007
AM
22973 check_irq_off();
22974 kmemleak_free_recursive(objp, cachep->flags);
22975 objp = cache_free_debugcheck(cachep, objp, caller);
22976+ vx_slab_free(cachep);
22977
22978 kmemcheck_slab_free(cachep, objp, cachep->object_size);
22979
09a55596
AM
22980diff -NurpP --minimal linux-4.9.135/mm/slab_vs.h linux-4.9.135-vs2.3.9.8/mm/slab_vs.h
22981--- linux-4.9.135/mm/slab_vs.h 1970-01-01 00:00:00.000000000 +0000
22982+++ linux-4.9.135-vs2.3.9.8/mm/slab_vs.h 2018-10-20 04:58:15.000000000 +0000
4bf69007 22983@@ -0,0 +1,29 @@
2ba6f0dd 22984+
4bf69007 22985+#include <linux/vserver/context.h>
2ba6f0dd 22986+
4bf69007 22987+#include <linux/vs_context.h>
2ba6f0dd 22988+
4bf69007
AM
22989+static inline
22990+void vx_slab_alloc(struct kmem_cache *cachep, gfp_t flags)
22991+{
22992+ int what = gfp_zone(cachep->allocflags);
22993+ struct vx_info *vxi = current_vx_info();
2ba6f0dd 22994+
4bf69007
AM
22995+ if (!vxi)
22996+ return;
2ba6f0dd 22997+
4bf69007
AM
22998+ atomic_add(cachep->size, &vxi->cacct.slab[what]);
22999+}
2ba6f0dd 23000+
4bf69007
AM
23001+static inline
23002+void vx_slab_free(struct kmem_cache *cachep)
23003+{
23004+ int what = gfp_zone(cachep->allocflags);
23005+ struct vx_info *vxi = current_vx_info();
2ba6f0dd 23006+
4bf69007
AM
23007+ if (!vxi)
23008+ return;
2ba6f0dd 23009+
4bf69007
AM
23010+ atomic_sub(cachep->size, &vxi->cacct.slab[what]);
23011+}
2ba6f0dd 23012+
09a55596
AM
23013diff -NurpP --minimal linux-4.9.135/mm/swapfile.c linux-4.9.135-vs2.3.9.8/mm/swapfile.c
23014--- linux-4.9.135/mm/swapfile.c 2018-10-20 10:39:23.000000000 +0000
23015+++ linux-4.9.135-vs2.3.9.8/mm/swapfile.c 2018-10-20 05:55:43.000000000 +0000
4bf69007
AM
23016@@ -39,6 +39,7 @@
23017 #include <asm/tlbflush.h>
23018 #include <linux/swapops.h>
cc23e853 23019 #include <linux/swap_cgroup.h>
4bf69007
AM
23020+#include <linux/vs_base.h>
23021
23022 static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
23023 unsigned char);
cc23e853 23024@@ -2083,6 +2084,16 @@ static int swap_show(struct seq_file *sw
4bf69007
AM
23025
23026 if (si == SEQ_START_TOKEN) {
23027 seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
23028+ if (vx_flags(VXF_VIRT_MEM, 0)) {
cc23e853 23029+ struct sysinfo si = { 0 };
2ba6f0dd 23030+
4bf69007
AM
23031+ vx_vsi_swapinfo(&si);
23032+ if (si.totalswap < (1 << 10))
23033+ return 0;
23034+ seq_printf(swap, "%s\t\t\t\t\t%s\t%lu\t%lu\t%d\n",
23035+ "hdv0", "partition", si.totalswap >> 10,
23036+ (si.totalswap - si.freeswap) >> 10, -1);
23037+ }
23038 return 0;
23039 }
23040
09a55596 23041@@ -2630,6 +2641,8 @@ void si_swapinfo(struct sysinfo *val)
b00e13aa 23042 val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused;
4bf69007
AM
23043 val->totalswap = total_swap_pages + nr_to_be_unused;
23044 spin_unlock(&swap_lock);
23045+ if (vx_flags(VXF_VIRT_MEM, 0))
23046+ vx_vsi_swapinfo(val);
23047 }
23048
23049 /*
09a55596
AM
23050diff -NurpP --minimal linux-4.9.135/net/bridge/br_multicast.c linux-4.9.135-vs2.3.9.8/net/bridge/br_multicast.c
23051--- linux-4.9.135/net/bridge/br_multicast.c 2016-12-11 19:17:54.000000000 +0000
23052+++ linux-4.9.135-vs2.3.9.8/net/bridge/br_multicast.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 23053@@ -465,7 +465,7 @@ static struct sk_buff *br_ip6_multicast_
4bf69007
AM
23054 ip6h->hop_limit = 1;
23055 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
23056 if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
23057- &ip6h->saddr)) {
23058+ &ip6h->saddr, NULL)) {
23059 kfree_skb(skb);
cc23e853 23060 br->has_ipv6_addr = 0;
4bf69007 23061 return NULL;
09a55596
AM
23062diff -NurpP --minimal linux-4.9.135/net/core/dev.c linux-4.9.135-vs2.3.9.8/net/core/dev.c
23063--- linux-4.9.135/net/core/dev.c 2018-10-20 10:39:24.000000000 +0000
23064+++ linux-4.9.135-vs2.3.9.8/net/core/dev.c 2018-10-20 05:55:44.000000000 +0000
cc23e853 23065@@ -126,6 +126,7 @@
4bf69007
AM
23066 #include <linux/in.h>
23067 #include <linux/jhash.h>
23068 #include <linux/random.h>
23069+#include <linux/vs_inet.h>
23070 #include <trace/events/napi.h>
23071 #include <trace/events/net.h>
23072 #include <trace/events/skb.h>
cc23e853 23073@@ -730,7 +731,8 @@ struct net_device *__dev_get_by_name(str
4bf69007
AM
23074 struct hlist_head *head = dev_name_hash(net, name);
23075
b00e13aa 23076 hlist_for_each_entry(dev, head, name_hlist)
4bf69007
AM
23077- if (!strncmp(dev->name, name, IFNAMSIZ))
23078+ if (!strncmp(dev->name, name, IFNAMSIZ) &&
23079+ nx_dev_visible(current_nx_info(), dev))
23080 return dev;
23081
23082 return NULL;
cc23e853 23083@@ -755,7 +757,8 @@ struct net_device *dev_get_by_name_rcu(s
4bf69007
AM
23084 struct hlist_head *head = dev_name_hash(net, name);
23085
b00e13aa 23086 hlist_for_each_entry_rcu(dev, head, name_hlist)
4bf69007
AM
23087- if (!strncmp(dev->name, name, IFNAMSIZ))
23088+ if (!strncmp(dev->name, name, IFNAMSIZ) &&
23089+ nx_dev_visible(current_nx_info(), dev))
23090 return dev;
23091
23092 return NULL;
cc23e853 23093@@ -805,7 +808,8 @@ struct net_device *__dev_get_by_index(st
4bf69007
AM
23094 struct hlist_head *head = dev_index_hash(net, ifindex);
23095
b00e13aa 23096 hlist_for_each_entry(dev, head, index_hlist)
4bf69007
AM
23097- if (dev->ifindex == ifindex)
23098+ if ((dev->ifindex == ifindex) &&
23099+ nx_dev_visible(current_nx_info(), dev))
23100 return dev;
23101
23102 return NULL;
cc23e853 23103@@ -823,7 +827,7 @@ EXPORT_SYMBOL(__dev_get_by_index);
4bf69007
AM
23104 * about locking. The caller must hold RCU lock.
23105 */
23106
23107-struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
23108+struct net_device *dev_get_by_index_real_rcu(struct net *net, int ifindex)
23109 {
4bf69007 23110 struct net_device *dev;
b00e13aa 23111 struct hlist_head *head = dev_index_hash(net, ifindex);
cc23e853 23112@@ -834,6 +838,16 @@ struct net_device *dev_get_by_index_rcu(
4bf69007
AM
23113
23114 return NULL;
23115 }
23116+EXPORT_SYMBOL(dev_get_by_index_real_rcu);
2ba6f0dd 23117+
4bf69007
AM
23118+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
23119+{
23120+ struct net_device *dev = dev_get_by_index_real_rcu(net, ifindex);
2ba6f0dd 23121+
4bf69007
AM
23122+ if (nx_dev_visible(current_nx_info(), dev))
23123+ return dev;
23124+ return NULL;
23125+}
23126 EXPORT_SYMBOL(dev_get_by_index_rcu);
23127
23128
cc23e853 23129@@ -916,7 +930,8 @@ struct net_device *dev_getbyhwaddr_rcu(s
4bf69007
AM
23130
23131 for_each_netdev_rcu(net, dev)
23132 if (dev->type == type &&
23133- !memcmp(dev->dev_addr, ha, dev->addr_len))
23134+ !memcmp(dev->dev_addr, ha, dev->addr_len) &&
23135+ nx_dev_visible(current_nx_info(), dev))
23136 return dev;
23137
23138 return NULL;
cc23e853 23139@@ -928,9 +943,11 @@ struct net_device *__dev_getfirstbyhwtyp
4bf69007
AM
23140 struct net_device *dev;
23141
23142 ASSERT_RTNL();
23143- for_each_netdev(net, dev)
23144- if (dev->type == type)
23145+ for_each_netdev(net, dev) {
23146+ if ((dev->type == type) &&
23147+ nx_dev_visible(current_nx_info(), dev))
23148 return dev;
23149+ }
23150
23151 return NULL;
23152 }
cc23e853 23153@@ -942,7 +959,8 @@ struct net_device *dev_getfirstbyhwtype(
b00e13aa
AM
23154
23155 rcu_read_lock();
23156 for_each_netdev_rcu(net, dev)
23157- if (dev->type == type) {
23158+ if ((dev->type == type) &&
23159+ nx_dev_visible(current_nx_info(), dev)) {
23160 dev_hold(dev);
23161 ret = dev;
23162 break;
cc23e853 23163@@ -972,7 +990,8 @@ struct net_device *__dev_get_by_flags(st
b00e13aa
AM
23164
23165 ret = NULL;
bb20add7 23166 for_each_netdev(net, dev) {
b00e13aa
AM
23167- if (((dev->flags ^ if_flags) & mask) == 0) {
23168+ if ((((dev->flags ^ if_flags) & mask) == 0) &&
23169+ nx_dev_visible(current_nx_info(), dev)) {
23170 ret = dev;
23171 break;
23172 }
cc23e853 23173@@ -1050,6 +1069,8 @@ static int __dev_alloc_name(struct net *
4bf69007
AM
23174 continue;
23175 if (i < 0 || i >= max_netdevices)
23176 continue;
23177+ if (!nx_dev_visible(current_nx_info(), d))
23178+ continue;
23179
23180 /* avoid cases where sscanf is not exact inverse of printf */
23181 snprintf(buf, IFNAMSIZ, name, i);
09a55596
AM
23182diff -NurpP --minimal linux-4.9.135/net/core/net-procfs.c linux-4.9.135-vs2.3.9.8/net/core/net-procfs.c
23183--- linux-4.9.135/net/core/net-procfs.c 2016-12-11 19:17:54.000000000 +0000
23184+++ linux-4.9.135-vs2.3.9.8/net/core/net-procfs.c 2018-10-20 04:58:15.000000000 +0000
8ce283e1
AM
23185@@ -1,6 +1,7 @@
23186 #include <linux/netdevice.h>
23187 #include <linux/proc_fs.h>
23188 #include <linux/seq_file.h>
23189+#include <linux/vs_inet.h>
23190 #include <net/wext.h>
23191
23192 #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
23193@@ -77,8 +78,13 @@ static void dev_seq_stop(struct seq_file
23194 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
23195 {
23196 struct rtnl_link_stats64 temp;
23197- const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
23198+ const struct rtnl_link_stats64 *stats;
23199+
23200+ /* device visible inside network context? */
23201+ if (!nx_dev_visible(current_nx_info(), dev))
23202+ return;
23203
23204+ stats = dev_get_stats(dev, &temp);
23205 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
23206 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
23207 dev->name, stats->rx_bytes, stats->rx_packets,
09a55596
AM
23208diff -NurpP --minimal linux-4.9.135/net/core/rtnetlink.c linux-4.9.135-vs2.3.9.8/net/core/rtnetlink.c
23209--- linux-4.9.135/net/core/rtnetlink.c 2018-10-20 10:39:24.000000000 +0000
23210+++ linux-4.9.135-vs2.3.9.8/net/core/rtnetlink.c 2018-10-20 05:55:44.000000000 +0000
cc23e853
AM
23211@@ -1615,6 +1615,8 @@ static int rtnl_dump_ifinfo(struct sk_bu
23212 goto cont;
4bf69007
AM
23213 if (idx < s_idx)
23214 goto cont;
23215+ if (!nx_dev_visible(skb->sk->sk_nx_info, dev))
23216+ continue;
7ed51edd
JR
23217 err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
23218 NETLINK_CB(cb->skb).portid,
23219 cb->nlh->nlmsg_seq, 0,
09a55596 23220@@ -2820,6 +2822,9 @@ void rtmsg_ifinfo(int type, struct net_d
cc23e853
AM
23221 {
23222 struct sk_buff *skb;
4bf69007
AM
23223
23224+ if (!nx_dev_visible(current_nx_info(), dev))
23225+ return;
2ba6f0dd 23226+
cc23e853
AM
23227 if (dev->reg_state != NETREG_REGISTERED)
23228 return;
23229
09a55596
AM
23230diff -NurpP --minimal linux-4.9.135/net/core/sock.c linux-4.9.135-vs2.3.9.8/net/core/sock.c
23231--- linux-4.9.135/net/core/sock.c 2018-10-20 10:39:24.000000000 +0000
23232+++ linux-4.9.135-vs2.3.9.8/net/core/sock.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 23233@@ -135,6 +135,10 @@
4bf69007
AM
23234
23235 #include <linux/filter.h>
cc23e853 23236 #include <net/sock_reuseport.h>
4bf69007
AM
23237+#include <linux/vs_socket.h>
23238+#include <linux/vs_limit.h>
23239+#include <linux/vs_context.h>
23240+#include <linux/vs_network.h>
23241
23242 #include <trace/events/sock.h>
23243
cc23e853 23244@@ -1339,6 +1343,8 @@ static struct sock *sk_prot_alloc(struct
4bf69007
AM
23245 goto out_free_sec;
23246 sk_tx_queue_clear(sk);
23247 }
23248+ sock_vx_init(sk);
23249+ sock_nx_init(sk);
23250
23251 return sk;
23252
cc23e853 23253@@ -1444,6 +1450,11 @@ static void __sk_destruct(struct rcu_hea
4bf69007 23254 put_pid(sk->sk_peer_pid);
cc23e853
AM
23255 if (likely(sk->sk_net_refcnt))
23256 put_net(sock_net(sk));
4bf69007
AM
23257+ vx_sock_dec(sk);
23258+ clr_vx_info(&sk->sk_vx_info);
23259+ sk->sk_xid = -1;
23260+ clr_nx_info(&sk->sk_nx_info);
23261+ sk->sk_nid = -1;
23262 sk_prot_free(sk->sk_prot_creator, sk);
23263 }
23264
cc23e853 23265@@ -1498,6 +1509,8 @@ struct sock *sk_clone_lock(const struct
4bf69007 23266 /* SANITY */
cc23e853
AM
23267 if (likely(newsk->sk_net_refcnt))
23268 get_net(sock_net(newsk));
4bf69007
AM
23269+ sock_vx_init(newsk);
23270+ sock_nx_init(newsk);
23271 sk_node_init(&newsk->sk_node);
23272 sock_lock_init(newsk);
23273 bh_lock_sock(newsk);
cc23e853 23274@@ -1568,6 +1581,12 @@ struct sock *sk_clone_lock(const struct
4bf69007
AM
23275 smp_wmb();
23276 atomic_set(&newsk->sk_refcnt, 2);
23277
23278+ set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info);
23279+ newsk->sk_xid = sk->sk_xid;
23280+ vx_sock_inc(newsk);
23281+ set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info);
23282+ newsk->sk_nid = sk->sk_nid;
2ba6f0dd 23283+
4bf69007
AM
23284 /*
23285 * Increment the counter in the same struct proto as the master
23286 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
cc23e853 23287@@ -2468,6 +2487,12 @@ void sock_init_data(struct socket *sock,
4bf69007
AM
23288
23289 sk->sk_stamp = ktime_set(-1L, 0);
23290
23291+ set_vx_info(&sk->sk_vx_info, current_vx_info());
23292+ sk->sk_xid = vx_current_xid();
23293+ vx_sock_inc(sk);
23294+ set_nx_info(&sk->sk_nx_info, current_nx_info());
23295+ sk->sk_nid = nx_current_nid();
2ba6f0dd 23296+
c2e5f7c8
JR
23297 #ifdef CONFIG_NET_RX_BUSY_POLL
23298 sk->sk_napi_id = 0;
23299 sk->sk_ll_usec = sysctl_net_busy_read;
09a55596
AM
23300diff -NurpP --minimal linux-4.9.135/net/ipv4/af_inet.c linux-4.9.135-vs2.3.9.8/net/ipv4/af_inet.c
23301--- linux-4.9.135/net/ipv4/af_inet.c 2018-10-20 10:39:24.000000000 +0000
23302+++ linux-4.9.135-vs2.3.9.8/net/ipv4/af_inet.c 2018-10-20 05:55:44.000000000 +0000
cc23e853 23303@@ -303,10 +303,15 @@ lookup_protocol:
4bf69007
AM
23304 }
23305
23306 err = -EPERM;
23307+ if ((protocol == IPPROTO_ICMP) &&
23308+ nx_capable(CAP_NET_RAW, NXC_RAW_ICMP))
23309+ goto override;
cc23e853 23310+
b00e13aa
AM
23311 if (sock->type == SOCK_RAW && !kern &&
23312 !ns_capable(net->user_ns, CAP_NET_RAW))
4bf69007 23313 goto out_rcu_unlock;
cc23e853 23314
a4a22af8
AM
23315+override:
23316 sock->ops = answer->ops;
23317 answer_prot = answer->prot;
bb20add7 23318 answer_flags = answer->flags;
cc23e853 23319@@ -424,6 +429,7 @@ int inet_bind(struct socket *sock, struc
4bf69007
AM
23320 struct sock *sk = sock->sk;
23321 struct inet_sock *inet = inet_sk(sk);
b00e13aa 23322 struct net *net = sock_net(sk);
cc23e853 23323+ struct nx_v4_sock_addr nsa;
4bf69007
AM
23324 unsigned short snum;
23325 int chk_addr_ret;
cc23e853
AM
23326 u32 tb_id = RT_TABLE_LOCAL;
23327@@ -449,7 +455,11 @@ int inet_bind(struct socket *sock, struc
4bf69007
AM
23328 }
23329
cc23e853
AM
23330 tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
23331- chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
4bf69007
AM
23332+ err = v4_map_sock_addr(inet, addr, &nsa);
23333+ if (err)
23334+ goto out;
2ba6f0dd 23335+
cc23e853 23336+ chk_addr_ret = inet_addr_type_table(net, nsa.saddr, tb_id);
4bf69007
AM
23337
23338 /* Not specified by any standard per-se, however it breaks too
23339 * many applications when removed. It is unfortunate since
cc23e853 23340@@ -461,7 +471,7 @@ int inet_bind(struct socket *sock, struc
4bf69007 23341 err = -EADDRNOTAVAIL;
bb20add7 23342 if (!net->ipv4.sysctl_ip_nonlocal_bind &&
4bf69007
AM
23343 !(inet->freebind || inet->transparent) &&
23344- addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
23345+ nsa.saddr != htonl(INADDR_ANY) &&
23346 chk_addr_ret != RTN_LOCAL &&
23347 chk_addr_ret != RTN_MULTICAST &&
23348 chk_addr_ret != RTN_BROADCAST)
cc23e853 23349@@ -487,7 +497,7 @@ int inet_bind(struct socket *sock, struc
4bf69007
AM
23350 if (sk->sk_state != TCP_CLOSE || inet->inet_num)
23351 goto out_release_sock;
23352
23353- inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
23354+ v4_set_sock_addr(inet, &nsa);
23355 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
23356 inet->inet_saddr = 0; /* Use device */
23357
cc23e853 23358@@ -706,11 +716,13 @@ int inet_getname(struct socket *sock, st
4bf69007
AM
23359 peer == 1))
23360 return -ENOTCONN;
23361 sin->sin_port = inet->inet_dport;
23362- sin->sin_addr.s_addr = inet->inet_daddr;
23363+ sin->sin_addr.s_addr =
23364+ nx_map_sock_lback(sk->sk_nx_info, inet->inet_daddr);
23365 } else {
23366 __be32 addr = inet->inet_rcv_saddr;
23367 if (!addr)
23368 addr = inet->inet_saddr;
23369+ addr = nx_map_sock_lback(sk->sk_nx_info, addr);
23370 sin->sin_port = inet->inet_sport;
23371 sin->sin_addr.s_addr = addr;
23372 }
cc23e853
AM
23373@@ -894,6 +906,7 @@ static int inet_compat_ioctl(struct sock
23374 return err;
23375 }
23376 #endif
23377+#include <linux/vs_limit.h>
23378
23379 const struct proto_ops inet_stream_ops = {
23380 .family = PF_INET,
09a55596
AM
23381diff -NurpP --minimal linux-4.9.135/net/ipv4/arp.c linux-4.9.135-vs2.3.9.8/net/ipv4/arp.c
23382--- linux-4.9.135/net/ipv4/arp.c 2018-10-20 10:39:24.000000000 +0000
23383+++ linux-4.9.135-vs2.3.9.8/net/ipv4/arp.c 2018-10-20 04:58:15.000000000 +0000
23384@@ -1320,6 +1320,7 @@ static void arp_format_neigh_entry(struc
4bf69007
AM
23385 struct net_device *dev = n->dev;
23386 int hatype = dev->type;
23387
23388+ /* FIXME: check for network context */
23389 read_lock(&n->lock);
23390 /* Convert hardware address to XX:XX:XX:XX ... form. */
23391 #if IS_ENABLED(CONFIG_AX25)
09a55596 23392@@ -1351,6 +1352,7 @@ static void arp_format_pneigh_entry(stru
4bf69007
AM
23393 int hatype = dev ? dev->type : 0;
23394 char tbuf[16];
23395
23396+ /* FIXME: check for network context */
23397 sprintf(tbuf, "%pI4", n->key);
23398 seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
23399 tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00",
09a55596
AM
23400diff -NurpP --minimal linux-4.9.135/net/ipv4/devinet.c linux-4.9.135-vs2.3.9.8/net/ipv4/devinet.c
23401--- linux-4.9.135/net/ipv4/devinet.c 2018-10-20 10:39:24.000000000 +0000
23402+++ linux-4.9.135-vs2.3.9.8/net/ipv4/devinet.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 23403@@ -538,6 +538,7 @@ struct in_device *inetdev_by_index(struc
4bf69007
AM
23404 }
23405 EXPORT_SYMBOL(inetdev_by_index);
23406
2ba6f0dd 23407+
4bf69007
AM
23408 /* Called only from RTNL semaphored context. No locks. */
23409
23410 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
cc23e853 23411@@ -992,6 +993,8 @@ int devinet_ioctl(struct net *net, unsig
4bf69007
AM
23412
23413 in_dev = __in_dev_get_rtnl(dev);
23414 if (in_dev) {
23415+ struct nx_info *nxi = current_nx_info();
2ba6f0dd 23416+
4bf69007
AM
23417 if (tryaddrmatch) {
23418 /* Matthias Andree */
23419 /* compare label and address (4.4BSD style) */
cc23e853 23420@@ -1000,6 +1003,8 @@ int devinet_ioctl(struct net *net, unsig
4bf69007
AM
23421 This is checked above. */
23422 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
23423 ifap = &ifa->ifa_next) {
23424+ if (!nx_v4_ifa_visible(nxi, ifa))
23425+ continue;
23426 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
23427 sin_orig.sin_addr.s_addr ==
23428 ifa->ifa_local) {
cc23e853 23429@@ -1012,9 +1017,12 @@ int devinet_ioctl(struct net *net, unsig
4bf69007
AM
23430 comparing just the label */
23431 if (!ifa) {
23432 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
23433- ifap = &ifa->ifa_next)
23434+ ifap = &ifa->ifa_next) {
23435+ if (!nx_v4_ifa_visible(nxi, ifa))
23436+ continue;
23437 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
23438 break;
23439+ }
23440 }
23441 }
23442
cc23e853 23443@@ -1168,6 +1176,8 @@ static int inet_gifconf(struct net_devic
4bf69007
AM
23444 goto out;
23445
23446 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
23447+ if (!nx_v4_ifa_visible(current_nx_info(), ifa))
23448+ continue;
23449 if (!buf) {
23450 done += sizeof(ifr);
23451 continue;
cc23e853 23452@@ -1595,6 +1605,7 @@ static int inet_dump_ifaddr(struct sk_bu
4bf69007
AM
23453 struct net_device *dev;
23454 struct in_device *in_dev;
23455 struct in_ifaddr *ifa;
23456+ struct sock *sk = skb->sk;
23457 struct hlist_head *head;
4bf69007 23458
b00e13aa 23459 s_h = cb->args[0];
cc23e853 23460@@ -1618,6 +1629,8 @@ static int inet_dump_ifaddr(struct sk_bu
4bf69007
AM
23461
23462 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
23463 ifa = ifa->ifa_next, ip_idx++) {
23464+ if (sk && !nx_v4_ifa_visible(sk->sk_nx_info, ifa))
23465+ continue;
23466 if (ip_idx < s_ip_idx)
23467 continue;
23468 if (inet_fill_ifaddr(skb, ifa,
09a55596
AM
23469diff -NurpP --minimal linux-4.9.135/net/ipv4/fib_trie.c linux-4.9.135-vs2.3.9.8/net/ipv4/fib_trie.c
23470--- linux-4.9.135/net/ipv4/fib_trie.c 2018-10-20 10:39:24.000000000 +0000
23471+++ linux-4.9.135-vs2.3.9.8/net/ipv4/fib_trie.c 2018-10-20 04:58:15.000000000 +0000
cc23e853
AM
23472@@ -2617,6 +2617,7 @@ static int fib_route_seq_show(struct seq
23473
23474 seq_setwidth(seq, 127);
23475
23476+ /* FIXME: check for network context? */
23477 if (fi)
23478 seq_printf(seq,
23479 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
09a55596
AM
23480diff -NurpP --minimal linux-4.9.135/net/ipv4/inet_connection_sock.c linux-4.9.135-vs2.3.9.8/net/ipv4/inet_connection_sock.c
23481--- linux-4.9.135/net/ipv4/inet_connection_sock.c 2018-10-20 10:39:24.000000000 +0000
23482+++ linux-4.9.135-vs2.3.9.8/net/ipv4/inet_connection_sock.c 2018-10-20 05:55:44.000000000 +0000
cc23e853
AM
23483@@ -16,6 +16,7 @@
23484 #include <linux/module.h>
23485 #include <linux/jhash.h>
23486
23487+#include <net/addrconf.h>
23488 #include <net/inet_connection_sock.h>
23489 #include <net/inet_hashtables.h>
23490 #include <net/inet_timewait_sock.h>
23491@@ -44,6 +45,7 @@ void inet_get_local_port_range(struct ne
4bf69007
AM
23492 }
23493 EXPORT_SYMBOL(inet_get_local_port_range);
23494
2ba6f0dd 23495+
4bf69007
AM
23496 int inet_csk_bind_conflict(const struct sock *sk,
23497 const struct inet_bind_bucket *tb, bool relax)
23498 {
cc23e853
AM
23499@@ -72,15 +74,13 @@ int inet_csk_bind_conflict(const struct
23500 (sk2->sk_state != TCP_TIME_WAIT &&
b00e13aa 23501 !uid_eq(uid, sock_i_uid(sk2))))) {
c2e5f7c8
JR
23502
23503- if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
23504- sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
cc23e853 23505+ if (ipv4_rcv_saddr_equal(sk, sk2, true))
4bf69007
AM
23506 break;
23507 }
23508 if (!relax && reuse && sk2->sk_reuse &&
b00e13aa 23509 sk2->sk_state != TCP_LISTEN) {
c2e5f7c8
JR
23510
23511- if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
23512- sk2->sk_rcv_saddr == sk->sk_rcv_saddr)
cc23e853 23513+ if (ipv4_rcv_saddr_equal(sk, sk2, true))
b00e13aa
AM
23514 break;
23515 }
23516 }
09a55596
AM
23517diff -NurpP --minimal linux-4.9.135/net/ipv4/inet_diag.c linux-4.9.135-vs2.3.9.8/net/ipv4/inet_diag.c
23518--- linux-4.9.135/net/ipv4/inet_diag.c 2016-12-11 19:17:54.000000000 +0000
23519+++ linux-4.9.135-vs2.3.9.8/net/ipv4/inet_diag.c 2018-10-20 06:33:52.000000000 +0000
4bf69007
AM
23520@@ -31,6 +31,8 @@
23521
23522 #include <linux/inet.h>
23523 #include <linux/stddef.h>
23524+#include <linux/vs_network.h>
23525+#include <linux/vs_inet.h>
23526
23527 #include <linux/inet_diag.h>
23528 #include <linux/sock_diag.h>
09a55596
AM
23529@@ -87,8 +89,8 @@ void inet_diag_msg_common_fill(struct in
23530 memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
23531 memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
23532
23533- r->id.idiag_src[0] = sk->sk_rcv_saddr;
23534- r->id.idiag_dst[0] = sk->sk_daddr;
23535+ r->id.idiag_src[0] = nx_map_sock_lback(sk->sk_nx_info, sk->sk_rcv_saddr);
23536+ r->id.idiag_dst[0] = nx_map_sock_lback(sk->sk_nx_info, sk->sk_daddr);
23537 }
23538 }
23539 EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);
23540@@ -879,6 +881,9 @@ void inet_diag_dump_icsk(struct inet_has
4bf69007
AM
23541 if (!net_eq(sock_net(sk), net))
23542 continue;
23543
23544+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23545+ continue;
09a55596 23546+
4bf69007
AM
23547 if (num < s_num) {
23548 num++;
23549 continue;
09a55596 23550@@ -941,6 +946,8 @@ skip_listen_ht:
4bf69007
AM
23551
23552 if (!net_eq(sock_net(sk), net))
23553 continue;
23554+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23555+ continue;
23556 if (num < s_num)
23557 goto next_normal;
c2e5f7c8 23558 state = (sk->sk_state == TCP_TIME_WAIT) ?
09a55596
AM
23559diff -NurpP --minimal linux-4.9.135/net/ipv4/inet_hashtables.c linux-4.9.135-vs2.3.9.8/net/ipv4/inet_hashtables.c
23560--- linux-4.9.135/net/ipv4/inet_hashtables.c 2018-10-20 10:39:24.000000000 +0000
23561+++ linux-4.9.135-vs2.3.9.8/net/ipv4/inet_hashtables.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 23562@@ -24,6 +24,7 @@
4bf69007
AM
23563 #include <net/inet_connection_sock.h>
23564 #include <net/inet_hashtables.h>
23565 #include <net/secure_seq.h>
23566+#include <net/route.h>
23567 #include <net/ip.h>
cc23e853
AM
23568 #include <net/tcp.h>
23569 #include <net/sock_reuseport.h>
23570@@ -186,6 +187,11 @@ static inline int compute_score(struct s
4bf69007
AM
23571 if (rcv_saddr != daddr)
23572 return -1;
b00e13aa 23573 score += 4;
4bf69007
AM
23574+ } else {
23575+ /* block non nx_info ips */
23576+ if (!v4_addr_in_nx_info(sk->sk_nx_info,
23577+ daddr, NXA_MASK_BIND))
23578+ return -1;
23579 }
cc23e853 23580 if (sk->sk_bound_dev_if || exact_dif) {
4bf69007 23581 if (sk->sk_bound_dev_if != dif)
cc23e853
AM
23582@@ -300,6 +306,7 @@ begin:
23583 goto found;
4bf69007
AM
23584 }
23585 }
2ba6f0dd 23586+
4bf69007
AM
23587 /*
23588 * if the nulls value we got at the end of this lookup is
23589 * not the expected one, we must restart lookup.
09a55596
AM
23590diff -NurpP --minimal linux-4.9.135/net/ipv4/netfilter.c linux-4.9.135-vs2.3.9.8/net/ipv4/netfilter.c
23591--- linux-4.9.135/net/ipv4/netfilter.c 2018-10-20 10:39:24.000000000 +0000
23592+++ linux-4.9.135-vs2.3.9.8/net/ipv4/netfilter.c 2018-10-20 04:58:15.000000000 +0000
09be7631 23593@@ -11,7 +11,7 @@
4bf69007
AM
23594 #include <linux/skbuff.h>
23595 #include <linux/gfp.h>
23596 #include <linux/export.h>
23597-#include <net/route.h>
23598+// #include <net/route.h>
23599 #include <net/xfrm.h>
23600 #include <net/ip.h>
23601 #include <net/netfilter/nf_queue.h>
09a55596
AM
23602diff -NurpP --minimal linux-4.9.135/net/ipv4/raw.c linux-4.9.135-vs2.3.9.8/net/ipv4/raw.c
23603--- linux-4.9.135/net/ipv4/raw.c 2018-10-20 10:39:25.000000000 +0000
23604+++ linux-4.9.135-vs2.3.9.8/net/ipv4/raw.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 23605@@ -128,7 +128,7 @@ static struct sock *__raw_v4_lookup(stru
4bf69007
AM
23606
23607 if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
23608 !(inet->inet_daddr && inet->inet_daddr != raddr) &&
23609- !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
23610+ v4_sock_addr_match(sk->sk_nx_info, inet, laddr) &&
23611 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
23612 goto found; /* gotcha */
23613 }
cc23e853
AM
23614@@ -418,6 +418,12 @@ static int raw_send_hdrinc(struct sock *
23615 skb_transport_header(skb))->type);
23616 }
4bf69007
AM
23617
23618+ err = -EPERM;
23619+ if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) &&
23620+ sk->sk_nx_info &&
23621+ !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND))
23622+ goto error_free;
2ba6f0dd 23623+
cc23e853
AM
23624 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
23625 net, sk, skb, NULL, rt->dst.dev,
23626 dst_output);
23627@@ -623,6 +629,16 @@ static int raw_sendmsg(struct sock *sk,
4bf69007
AM
23628 goto done;
23629 }
23630
23631+ if (sk->sk_nx_info) {
23632+ rt = ip_v4_find_src(sock_net(sk), sk->sk_nx_info, &fl4);
23633+ if (IS_ERR(rt)) {
23634+ err = PTR_ERR(rt);
23635+ rt = NULL;
23636+ goto done;
23637+ }
23638+ ip_rt_put(rt);
23639+ }
2ba6f0dd 23640+
4bf69007 23641 security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
cc23e853 23642 rt = ip_route_output_flow(net, &fl4, sk);
4bf69007 23643 if (IS_ERR(rt)) {
cc23e853 23644@@ -701,17 +717,19 @@ static int raw_bind(struct sock *sk, str
4bf69007
AM
23645 {
23646 struct inet_sock *inet = inet_sk(sk);
23647 struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
23648+ struct nx_v4_sock_addr nsa = { 0 };
23649 int ret = -EINVAL;
23650 int chk_addr_ret;
23651
23652 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
23653 goto out;
23654- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
23655+ v4_map_sock_addr(inet, addr, &nsa);
23656+ chk_addr_ret = inet_addr_type(sock_net(sk), nsa.saddr);
23657 ret = -EADDRNOTAVAIL;
23658- if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
23659+ if (nsa.saddr && chk_addr_ret != RTN_LOCAL &&
23660 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
23661 goto out;
23662- inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
23663+ v4_set_sock_addr(inet, &nsa);
23664 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
23665 inet->inet_saddr = 0; /* Use device */
23666 sk_dst_reset(sk);
cc23e853 23667@@ -760,7 +778,8 @@ static int raw_recvmsg(struct sock *sk,
4bf69007
AM
23668 /* Copy the address. */
23669 if (sin) {
23670 sin->sin_family = AF_INET;
23671- sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
23672+ sin->sin_addr.s_addr =
23673+ nx_map_sock_lback(sk->sk_nx_info, ip_hdr(skb)->saddr);
23674 sin->sin_port = 0;
23675 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
c2e5f7c8 23676 *addr_len = sizeof(*sin);
cc23e853 23677@@ -956,7 +975,8 @@ static struct sock *raw_get_first(struct
b00e13aa
AM
23678 for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
23679 ++state->bucket) {
23680 sk_for_each(sk, &state->h->ht[state->bucket])
4bf69007
AM
23681- if (sock_net(sk) == seq_file_net(seq))
23682+ if ((sock_net(sk) == seq_file_net(seq)) &&
b00e13aa 23683+ nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
4bf69007
AM
23684 goto found;
23685 }
23686 sk = NULL;
cc23e853 23687@@ -972,7 +992,8 @@ static struct sock *raw_get_next(struct
4bf69007
AM
23688 sk = sk_next(sk);
23689 try_again:
23690 ;
23691- } while (sk && sock_net(sk) != seq_file_net(seq));
23692+ } while (sk && ((sock_net(sk) != seq_file_net(seq)) ||
23693+ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
23694
23695 if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
23696 sk = sk_head(&state->h->ht[state->bucket]);
09a55596
AM
23697diff -NurpP --minimal linux-4.9.135/net/ipv4/route.c linux-4.9.135-vs2.3.9.8/net/ipv4/route.c
23698--- linux-4.9.135/net/ipv4/route.c 2018-10-20 10:39:25.000000000 +0000
23699+++ linux-4.9.135-vs2.3.9.8/net/ipv4/route.c 2018-10-20 04:58:15.000000000 +0000
23700@@ -2250,7 +2250,7 @@ struct rtable *__ip_route_output_key_has
4bf69007
AM
23701
23702
23703 if (fl4->flowi4_oif) {
23704- dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
23705+ dev_out = dev_get_by_index_real_rcu(net, fl4->flowi4_oif);
23706 rth = ERR_PTR(-ENODEV);
cc23e853 23707 if (!dev_out)
4bf69007 23708 goto out;
09a55596
AM
23709diff -NurpP --minimal linux-4.9.135/net/ipv4/tcp.c linux-4.9.135-vs2.3.9.8/net/ipv4/tcp.c
23710--- linux-4.9.135/net/ipv4/tcp.c 2018-10-20 10:39:25.000000000 +0000
23711+++ linux-4.9.135-vs2.3.9.8/net/ipv4/tcp.c 2018-10-20 05:55:44.000000000 +0000
cc23e853
AM
23712@@ -269,6 +269,7 @@
23713 #include <linux/err.h>
4bf69007
AM
23714 #include <linux/time.h>
23715 #include <linux/slab.h>
23716+#include <linux/in.h>
23717
23718 #include <net/icmp.h>
23719 #include <net/inet_common.h>
09a55596
AM
23720diff -NurpP --minimal linux-4.9.135/net/ipv4/tcp_ipv4.c linux-4.9.135-vs2.3.9.8/net/ipv4/tcp_ipv4.c
23721--- linux-4.9.135/net/ipv4/tcp_ipv4.c 2018-10-20 10:39:25.000000000 +0000
23722+++ linux-4.9.135-vs2.3.9.8/net/ipv4/tcp_ipv4.c 2018-10-20 05:55:44.000000000 +0000
23723@@ -1933,8 +1933,12 @@ get_head:
5ba7a31c
AM
23724 sk = sk_next(sk);
23725 get_sk:
23726 sk_for_each_from(sk) {
23727+ vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
23728+ sk, sk->sk_nid, nx_current_nid());
23729 if (!net_eq(sock_net(sk), net))
23730 continue;
23731+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23732+ continue;
23733 if (sk->sk_family == st->family)
23734 return sk;
23735 }
09a55596 23736@@ -1988,6 +1992,11 @@ static void *established_get_first(struc
4bf69007
AM
23737
23738 spin_lock_bh(lock);
23739 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
23740+ vxdprintk(VXD_CBIT(net, 6),
23741+ "sk,egf: %p [#%d] (from %d)",
23742+ sk, sk->sk_nid, nx_current_nid());
23743+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23744+ continue;
23745 if (sk->sk_family != st->family ||
23746 !net_eq(sock_net(sk), net)) {
23747 continue;
09a55596 23748@@ -2014,6 +2023,11 @@ static void *established_get_next(struct
c2e5f7c8 23749 sk = sk_nulls_next(sk);
4bf69007
AM
23750
23751 sk_nulls_for_each_from(sk, node) {
23752+ vxdprintk(VXD_CBIT(net, 6),
23753+ "sk,egn: %p [#%d] (from %d)",
23754+ sk, sk->sk_nid, nx_current_nid());
23755+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23756+ continue;
23757 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
c2e5f7c8 23758 return sk;
4bf69007 23759 }
09a55596 23760@@ -2205,9 +2219,9 @@ static void get_openreq4(const struct re
4bf69007 23761 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
c2e5f7c8 23762 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
4bf69007 23763 i,
c2e5f7c8
JR
23764- ireq->ir_loc_addr,
23765+ nx_map_sock_lback(current_nx_info(), ireq->ir_loc_addr),
cc23e853 23766 ireq->ir_num,
c2e5f7c8
JR
23767- ireq->ir_rmt_addr,
23768+ nx_map_sock_lback(current_nx_info(), ireq->ir_rmt_addr),
23769 ntohs(ireq->ir_rmt_port),
4bf69007
AM
23770 TCP_SYN_RECV,
23771 0, 0, /* could print option size, but that is af dependent. */
09a55596 23772@@ -2230,8 +2244,8 @@ static void get_tcp4_sock(struct sock *s
4bf69007
AM
23773 const struct inet_connection_sock *icsk = inet_csk(sk);
23774 const struct inet_sock *inet = inet_sk(sk);
cc23e853 23775 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
4bf69007
AM
23776- __be32 dest = inet->inet_daddr;
23777- __be32 src = inet->inet_rcv_saddr;
23778+ __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr);
23779+ __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr);
23780 __u16 destp = ntohs(inet->inet_dport);
23781 __u16 srcp = ntohs(inet->inet_sport);
23782 int rx_queue;
09a55596 23783@@ -2290,8 +2304,8 @@ static void get_timewait4_sock(const str
cc23e853 23784 __be32 dest, src;
4bf69007 23785 __u16 destp, srcp;
4bf69007
AM
23786
23787- dest = tw->tw_daddr;
23788- src = tw->tw_rcv_saddr;
23789+ dest = nx_map_sock_lback(current_nx_info(), tw->tw_daddr);
23790+ src = nx_map_sock_lback(current_nx_info(), tw->tw_rcv_saddr);
23791 destp = ntohs(tw->tw_dport);
23792 srcp = ntohs(tw->tw_sport);
23793
09a55596
AM
23794diff -NurpP --minimal linux-4.9.135/net/ipv4/tcp_minisocks.c linux-4.9.135-vs2.3.9.8/net/ipv4/tcp_minisocks.c
23795--- linux-4.9.135/net/ipv4/tcp_minisocks.c 2018-10-20 10:39:25.000000000 +0000
23796+++ linux-4.9.135-vs2.3.9.8/net/ipv4/tcp_minisocks.c 2018-10-20 05:55:44.000000000 +0000
4bf69007
AM
23797@@ -23,6 +23,9 @@
23798 #include <linux/slab.h>
23799 #include <linux/sysctl.h>
23800 #include <linux/workqueue.h>
23801+#include <linux/vs_limit.h>
23802+#include <linux/vs_socket.h>
23803+#include <linux/vs_context.h>
23804 #include <net/tcp.h>
23805 #include <net/inet_common.h>
23806 #include <net/xfrm.h>
09a55596 23807@@ -286,6 +289,11 @@ void tcp_time_wait(struct sock *sk, int
b00e13aa 23808 tcptw->tw_ts_offset = tp->tsoffset;
cc23e853 23809 tcptw->tw_last_oow_ack_time = 0;
4bf69007
AM
23810
23811+ tw->tw_xid = sk->sk_xid;
23812+ tw->tw_vx_info = NULL;
23813+ tw->tw_nid = sk->sk_nid;
23814+ tw->tw_nx_info = NULL;
2ba6f0dd 23815+
4bf69007
AM
23816 #if IS_ENABLED(CONFIG_IPV6)
23817 if (tw->tw_family == PF_INET6) {
23818 struct ipv6_pinfo *np = inet6_sk(sk);
09a55596
AM
23819diff -NurpP --minimal linux-4.9.135/net/ipv4/udp.c linux-4.9.135-vs2.3.9.8/net/ipv4/udp.c
23820--- linux-4.9.135/net/ipv4/udp.c 2018-10-20 10:39:25.000000000 +0000
23821+++ linux-4.9.135-vs2.3.9.8/net/ipv4/udp.c 2018-10-20 05:55:44.000000000 +0000
23822@@ -361,12 +361,26 @@ int ipv4_rcv_saddr_equal(const struct so
cc23e853
AM
23823 bool match_wildcard)
23824 {
23825 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
23826+ __be32 sk1_rcv_saddr = inet1->inet_rcv_saddr,
23827+ sk2_rcv_saddr = inet2->inet_rcv_saddr;
23828
23829- if (!ipv6_only_sock(sk2)) {
23830- if (inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)
23831- return 1;
23832- if (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr)
23833- return match_wildcard;
cc23e853
AM
23834+ if (ipv6_only_sock(sk2))
23835+ return 0;
23836+
23837+ if (sk1_rcv_saddr && sk2_rcv_saddr && sk1_rcv_saddr == sk2_rcv_saddr)
23838+ return 1;
23839+
23840+ if (match_wildcard) {
23841+ if (!sk2_rcv_saddr && !sk1_rcv_saddr)
23842+ return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
23843+
23844+ if (!sk2_rcv_saddr && sk1_rcv_saddr)
23845+ return v4_addr_in_nx_info(sk2->sk_nx_info,
23846+ sk1_rcv_saddr, NXA_MASK_BIND);
23847+
23848+ if (!sk1_rcv_saddr && sk2_rcv_saddr)
23849+ return v4_addr_in_nx_info(sk1->sk_nx_info,
23850+ sk2_rcv_saddr, NXA_MASK_BIND);
09a55596 23851 }
cc23e853 23852 return 0;
4bf69007 23853 }
cc23e853
AM
23854@@ -408,6 +422,11 @@ static int compute_score(struct sock *sk
23855 if (inet->inet_rcv_saddr != daddr)
23856 return -1;
23857 score += 4;
4bf69007
AM
23858+ } else {
23859+ /* block non nx_info ips */
23860+ if (!v4_addr_in_nx_info(sk->sk_nx_info,
23861+ daddr, NXA_MASK_BIND))
23862+ return -1;
cc23e853
AM
23863 }
23864
23865 if (inet->inet_daddr) {
23866@@ -483,6 +502,7 @@ static struct sock *udp4_lib_lookup2(str
4bf69007
AM
23867 return result;
23868 }
23869
2ba6f0dd 23870+
4bf69007
AM
23871 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
23872 * harder than this. -DaveM
23873 */
cc23e853 23874@@ -603,7 +623,7 @@ static inline bool __udp_is_mcast_sock(s
c2e5f7c8
JR
23875 udp_sk(sk)->udp_port_hash != hnum ||
23876 (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
23877 (inet->inet_dport != rmt_port && inet->inet_dport) ||
23878- (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
23879+ !v4_sock_addr_match(sk->sk_nx_info, inet, loc_addr) ||
23880 ipv6_only_sock(sk) ||
23881 (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
23882 return false;
09a55596 23883@@ -1020,6 +1040,16 @@ int udp_sendmsg(struct sock *sk, struct
cc23e853 23884 flow_flags,
4bf69007
AM
23885 faddr, saddr, dport, inet->inet_sport);
23886
23887+ if (sk->sk_nx_info) {
23888+ rt = ip_v4_find_src(net, sk->sk_nx_info, fl4);
23889+ if (IS_ERR(rt)) {
23890+ err = PTR_ERR(rt);
23891+ rt = NULL;
23892+ goto out;
23893+ }
23894+ ip_rt_put(rt);
23895+ }
2ba6f0dd 23896+
4bf69007
AM
23897 security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
23898 rt = ip_route_output_flow(net, fl4, sk);
23899 if (IS_ERR(rt)) {
09a55596 23900@@ -1317,7 +1347,8 @@ try_again:
4bf69007
AM
23901 if (sin) {
23902 sin->sin_family = AF_INET;
23903 sin->sin_port = udp_hdr(skb)->source;
23904- sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
23905+ sin->sin_addr.s_addr = nx_map_sock_lback(
23906+ skb->sk->sk_nx_info, ip_hdr(skb)->saddr);
23907 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
c2e5f7c8 23908 *addr_len = sizeof(*sin);
4bf69007 23909 }
09a55596 23910@@ -2271,6 +2302,8 @@ static struct sock *udp_get_first(struct
cc23e853 23911 sk_for_each(sk, &hslot->head) {
4bf69007
AM
23912 if (!net_eq(sock_net(sk), net))
23913 continue;
23914+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23915+ continue;
23916 if (sk->sk_family == state->family)
23917 goto found;
23918 }
09a55596 23919@@ -2288,7 +2321,9 @@ static struct sock *udp_get_next(struct
4bf69007
AM
23920
23921 do {
cc23e853 23922 sk = sk_next(sk);
4bf69007
AM
23923- } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
23924+ } while (sk && (!net_eq(sock_net(sk), net) ||
23925+ sk->sk_family != state->family ||
23926+ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
23927
23928 if (!sk) {
23929 if (state->bucket <= state->udp_table->mask)
09a55596 23930@@ -2384,8 +2419,8 @@ static void udp4_format_sock(struct sock
c2e5f7c8 23931 int bucket)
4bf69007
AM
23932 {
23933 struct inet_sock *inet = inet_sk(sp);
23934- __be32 dest = inet->inet_daddr;
23935- __be32 src = inet->inet_rcv_saddr;
23936+ __be32 dest = nx_map_sock_lback(current_nx_info(), inet->inet_daddr);
23937+ __be32 src = nx_map_sock_lback(current_nx_info(), inet->inet_rcv_saddr);
23938 __u16 destp = ntohs(inet->inet_dport);
23939 __u16 srcp = ntohs(inet->inet_sport);
23940
09a55596
AM
23941diff -NurpP --minimal linux-4.9.135/net/ipv4/udp_diag.c linux-4.9.135-vs2.3.9.8/net/ipv4/udp_diag.c
23942--- linux-4.9.135/net/ipv4/udp_diag.c 2016-12-11 19:17:54.000000000 +0000
23943+++ linux-4.9.135-vs2.3.9.8/net/ipv4/udp_diag.c 2018-10-20 06:31:18.000000000 +0000
23944@@ -120,6 +120,8 @@ static void udp_dump(struct udp_table *t
23945
23946 if (!net_eq(sock_net(sk), net))
23947 continue;
23948+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
23949+ continue;
23950 if (num < s_num)
23951 goto next;
23952 if (!(r->idiag_states & (1 << sk->sk_state)))
23953diff -NurpP --minimal linux-4.9.135/net/ipv6/addrconf.c linux-4.9.135-vs2.3.9.8/net/ipv6/addrconf.c
23954--- linux-4.9.135/net/ipv6/addrconf.c 2018-10-20 10:39:25.000000000 +0000
23955+++ linux-4.9.135-vs2.3.9.8/net/ipv6/addrconf.c 2018-10-20 05:55:44.000000000 +0000
cc23e853 23956@@ -92,6 +92,7 @@
4bf69007
AM
23957 #include <linux/proc_fs.h>
23958 #include <linux/seq_file.h>
23959 #include <linux/export.h>
23960+#include <linux/vs_network.h>
4bf69007
AM
23961
23962 /* Set to 3 to get tracing... */
23963 #define ACONF_DEBUG 2
09a55596 23964@@ -1497,7 +1498,8 @@ static int __ipv6_dev_get_saddr(struct n
cc23e853
AM
23965 struct ipv6_saddr_dst *dst,
23966 struct inet6_dev *idev,
23967 struct ipv6_saddr_score *scores,
23968- int hiscore_idx)
23969+ int hiscore_idx,
23970+ struct nx_info *nxi)
23971 {
23972 struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
23973
09a55596 23974@@ -1527,6 +1529,8 @@ static int __ipv6_dev_get_saddr(struct n
cc23e853
AM
23975 idev->dev->name);
23976 continue;
23977 }
23978+ if (!v6_addr_in_nx_info(nxi, &score->ifa->addr, -1))
23979+ continue;
23980
23981 score->rule = -1;
23982 bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
09a55596 23983@@ -1577,26 +1581,27 @@ static int ipv6_get_saddr_master(struct
cc23e853
AM
23984 const struct net_device *master,
23985 struct ipv6_saddr_dst *dst,
23986 struct ipv6_saddr_score *scores,
23987- int hiscore_idx)
23988+ int hiscore_idx,
23989+ struct nx_info *nxi)
23990 {
23991 struct inet6_dev *idev;
23992
23993 idev = __in6_dev_get(dst_dev);
23994 if (idev)
23995- hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev,
23996- scores, hiscore_idx);
23997+ hiscore_idx = __ipv6_dev_get_saddr(net, dst,
23998+ idev, scores, hiscore_idx, nxi);
23999
24000 idev = __in6_dev_get(master);
24001 if (idev)
24002- hiscore_idx = __ipv6_dev_get_saddr(net, dst, idev,
24003- scores, hiscore_idx);
24004+ hiscore_idx = __ipv6_dev_get_saddr(net, dst,
24005+ idev, scores, hiscore_idx, nxi);
24006
24007 return hiscore_idx;
24008 }
4bf69007
AM
24009
24010 int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
24011 const struct in6_addr *daddr, unsigned int prefs,
24012- struct in6_addr *saddr)
24013+ struct in6_addr *saddr, struct nx_info *nxi)
24014 {
cc23e853
AM
24015 struct ipv6_saddr_score scores[2], *hiscore;
24016 struct ipv6_saddr_dst dst;
09a55596 24017@@ -1645,7 +1650,8 @@ int ipv6_dev_get_saddr(struct net *net,
cc23e853
AM
24018
24019 if (use_oif_addr) {
24020 if (idev)
24021- hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
24022+ hiscore_idx = __ipv6_dev_get_saddr(net,
24023+ &dst, idev, scores, hiscore_idx, nxi);
24024 } else {
24025 const struct net_device *master;
24026 int master_idx = 0;
09a55596 24027@@ -1659,8 +1665,8 @@ int ipv6_dev_get_saddr(struct net *net,
cc23e853
AM
24028 master_idx = master->ifindex;
24029
24030 hiscore_idx = ipv6_get_saddr_master(net, dst_dev,
24031- master, &dst,
24032- scores, hiscore_idx);
24033+ master, &dst, scores,
24034+ hiscore_idx, nxi);
24035
24036 if (scores[hiscore_idx].ifa)
24037 goto out;
09a55596 24038@@ -1675,7 +1681,8 @@ int ipv6_dev_get_saddr(struct net *net,
cc23e853
AM
24039 idev = __in6_dev_get(dev);
24040 if (!idev)
4bf69007 24041 continue;
cc23e853
AM
24042- hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
24043+ hiscore_idx = __ipv6_dev_get_saddr(net,
24044+ &dst, idev, scores, hiscore_idx, nxi);
24045 }
24046 }
4bf69007 24047
09a55596 24048@@ -4129,7 +4136,10 @@ static void if6_seq_stop(struct seq_file
4bf69007
AM
24049 static int if6_seq_show(struct seq_file *seq, void *v)
24050 {
24051 struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
24052- seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
2ba6f0dd 24053+
4bf69007
AM
24054+ if (nx_check(0, VS_ADMIN|VS_WATCH) ||
24055+ v6_addr_in_nx_info(current_nx_info(), &ifp->addr, -1))
24056+ seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n",
24057 &ifp->addr,
24058 ifp->idev->dev->ifindex,
24059 ifp->prefix_len,
09a55596 24060@@ -4713,6 +4723,11 @@ static int in6_dump_addrs(struct inet6_d
4bf69007
AM
24061 struct ifacaddr6 *ifaca;
24062 int err = 1;
24063 int ip_idx = *p_ip_idx;
24064+ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
2ba6f0dd 24065+
4bf69007
AM
24066+ /* disable ipv6 on non v6 guests */
24067+ if (nxi && !nx_info_has_v6(nxi))
24068+ return skb->len;
24069
24070 read_lock_bh(&idev->lock);
24071 switch (type) {
09a55596 24072@@ -4723,6 +4738,8 @@ static int in6_dump_addrs(struct inet6_d
4bf69007 24073 list_for_each_entry(ifa, &idev->addr_list, if_list) {
b65d880e
JR
24074 if (ip_idx < s_ip_idx)
24075 goto next;
cc23e853 24076+ if (!v6_addr_in_nx_info(nxi, &ifa->addr, -1))
b65d880e 24077+ goto next;
4bf69007
AM
24078 err = inet6_fill_ifaddr(skb, ifa,
24079 NETLINK_CB(cb->skb).portid,
24080 cb->nlh->nlmsg_seq,
09a55596 24081@@ -4740,6 +4757,8 @@ static int in6_dump_addrs(struct inet6_d
4bf69007
AM
24082 ifmca = ifmca->next, ip_idx++) {
24083 if (ip_idx < s_ip_idx)
24084 continue;
cc23e853
AM
24085+ if (!v6_addr_in_nx_info(nxi, &ifmca->mca_addr, -1))
24086+ continue;
4bf69007
AM
24087 err = inet6_fill_ifmcaddr(skb, ifmca,
24088 NETLINK_CB(cb->skb).portid,
24089 cb->nlh->nlmsg_seq,
09a55596 24090@@ -4755,6 +4774,8 @@ static int in6_dump_addrs(struct inet6_d
4bf69007
AM
24091 ifaca = ifaca->aca_next, ip_idx++) {
24092 if (ip_idx < s_ip_idx)
24093 continue;
cc23e853
AM
24094+ if (!v6_addr_in_nx_info(nxi, &ifaca->aca_addr, -1))
24095+ continue;
4bf69007
AM
24096 err = inet6_fill_ifacaddr(skb, ifaca,
24097 NETLINK_CB(cb->skb).portid,
24098 cb->nlh->nlmsg_seq,
09a55596 24099@@ -4783,6 +4804,10 @@ static int inet6_dump_addr(struct sk_buf
4bf69007
AM
24100 struct inet6_dev *idev;
24101 struct hlist_head *head;
b00e13aa 24102
4bf69007
AM
24103+ /* FIXME: maybe disable ipv6 on non v6 guests?
24104+ if (skb->sk && skb->sk->sk_vx_info)
24105+ return skb->len; */
b00e13aa
AM
24106+
24107 s_h = cb->args[0];
24108 s_idx = idx = cb->args[1];
24109 s_ip_idx = ip_idx = cb->args[2];
09a55596 24110@@ -5301,6 +5326,7 @@ static int inet6_dump_ifinfo(struct sk_b
b00e13aa
AM
24111 struct net_device *dev;
24112 struct inet6_dev *idev;
24113 struct hlist_head *head;
24114+ struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
4bf69007
AM
24115
24116 s_h = cb->args[0];
24117 s_idx = cb->args[1];
09a55596 24118@@ -5312,6 +5338,8 @@ static int inet6_dump_ifinfo(struct sk_b
b00e13aa 24119 hlist_for_each_entry_rcu(dev, head, index_hlist) {
4bf69007
AM
24120 if (idx < s_idx)
24121 goto cont;
24122+ if (!v6_dev_in_nx_info(dev, nxi))
24123+ goto cont;
24124 idev = __in6_dev_get(dev);
24125 if (!idev)
24126 goto cont;
09a55596
AM
24127diff -NurpP --minimal linux-4.9.135/net/ipv6/af_inet6.c linux-4.9.135-vs2.3.9.8/net/ipv6/af_inet6.c
24128--- linux-4.9.135/net/ipv6/af_inet6.c 2018-10-20 10:39:25.000000000 +0000
24129+++ linux-4.9.135-vs2.3.9.8/net/ipv6/af_inet6.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 24130@@ -43,6 +43,7 @@
4bf69007
AM
24131 #include <linux/netdevice.h>
24132 #include <linux/icmpv6.h>
24133 #include <linux/netfilter_ipv6.h>
24134+#include <linux/vs_inet.h>
4bf69007
AM
24135
24136 #include <net/ip.h>
24137 #include <net/ipv6.h>
cc23e853 24138@@ -167,10 +168,13 @@ lookup_protocol:
4bf69007
AM
24139 }
24140
24141 err = -EPERM;
24142+ if ((protocol == IPPROTO_ICMPV6) &&
24143+ nx_capable(CAP_NET_RAW, NXC_RAW_ICMP))
24144+ goto override;
b00e13aa
AM
24145 if (sock->type == SOCK_RAW && !kern &&
24146 !ns_capable(net->user_ns, CAP_NET_RAW))
4bf69007
AM
24147 goto out_rcu_unlock;
24148-
24149+override:
24150 sock->ops = answer->ops;
24151 answer_prot = answer->prot;
bb20add7 24152 answer_flags = answer->flags;
cc23e853 24153@@ -272,6 +276,7 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24154 struct inet_sock *inet = inet_sk(sk);
24155 struct ipv6_pinfo *np = inet6_sk(sk);
24156 struct net *net = sock_net(sk);
24157+ struct nx_v6_sock_addr nsa;
24158 __be32 v4addr = 0;
24159 unsigned short snum;
cef7ea10
AM
24160 bool saved_ipv6only;
24161@@ -288,6 +293,10 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24162 if (addr->sin6_family != AF_INET6)
24163 return -EAFNOSUPPORT;
24164
24165+ err = v6_map_sock_addr(inet, addr, &nsa);
24166+ if (err)
24167+ return err;
2ba6f0dd 24168+
4bf69007
AM
24169 addr_type = ipv6_addr_type(&addr->sin6_addr);
24170 if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
24171 return -EINVAL;
cef7ea10 24172@@ -328,6 +337,10 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24173 err = -EADDRNOTAVAIL;
24174 goto out;
24175 }
24176+ if (!v4_addr_in_nx_info(sk->sk_nx_info, v4addr, NXA_MASK_BIND)) {
24177+ err = -EADDRNOTAVAIL;
24178+ goto out;
24179+ }
24180 } else {
24181 if (addr_type != IPV6_ADDR_ANY) {
24182 struct net_device *dev = NULL;
cef7ea10 24183@@ -354,6 +367,11 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24184 }
24185 }
24186
24187+ if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
24188+ err = -EADDRNOTAVAIL;
24189+ goto out_unlock;
24190+ }
2ba6f0dd 24191+
4bf69007
AM
24192 /* ipv4 addr of the socket is invalid. Only the
24193 * unspecified and mapped address have a v4 equivalent.
24194 */
cef7ea10 24195@@ -371,6 +389,9 @@ int inet6_bind(struct socket *sock, stru
4bf69007
AM
24196 }
24197 }
24198
24199+ /* what's that for? */
24200+ v6_set_sock_addr(inet, &nsa);
2ba6f0dd 24201+
4bf69007
AM
24202 inet->inet_rcv_saddr = v4addr;
24203 inet->inet_saddr = v4addr;
24204
cef7ea10 24205@@ -477,9 +498,11 @@ int inet6_getname(struct socket *sock, s
4bf69007
AM
24206 return -ENOTCONN;
24207 sin->sin6_port = inet->inet_dport;
c2e5f7c8 24208 sin->sin6_addr = sk->sk_v6_daddr;
4bf69007
AM
24209+ /* FIXME: remap lback? */
24210 if (np->sndflow)
24211 sin->sin6_flowinfo = np->flow_label;
24212 } else {
24213+ /* FIXME: remap lback? */
c2e5f7c8 24214 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
4bf69007
AM
24215 sin->sin6_addr = np->saddr;
24216 else
09a55596
AM
24217diff -NurpP --minimal linux-4.9.135/net/ipv6/datagram.c linux-4.9.135-vs2.3.9.8/net/ipv6/datagram.c
24218--- linux-4.9.135/net/ipv6/datagram.c 2018-10-20 10:39:25.000000000 +0000
24219+++ linux-4.9.135-vs2.3.9.8/net/ipv6/datagram.c 2018-10-20 05:55:44.000000000 +0000
24220@@ -780,7 +780,7 @@ int ip6_datagram_send_ctl(struct net *ne
4bf69007
AM
24221
24222 rcu_read_lock();
24223 if (fl6->flowi6_oif) {
24224- dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
24225+ dev = dev_get_by_index_real_rcu(net, fl6->flowi6_oif);
24226 if (!dev) {
24227 rcu_read_unlock();
24228 return -ENODEV;
09a55596
AM
24229diff -NurpP --minimal linux-4.9.135/net/ipv6/fib6_rules.c linux-4.9.135-vs2.3.9.8/net/ipv6/fib6_rules.c
24230--- linux-4.9.135/net/ipv6/fib6_rules.c 2018-10-20 10:39:25.000000000 +0000
24231+++ linux-4.9.135-vs2.3.9.8/net/ipv6/fib6_rules.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 24232@@ -102,7 +102,7 @@ static int fib6_rule_action(struct fib_r
4bf69007
AM
24233 ip6_dst_idev(&rt->dst)->dev,
24234 &flp6->daddr,
24235 rt6_flags2srcprefs(flags),
24236- &saddr))
24237+ &saddr, NULL))
24238 goto again;
24239 if (!ipv6_prefix_equal(&saddr, &r->src.addr,
24240 r->src.plen))
09a55596
AM
24241diff -NurpP --minimal linux-4.9.135/net/ipv6/inet6_hashtables.c linux-4.9.135-vs2.3.9.8/net/ipv6/inet6_hashtables.c
24242--- linux-4.9.135/net/ipv6/inet6_hashtables.c 2016-12-11 19:17:54.000000000 +0000
24243+++ linux-4.9.135-vs2.3.9.8/net/ipv6/inet6_hashtables.c 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
24244@@ -16,6 +16,7 @@
24245
24246 #include <linux/module.h>
24247 #include <linux/random.h>
24248+#include <linux/vs_inet6.h>
24249
cc23e853 24250 #include <net/addrconf.h>
4bf69007 24251 #include <net/inet_connection_sock.h>
cc23e853 24252@@ -108,6 +109,9 @@ static inline int compute_score(struct s
c2e5f7c8 24253 if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
4bf69007
AM
24254 return -1;
24255 score++;
24256+ } else {
24257+ if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
24258+ return -1;
24259 }
cc23e853 24260 if (sk->sk_bound_dev_if || exact_dif) {
4bf69007 24261 if (sk->sk_bound_dev_if != dif)
cc23e853
AM
24262@@ -282,39 +286,71 @@ EXPORT_SYMBOL_GPL(inet6_hash);
24263 * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
24264 * and 0.0.0.0 equals to 0.0.0.0 only
24265 */
24266-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
24267+int ipv6_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
24268 bool match_wildcard)
24269 {
24270+ const struct in6_addr *sk1_rcv_saddr6 = inet6_rcv_saddr(sk1);
24271 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
24272+ __be32 sk1_rcv_saddr = sk1->sk_rcv_saddr;
24273+ __be32 sk2_rcv_saddr = sk2->sk_rcv_saddr;
24274+ int sk1_ipv6only = inet_v6_ipv6only(sk1);
24275 int sk2_ipv6only = inet_v6_ipv6only(sk2);
24276- int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
24277+ int addr_type1 = ipv6_addr_type(sk1_rcv_saddr6);
24278 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
24279
24280+
24281+ /* if one is mapped and the other is ipv6only exit early */
24282+ if (addr_type1 == IPV6_ADDR_MAPPED && sk2_ipv6only)
24283+ return 0;
24284+
24285+ if (addr_type2 == IPV6_ADDR_MAPPED && sk1_ipv6only)
24286+ return 0;
24287+
24288 /* if both are mapped, treat as IPv4 */
24289- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
24290- if (!sk2_ipv6only) {
24291- if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
24292- return 1;
24293- if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
24294- return match_wildcard;
24295- }
24296+ if (addr_type1 == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
24297+ if (sk1_rcv_saddr == sk2_rcv_saddr)
24298+ return 1;
24299+ if ((!sk1_rcv_saddr || !sk2_rcv_saddr) && match_wildcard)
24300+ goto vs_v4;
24301 return 0;
24302 }
24303
24304- if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
24305- return 1;
24306+ /* if both are wildcards, check for overlap */
24307+ if (addr_type1 == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
24308+ return nx_v6_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
24309
24310- if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
24311- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
24312+ /* if both are valid ipv6 addresses, mapped handled above */
24313+ if (addr_type1 != IPV6_ADDR_ANY && addr_type2 != IPV6_ADDR_ANY &&
24314+ sk2_rcv_saddr6 && ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
24315 return 1;
24316
24317- if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
24318- !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
24319- return 1;
24320+ if (addr_type1 == IPV6_ADDR_ANY && match_wildcard) {
24321+ /* ipv6only case handled above */
24322+ if (addr_type2 == IPV6_ADDR_MAPPED)
24323+ return v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, -1);
24324+ else
24325+ return v6_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr6, -1);
24326+ }
24327
24328- if (sk2_rcv_saddr6 &&
24329- ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
24330- return 1;
24331+ if (addr_type2 == IPV6_ADDR_ANY && match_wildcard) {
24332+ /* ipv6only case handled above */
24333+ if (addr_type1 == IPV6_ADDR_MAPPED)
24334+ return v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, -1);
24335+ else
24336+ return v6_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr6, -1);
24337+ }
24338+
24339+ return 0;
24340+
24341+vs_v4:
24342+ if (!sk1_rcv_saddr && !sk2_rcv_saddr)
24343+ return nx_v4_addr_conflict(sk1->sk_nx_info, sk2->sk_nx_info);
24344+
24345+ if (!sk2_rcv_saddr)
24346+ return v4_addr_in_nx_info(sk2->sk_nx_info, sk1_rcv_saddr, -1);
24347+
24348+ if (!sk1_rcv_saddr)
24349+ return v4_addr_in_nx_info(sk1->sk_nx_info, sk2_rcv_saddr, -1);
24350
24351 return 0;
24352 }
09a55596
AM
24353diff -NurpP --minimal linux-4.9.135/net/ipv6/ip6_fib.c linux-4.9.135-vs2.3.9.8/net/ipv6/ip6_fib.c
24354--- linux-4.9.135/net/ipv6/ip6_fib.c 2018-10-20 10:39:25.000000000 +0000
24355+++ linux-4.9.135-vs2.3.9.8/net/ipv6/ip6_fib.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 24356@@ -1976,6 +1976,7 @@ static int ipv6_route_seq_show(struct se
c2e5f7c8
JR
24357 struct rt6_info *rt = v;
24358 struct ipv6_route_iter *iter = seq->private;
24359
24360+ /* FIXME: check for network context? */
24361 seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
24362
24363 #ifdef CONFIG_IPV6_SUBTREES
09a55596
AM
24364diff -NurpP --minimal linux-4.9.135/net/ipv6/ip6_output.c linux-4.9.135-vs2.3.9.8/net/ipv6/ip6_output.c
24365--- linux-4.9.135/net/ipv6/ip6_output.c 2018-10-20 10:39:25.000000000 +0000
24366+++ linux-4.9.135-vs2.3.9.8/net/ipv6/ip6_output.c 2018-10-20 05:55:44.000000000 +0000
24367@@ -960,7 +960,8 @@ static int ip6_dst_lookup_tail(struct ne
cc23e853 24368 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
4bf69007
AM
24369 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
24370 sk ? inet6_sk(sk)->srcprefs : 0,
24371- &fl6->saddr);
24372+ &fl6->saddr,
24373+ sk ? sk->sk_nx_info : NULL);
24374 if (err)
24375 goto out_err_release;
cc23e853 24376
09a55596
AM
24377diff -NurpP --minimal linux-4.9.135/net/ipv6/ip6_tunnel.c linux-4.9.135-vs2.3.9.8/net/ipv6/ip6_tunnel.c
24378--- linux-4.9.135/net/ipv6/ip6_tunnel.c 2018-10-20 10:39:25.000000000 +0000
24379+++ linux-4.9.135-vs2.3.9.8/net/ipv6/ip6_tunnel.c 2018-10-20 05:55:44.000000000 +0000
24380@@ -1115,7 +1115,7 @@ route_lookup:
cc23e853
AM
24381 }
24382 if (t->parms.collect_md &&
24383 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
24384- &fl6->daddr, 0, &fl6->saddr))
24385+ &fl6->daddr, 0, &fl6->saddr, NULL))
24386 goto tx_err_link_failure;
24387 ndst = dst;
4bf69007 24388 }
09a55596
AM
24389diff -NurpP --minimal linux-4.9.135/net/ipv6/ndisc.c linux-4.9.135-vs2.3.9.8/net/ipv6/ndisc.c
24390--- linux-4.9.135/net/ipv6/ndisc.c 2018-10-20 10:39:25.000000000 +0000
24391+++ linux-4.9.135-vs2.3.9.8/net/ipv6/ndisc.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 24392@@ -512,7 +512,7 @@ void ndisc_send_na(struct net_device *de
4bf69007
AM
24393 } else {
24394 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
24395 inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
24396- &tmpaddr))
24397+ &tmpaddr, NULL))
24398 return;
24399 src_addr = &tmpaddr;
24400 }
09a55596
AM
24401diff -NurpP --minimal linux-4.9.135/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c linux-4.9.135-vs2.3.9.8/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
24402--- linux-4.9.135/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 2016-12-11 19:17:54.000000000 +0000
24403+++ linux-4.9.135-vs2.3.9.8/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 24404@@ -39,7 +39,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *s
4bf69007
AM
24405 ctinfo == IP_CT_RELATED_REPLY));
24406
cc23e853 24407 if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
4bf69007
AM
24408- &ipv6_hdr(skb)->daddr, 0, &src) < 0)
24409+ &ipv6_hdr(skb)->daddr, 0, &src, NULL) < 0)
24410 return NF_DROP;
24411
bb20add7 24412 nfct_nat(ct)->masq_index = out->ifindex;
09a55596
AM
24413diff -NurpP --minimal linux-4.9.135/net/ipv6/raw.c linux-4.9.135-vs2.3.9.8/net/ipv6/raw.c
24414--- linux-4.9.135/net/ipv6/raw.c 2018-10-20 10:39:25.000000000 +0000
24415+++ linux-4.9.135-vs2.3.9.8/net/ipv6/raw.c 2018-10-20 05:55:44.000000000 +0000
cc23e853 24416@@ -291,6 +291,13 @@ static int rawv6_bind(struct sock *sk, s
4bf69007
AM
24417 goto out_unlock;
24418 }
24419
24420+ if (!v6_addr_in_nx_info(sk->sk_nx_info, &addr->sin6_addr, -1)) {
24421+ err = -EADDRNOTAVAIL;
24422+ if (dev)
24423+ dev_put(dev);
24424+ goto out;
24425+ }
2ba6f0dd 24426+
4bf69007
AM
24427 /* ipv4 addr of the socket is invalid. Only the
24428 * unspecified and mapped address have a v4 equivalent.
24429 */
09a55596
AM
24430diff -NurpP --minimal linux-4.9.135/net/ipv6/route.c linux-4.9.135-vs2.3.9.8/net/ipv6/route.c
24431--- linux-4.9.135/net/ipv6/route.c 2018-10-20 10:39:25.000000000 +0000
24432+++ linux-4.9.135-vs2.3.9.8/net/ipv6/route.c 2018-10-20 04:58:15.000000000 +0000
24433@@ -3291,7 +3291,8 @@ static int rt6_fill_node(struct net *net
4bf69007
AM
24434 goto nla_put_failure;
24435 } else if (dst) {
24436 struct in6_addr saddr_buf;
24437- if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
24438+ if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf,
24439+ (skb->sk ? skb->sk->sk_nx_info : NULL)) == 0 &&
cc23e853 24440 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4bf69007
AM
24441 goto nla_put_failure;
24442 }
09a55596
AM
24443diff -NurpP --minimal linux-4.9.135/net/ipv6/tcp_ipv6.c linux-4.9.135-vs2.3.9.8/net/ipv6/tcp_ipv6.c
24444--- linux-4.9.135/net/ipv6/tcp_ipv6.c 2018-10-20 10:39:25.000000000 +0000
24445+++ linux-4.9.135-vs2.3.9.8/net/ipv6/tcp_ipv6.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 24446@@ -149,11 +149,18 @@ static int tcp_v6_connect(struct sock *s
4bf69007
AM
24447 */
24448
cc23e853
AM
24449 if (ipv6_addr_any(&usin->sin6_addr)) {
24450- if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
24451- ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
24452- &usin->sin6_addr);
24453- else
24454- usin->sin6_addr = in6addr_loopback;
4bf69007 24455+ struct nx_info *nxi = sk->sk_nx_info;
2ba6f0dd 24456+
4bf69007
AM
24457+ if (nxi && nx_info_has_v6(nxi))
24458+ /* FIXME: remap lback? */
24459+ usin->sin6_addr = nxi->v6.ip;
cc23e853
AM
24460+ else {
24461+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
24462+ ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
24463+ &usin->sin6_addr);
24464+ else
24465+ usin->sin6_addr = in6addr_loopback;
24466+ }
24467 }
4bf69007
AM
24468
24469 addr_type = ipv6_addr_type(&usin->sin6_addr);
09a55596
AM
24470diff -NurpP --minimal linux-4.9.135/net/ipv6/udp.c linux-4.9.135-vs2.3.9.8/net/ipv6/udp.c
24471--- linux-4.9.135/net/ipv6/udp.c 2018-10-20 10:39:25.000000000 +0000
24472+++ linux-4.9.135-vs2.3.9.8/net/ipv6/udp.c 2018-10-20 04:58:15.000000000 +0000
cc23e853
AM
24473@@ -135,6 +135,10 @@ static int compute_score(struct sock *sk
24474 if (inet->inet_dport != sport)
24475 return -1;
24476 score++;
4bf69007
AM
24477+ } else {
24478+ /* block non nx_info ips */
24479+ if (!v6_addr_in_nx_info(sk->sk_nx_info, daddr, -1))
24480+ return -1;
cc23e853
AM
24481 }
24482
24483 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
09a55596
AM
24484diff -NurpP --minimal linux-4.9.135/net/ipv6/xfrm6_policy.c linux-4.9.135-vs2.3.9.8/net/ipv6/xfrm6_policy.c
24485--- linux-4.9.135/net/ipv6/xfrm6_policy.c 2018-10-20 10:39:25.000000000 +0000
24486+++ linux-4.9.135-vs2.3.9.8/net/ipv6/xfrm6_policy.c 2018-10-20 04:58:15.000000000 +0000
cc23e853
AM
24487@@ -64,7 +64,8 @@ static int xfrm6_get_saddr(struct net *n
24488 return -EHOSTUNREACH;
24489
4bf69007 24490 dev = ip6_dst_idev(dst)->dev;
cc23e853
AM
24491- ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6);
24492+ ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6,
24493+ 0, &saddr->in6, NULL);
4bf69007
AM
24494 dst_release(dst);
24495 return 0;
24496 }
09a55596
AM
24497diff -NurpP --minimal linux-4.9.135/net/netfilter/ipvs/ip_vs_xmit.c linux-4.9.135-vs2.3.9.8/net/netfilter/ipvs/ip_vs_xmit.c
24498--- linux-4.9.135/net/netfilter/ipvs/ip_vs_xmit.c 2016-12-11 19:17:54.000000000 +0000
24499+++ linux-4.9.135-vs2.3.9.8/net/netfilter/ipvs/ip_vs_xmit.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 24500@@ -381,7 +381,7 @@ __ip_vs_route_output_v6(struct net *net,
4bf69007
AM
24501 return dst;
24502 if (ipv6_addr_any(&fl6.saddr) &&
24503 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
24504- &fl6.daddr, 0, &fl6.saddr) < 0)
24505+ &fl6.daddr, 0, &fl6.saddr, NULL) < 0)
24506 goto out_err;
24507 if (do_xfrm) {
24508 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
09a55596
AM
24509diff -NurpP --minimal linux-4.9.135/net/netlink/af_netlink.c linux-4.9.135-vs2.3.9.8/net/netlink/af_netlink.c
24510--- linux-4.9.135/net/netlink/af_netlink.c 2018-10-20 10:39:25.000000000 +0000
24511+++ linux-4.9.135-vs2.3.9.8/net/netlink/af_netlink.c 2018-10-20 05:56:16.000000000 +0000
24512@@ -63,6 +63,8 @@
bb20add7 24513 #include <linux/hash.h>
cc23e853 24514 #include <linux/genetlink.h>
e37b822e 24515 #include <linux/nospec.h>
4bf69007
AM
24516+#include <linux/vs_context.h>
24517+#include <linux/vs_network.h>
4bf69007
AM
24518
24519 #include <net/net_namespace.h>
bb20add7 24520 #include <net/sock.h>
09a55596 24521@@ -2477,7 +2479,8 @@ static void *__netlink_seq_next(struct s
cc23e853
AM
24522 if (err)
24523 return ERR_PTR(err);
24524 }
24525- } while (sock_net(&nlk->sk) != seq_file_net(seq));
24526+ } while ((sock_net(&nlk->sk) != seq_file_net(seq)) ||
24527+ !nx_check(nlk->sk.sk_nid, VS_WATCH_P | VS_IDENT));
bb20add7 24528
cc23e853
AM
24529 return nlk;
24530 }
09a55596
AM
24531diff -NurpP --minimal linux-4.9.135/net/packet/diag.c linux-4.9.135-vs2.3.9.8/net/packet/diag.c
24532--- linux-4.9.135/net/packet/diag.c 2016-12-11 19:17:54.000000000 +0000
24533+++ linux-4.9.135-vs2.3.9.8/net/packet/diag.c 2018-10-20 06:31:18.000000000 +0000
24534@@ -4,6 +4,7 @@
24535 #include <linux/netdevice.h>
24536 #include <linux/packet_diag.h>
24537 #include <linux/percpu.h>
24538+#include <linux/vs_network.h>
24539 #include <net/net_namespace.h>
24540 #include <net/sock.h>
24541
24542@@ -201,6 +202,8 @@ static int packet_diag_dump(struct sk_bu
24543 sk_for_each(sk, &net->packet.sklist) {
24544 if (!net_eq(sock_net(sk), net))
24545 continue;
24546+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24547+ continue;
24548 if (num < s_num)
24549 goto next;
24550
24551diff -NurpP --minimal linux-4.9.135/net/socket.c linux-4.9.135-vs2.3.9.8/net/socket.c
24552--- linux-4.9.135/net/socket.c 2018-10-20 10:39:26.000000000 +0000
24553+++ linux-4.9.135-vs2.3.9.8/net/socket.c 2018-10-20 05:55:45.000000000 +0000
24554@@ -100,10 +100,12 @@
4bf69007
AM
24555
24556 #include <net/sock.h>
24557 #include <linux/netfilter.h>
4bf69007
AM
24558+#include <linux/vs_socket.h>
24559+#include <linux/vs_inet.h>
24560+#include <linux/vs_inet6.h>
24561
24562 #include <linux/if_tun.h>
24563 #include <linux/ipv6_route.h>
cc23e853
AM
24564-#include <linux/route.h>
24565 #include <linux/sockios.h>
24566 #include <linux/atalk.h>
24567 #include <net/busy_poll.h>
09a55596 24568@@ -619,8 +621,24 @@ EXPORT_SYMBOL(__sock_tx_timestamp);
4bf69007 24569
cc23e853
AM
24570 static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
24571 {
24572- int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
24573- BUG_ON(ret == -EIOCBQUEUED);
24574+ size_t size = msg_data_left(msg);
24575+ int ret = sock->ops->sendmsg(sock, msg, size);
24576+#if 0
4bf69007 24577+ if (sock->sk) {
cc23e853 24578+ if (!ret)
4bf69007 24579+ vx_sock_fail(sock->sk, size);
cc23e853
AM
24580+ else
24581+ vx_sock_send(sock->sk, size);
4bf69007 24582+ }
cc23e853 24583+#endif
4bf69007 24584+ vxdprintk(VXD_CBIT(net, 7),
cc23e853 24585+ "sock_sendmsg_nosec: %p[%p,%p,%p;%d/%d]:%zu/%zu",
4bf69007
AM
24586+ sock, sock->sk,
24587+ (sock->sk)?sock->sk->sk_nx_info:0,
24588+ (sock->sk)?sock->sk->sk_vx_info:0,
24589+ (sock->sk)?sock->sk->sk_xid:0,
24590+ (sock->sk)?sock->sk->sk_nid:0,
cc23e853
AM
24591+ size, msg_data_left(msg));
24592 return ret;
4bf69007
AM
24593 }
24594
09a55596 24595@@ -1110,6 +1128,13 @@ int __sock_create(struct net *net, int f
4bf69007
AM
24596 if (type < 0 || type >= SOCK_MAX)
24597 return -EINVAL;
24598
24599+ if (!nx_check(0, VS_ADMIN)) {
24600+ if (family == PF_INET && !current_nx_info_has_v4())
24601+ return -EAFNOSUPPORT;
24602+ if (family == PF_INET6 && !current_nx_info_has_v6())
24603+ return -EAFNOSUPPORT;
24604+ }
2ba6f0dd 24605+
4bf69007
AM
24606 /* Compatibility.
24607
24608 This uglymoron is moved from INET layer to here to avoid
09a55596 24609@@ -1240,6 +1265,7 @@ SYSCALL_DEFINE3(socket, int, family, int
4bf69007
AM
24610 if (retval < 0)
24611 goto out;
24612
24613+ set_bit(SOCK_USER_SOCKET, &sock->flags);
24614 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
24615 if (retval < 0)
24616 goto out_release;
09a55596 24617@@ -1281,10 +1307,12 @@ SYSCALL_DEFINE4(socketpair, int, family,
4bf69007
AM
24618 err = sock_create(family, type, protocol, &sock1);
24619 if (err < 0)
24620 goto out;
24621+ set_bit(SOCK_USER_SOCKET, &sock1->flags);
24622
24623 err = sock_create(family, type, protocol, &sock2);
24624 if (err < 0)
24625 goto out_release_1;
24626+ set_bit(SOCK_USER_SOCKET, &sock2->flags);
24627
24628 err = sock1->ops->socketpair(sock1, sock2);
24629 if (err < 0)
09a55596
AM
24630diff -NurpP --minimal linux-4.9.135/net/sunrpc/auth.c linux-4.9.135-vs2.3.9.8/net/sunrpc/auth.c
24631--- linux-4.9.135/net/sunrpc/auth.c 2016-12-11 19:17:54.000000000 +0000
24632+++ linux-4.9.135-vs2.3.9.8/net/sunrpc/auth.c 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
24633@@ -15,6 +15,7 @@
24634 #include <linux/sunrpc/clnt.h>
24635 #include <linux/sunrpc/gss_api.h>
24636 #include <linux/spinlock.h>
24637+#include <linux/vs_tag.h>
24638
cc23e853 24639 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
4bf69007 24640 # define RPCDBG_FACILITY RPCDBG_AUTH
bb20add7 24641@@ -630,6 +631,7 @@ rpcauth_lookupcred(struct rpc_auth *auth
4bf69007
AM
24642 memset(&acred, 0, sizeof(acred));
24643 acred.uid = cred->fsuid;
24644 acred.gid = cred->fsgid;
a4a22af8 24645+ acred.tag = make_ktag(&init_user_ns, dx_current_tag());
bb20add7 24646 acred.group_info = cred->group_info;
4bf69007 24647 ret = auth->au_ops->lookup_cred(auth, &acred, flags);
bb20add7
AM
24648 return ret;
24649@@ -669,6 +671,7 @@ rpcauth_bind_root_cred(struct rpc_task *
4bf69007 24650 struct auth_cred acred = {
b00e13aa
AM
24651 .uid = GLOBAL_ROOT_UID,
24652 .gid = GLOBAL_ROOT_GID,
a4a22af8 24653+ .tag = KTAGT_INIT(dx_current_tag()),
4bf69007
AM
24654 };
24655
24656 dprintk("RPC: %5u looking up %s cred\n",
09a55596
AM
24657diff -NurpP --minimal linux-4.9.135/net/sunrpc/auth_unix.c linux-4.9.135-vs2.3.9.8/net/sunrpc/auth_unix.c
24658--- linux-4.9.135/net/sunrpc/auth_unix.c 2016-12-11 19:17:54.000000000 +0000
24659+++ linux-4.9.135-vs2.3.9.8/net/sunrpc/auth_unix.c 2018-10-20 04:58:15.000000000 +0000
4bf69007
AM
24660@@ -13,11 +13,13 @@
24661 #include <linux/sunrpc/clnt.h>
24662 #include <linux/sunrpc/auth.h>
24663 #include <linux/user_namespace.h>
24664+#include <linux/vs_tag.h>
24665
24666 #define NFS_NGROUPS 16
24667
24668 struct unx_cred {
24669 struct rpc_cred uc_base;
b00e13aa
AM
24670+ ktag_t uc_tag;
24671 kgid_t uc_gid;
24672 kgid_t uc_gids[NFS_NGROUPS];
4bf69007 24673 };
cc23e853 24674@@ -86,6 +88,7 @@ unx_create_cred(struct rpc_auth *auth, s
4bf69007
AM
24675 groups = NFS_NGROUPS;
24676
24677 cred->uc_gid = acred->gid;
24678+ cred->uc_tag = acred->tag;
b00e13aa 24679 for (i = 0; i < groups; i++)
cc23e853 24680 cred->uc_gids[i] = acred->group_info->gid[i];
b00e13aa 24681 if (i < NFS_NGROUPS)
cc23e853 24682@@ -127,7 +130,9 @@ unx_match(struct auth_cred *acred, struc
4bf69007
AM
24683 unsigned int i;
24684
24685
b00e13aa
AM
24686- if (!uid_eq(cred->uc_uid, acred->uid) || !gid_eq(cred->uc_gid, acred->gid))
24687+ if (!uid_eq(cred->uc_uid, acred->uid) ||
24688+ !gid_eq(cred->uc_gid, acred->gid) ||
24689+ !tag_eq(cred->uc_tag, acred->tag))
4bf69007
AM
24690 return 0;
24691
24692 if (acred->group_info != NULL)
cc23e853 24693@@ -152,7 +157,7 @@ unx_marshal(struct rpc_task *task, __be3
4bf69007
AM
24694 struct rpc_clnt *clnt = task->tk_client;
24695 struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
24696 __be32 *base, *hold;
24697- int i;
24698+ int i, tag;
24699
24700 *p++ = htonl(RPC_AUTH_UNIX);
24701 base = p++;
cc23e853 24702@@ -163,8 +168,11 @@ unx_marshal(struct rpc_task *task, __be3
4bf69007
AM
24703 */
24704 p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
4bf69007 24705
b00e13aa
AM
24706- *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
24707- *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
24708+ tag = task->tk_client->cl_tag;
a4a22af8
AM
24709+ *p++ = htonl((u32) from_kuid(&init_user_ns,
24710+ TAGINO_KUID(tag, cred->uc_uid, cred->uc_tag)));
24711+ *p++ = htonl((u32) from_kgid(&init_user_ns,
24712+ TAGINO_KGID(tag, cred->uc_gid, cred->uc_tag)));
4bf69007 24713 hold = p++;
b00e13aa
AM
24714 for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++)
24715 *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
09a55596
AM
24716diff -NurpP --minimal linux-4.9.135/net/sunrpc/clnt.c linux-4.9.135-vs2.3.9.8/net/sunrpc/clnt.c
24717--- linux-4.9.135/net/sunrpc/clnt.c 2018-10-20 10:39:26.000000000 +0000
24718+++ linux-4.9.135-vs2.3.9.8/net/sunrpc/clnt.c 2018-10-20 05:55:45.000000000 +0000
4bf69007 24719@@ -31,6 +31,7 @@
c2e5f7c8 24720 #include <linux/in.h>
4bf69007
AM
24721 #include <linux/in6.h>
24722 #include <linux/un.h>
4bf69007
AM
24723+#include <linux/vs_cvirt.h>
24724
24725 #include <linux/sunrpc/clnt.h>
b00e13aa 24726 #include <linux/sunrpc/addr.h>
cc23e853 24727@@ -496,6 +497,9 @@ static struct rpc_clnt *rpc_create_xprt(
4bf69007
AM
24728 if (!(args->flags & RPC_CLNT_CREATE_QUIET))
24729 clnt->cl_chatty = 1;
24730
24731+ /* TODO: handle RPC_CLNT_CREATE_TAGGED
24732+ if (args->flags & RPC_CLNT_CREATE_TAGGED)
24733+ clnt->cl_tag = 1; */
24734 return clnt;
24735 }
cc23e853 24736
09a55596
AM
24737diff -NurpP --minimal linux-4.9.135/net/unix/af_unix.c linux-4.9.135-vs2.3.9.8/net/unix/af_unix.c
24738--- linux-4.9.135/net/unix/af_unix.c 2018-10-20 10:39:26.000000000 +0000
24739+++ linux-4.9.135-vs2.3.9.8/net/unix/af_unix.c 2018-10-20 04:58:15.000000000 +0000
bb20add7 24740@@ -117,6 +117,8 @@
4bf69007
AM
24741 #include <net/checksum.h>
24742 #include <linux/security.h>
c2e5f7c8 24743 #include <linux/freezer.h>
4bf69007
AM
24744+#include <linux/vs_context.h>
24745+#include <linux/vs_limit.h>
24746
24747 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
24748 EXPORT_SYMBOL_GPL(unix_socket_table);
cc23e853 24749@@ -282,6 +284,8 @@ static struct sock *__unix_find_socket_b
4bf69007
AM
24750 if (!net_eq(sock_net(s), net))
24751 continue;
24752
24753+ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
24754+ continue;
24755 if (u->addr->len == len &&
24756 !memcmp(u->addr->name, sunname, len))
24757 goto found;
cc23e853 24758@@ -2732,6 +2736,8 @@ static struct sock *unix_from_bucket(str
4bf69007
AM
24759 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
24760 if (sock_net(sk) != seq_file_net(seq))
24761 continue;
24762+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24763+ continue;
24764 if (++count == offset)
24765 break;
24766 }
cc23e853 24767@@ -2749,6 +2755,8 @@ static struct sock *unix_next_socket(str
4bf69007
AM
24768 sk = sk_next(sk);
24769 if (!sk)
24770 goto next_bucket;
24771+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24772+ continue;
24773 if (sock_net(sk) == seq_file_net(seq))
24774 return sk;
24775 }
09a55596
AM
24776diff -NurpP --minimal linux-4.9.135/net/unix/diag.c linux-4.9.135-vs2.3.9.8/net/unix/diag.c
24777--- linux-4.9.135/net/unix/diag.c 2018-10-20 10:39:26.000000000 +0000
24778+++ linux-4.9.135-vs2.3.9.8/net/unix/diag.c 2018-10-20 06:31:18.000000000 +0000
24779@@ -4,6 +4,7 @@
24780 #include <linux/unix_diag.h>
24781 #include <linux/skbuff.h>
24782 #include <linux/module.h>
24783+#include <linux/vs_network.h>
24784 #include <net/netlink.h>
24785 #include <net/af_unix.h>
24786 #include <net/tcp_states.h>
24787@@ -199,6 +200,8 @@ static int unix_diag_dump(struct sk_buff
24788 sk_for_each(sk, &unix_socket_table[slot]) {
24789 if (!net_eq(sock_net(sk), net))
24790 continue;
24791+ if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
24792+ continue;
24793 if (num < s_num)
24794 goto next;
24795 if (!(req->udiag_states & (1 << sk->sk_state)))
24796diff -NurpP --minimal linux-4.9.135/scripts/checksyscalls.sh linux-4.9.135-vs2.3.9.8/scripts/checksyscalls.sh
24797--- linux-4.9.135/scripts/checksyscalls.sh 2016-12-11 19:17:54.000000000 +0000
24798+++ linux-4.9.135-vs2.3.9.8/scripts/checksyscalls.sh 2018-10-20 04:58:15.000000000 +0000
bb20add7 24799@@ -196,7 +196,6 @@ cat << EOF
4bf69007
AM
24800 #define __IGNORE_afs_syscall
24801 #define __IGNORE_getpmsg
24802 #define __IGNORE_putpmsg
24803-#define __IGNORE_vserver
24804 EOF
24805 }
24806
09a55596
AM
24807diff -NurpP --minimal linux-4.9.135/security/commoncap.c linux-4.9.135-vs2.3.9.8/security/commoncap.c
24808--- linux-4.9.135/security/commoncap.c 2016-12-11 19:17:54.000000000 +0000
24809+++ linux-4.9.135-vs2.3.9.8/security/commoncap.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 24810@@ -71,6 +71,7 @@ static void warn_setuid_and_fcaps_mixed(
4bf69007
AM
24811 int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
24812 int cap, int audit)
24813 {
24814+ struct vx_info *vxi = current_vx_info(); /* FIXME: get vxi from cred? */
b00e13aa 24815 struct user_namespace *ns = targ_ns;
4bf69007 24816
b00e13aa 24817 /* See if cred has the capability in the target user namespace
cc23e853 24818@@ -79,8 +80,12 @@ int cap_capable(const struct cred *cred,
b00e13aa
AM
24819 */
24820 for (;;) {
4bf69007 24821 /* Do we have the necessary capabilities? */
b00e13aa 24822- if (ns == cred->user_ns)
4bf69007 24823- return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
b00e13aa 24824+ if (ns == cred->user_ns) {
4bf69007
AM
24825+ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0) &&
24826+ cap_raised(cred->cap_effective, cap))
24827+ return 0;
24828+ return vx_cap_raised(vxi, cred->cap_effective, cap) ? 0 : -EPERM;
24829+ }
24830
24831 /* Have we tried all of the parent namespaces? */
b00e13aa 24832 if (ns == &init_user_ns)
cc23e853 24833@@ -667,7 +672,7 @@ int cap_inode_setxattr(struct dentry *de
4bf69007
AM
24834
24835 if (!strncmp(name, XATTR_SECURITY_PREFIX,
24836 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
24837- !capable(CAP_SYS_ADMIN))
24838+ !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
24839 return -EPERM;
24840 return 0;
24841 }
cc23e853 24842@@ -693,7 +698,7 @@ int cap_inode_removexattr(struct dentry
4bf69007
AM
24843
24844 if (!strncmp(name, XATTR_SECURITY_PREFIX,
24845 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
24846- !capable(CAP_SYS_ADMIN))
24847+ !vx_capable(CAP_SYS_ADMIN, VXC_FS_SECURITY))
24848 return -EPERM;
24849 return 0;
24850 }
09a55596
AM
24851diff -NurpP --minimal linux-4.9.135/security/selinux/hooks.c linux-4.9.135-vs2.3.9.8/security/selinux/hooks.c
24852--- linux-4.9.135/security/selinux/hooks.c 2018-10-20 10:39:26.000000000 +0000
24853+++ linux-4.9.135-vs2.3.9.8/security/selinux/hooks.c 2018-10-20 04:58:15.000000000 +0000
cc23e853 24854@@ -67,7 +67,6 @@
4bf69007
AM
24855 #include <linux/dccp.h>
24856 #include <linux/quota.h>
24857 #include <linux/un.h> /* for Unix socket types */
24858-#include <net/af_unix.h> /* for Unix socket types */
24859 #include <linux/parser.h>
24860 #include <linux/nfs_mount.h>
24861 #include <net/ipv6.h>
This page took 6.179663 seconds and 4 git commands to generate.